pax_global_header00006660000000000000000000000064152003756300014513gustar00rootroot0000000000000052 comment=06cc4f897defc4952a21504bdb3609da8163b1c6 liblognorm-2.1.0/000077500000000000000000000000001520037563000136575ustar00rootroot00000000000000liblognorm-2.1.0/.github/000077500000000000000000000000001520037563000152175ustar00rootroot00000000000000liblognorm-2.1.0/.github/CODEOWNERS000066400000000000000000000003221520037563000166070ustar00rootroot00000000000000# Security-sensitive repository metadata /.github/CODEOWNERS @rgerhards @alorbach /.gitmodules @rgerhards @alorbach /.gitattributes @rgerhards @alorbach # CI workflows /.github/workflows/ @rgerhards @alorbach liblognorm-2.1.0/.github/workflows/000077500000000000000000000000001520037563000172545ustar00rootroot00000000000000liblognorm-2.1.0/.github/workflows/actionlint.yml000066400000000000000000000012211520037563000221370ustar00rootroot00000000000000--- name: Workflow Lint on: pull_request: permissions: contents: read concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: lint: name: actionlint runs-on: ubuntu-latest steps: - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false - name: Lint GitHub Actions workflows uses: reviewdog/action-actionlint@e0207a28405ecad11953ba625a95d92f7889572a # v1 with: github_token: ${{ secrets.GITHUB_TOKEN }} filter_mode: file fail_level: any liblognorm-2.1.0/.github/workflows/codeql.yml000066400000000000000000000034461520037563000212550ustar00rootroot00000000000000name: "CodeQL" on: push: branches: [ "main" ] paths-ignore: - "ChangeLog" - "**/*.md" - "**/*.txt" - "doc/**" pull_request: branches: [ "main" ] paths-ignore: - "ChangeLog" - "**/*.md" - "**/*.txt" - "doc/**" schedule: - cron: "23 2 * * 3" concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true permissions: {} jobs: analyze: name: 
Analyze runs-on: ubuntu-latest permissions: actions: read # CodeQL reads Actions metadata for workflow analysis. contents: read security-events: write # CodeQL uploads results to code scanning. strategy: fail-fast: false matrix: language: [cpp, actions] steps: - name: Checkout uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false - name: Install packages if: ${{ matrix.language == 'cpp' }} run: | sudo apt-get update sudo apt-get install --yes \ autoconf \ automake \ libestr-dev \ libfastjson-dev \ libtool \ pkg-config - name: Initialize CodeQL uses: github/codeql-action/init@53e96ec3b35fce51c141c0d6f0e31028a448722d # v3 with: languages: ${{ matrix.language }} queries: +security-and-quality - name: Build project if: ${{ matrix.language == 'cpp' }} run: | autoreconf -fvi ./configure make - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@53e96ec3b35fce51c141c0d6f0e31028a448722d # v3 with: category: "/language:${{ matrix.language }}" liblognorm-2.1.0/.github/workflows/draft_release.yml000066400000000000000000000177771520037563000226220ustar00rootroot00000000000000# Copyright 2026 Rainer Gerhards and Others # # https://github.com/rsyslog/liblognorm # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
--- name: draft source release 'on': push: tags: - 'v2.*' workflow_dispatch: inputs: tag_name: description: Optional tag name for manual draft-release tests required: false type: string target_commitish: description: >- Optional branch or commit SHA to use when a manual draft release must create its tag remotely required: false type: string create_draft_release: description: Create or update a draft GitHub release after building required: false default: false type: boolean concurrency: group: >- ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref_name || github.event.inputs.tag_name || github.run_id }} cancel-in-progress: false permissions: {} jobs: build_release_artifacts: name: build release artifacts runs-on: ubuntu-latest if: ${{ github.repository == 'rsyslog/liblognorm' }} permissions: contents: read steps: - name: Checkout repository # v5 uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd with: persist-credentials: false fetch-depth: 0 - name: Setup make dist build env run: | sudo apt-get update sudo apt-get install -y \ autoconf \ automake \ libestr-dev \ libfastjson-dev \ libpcre2-dev \ libtool \ libtool-bin \ make \ pkg-config - name: Build maintainer tarball run: | set -euo pipefail rm -f liblognorm-*.tar.gz liblognorm-*.tar.gz.sha256 autoreconf -fiv ./configure make dist - name: Generate checksum and locate assets id: dist_assets run: | set -euo pipefail shopt -s nullglob tarballs=(liblognorm-*.tar.gz) if [ "${#tarballs[@]}" -ne 1 ]; then echo "ERROR: expected exactly one liblognorm-*.tar.gz artifact" >&2 ls -1 liblognorm-*.tar.gz 2>/dev/null || true exit 1 fi dist_tarball="${tarballs[0]}" sha256sum "$dist_tarball" > "$dist_tarball.sha256" printf 'dist_tarball=%s\n' "$dist_tarball" >> "$GITHUB_OUTPUT" printf 'dist_tarball_sha256=%s\n' \ "$dist_tarball.sha256" >> "$GITHUB_OUTPUT" - name: Upload release artifacts # v4 uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 with: name: draft-release-assets path: | ${{ 
steps.dist_assets.outputs.dist_tarball }} ${{ steps.dist_assets.outputs.dist_tarball_sha256 }} if-no-files-found: error update_draft_release: name: update draft release runs-on: ubuntu-latest needs: build_release_artifacts if: >- ${{ github.repository == 'rsyslog/liblognorm' && ( github.event_name == 'push' || github.event.inputs.create_draft_release == 'true' ) }} permissions: contents: write # Creates/updates draft releases and uploads assets. steps: - name: Download release artifacts # v4 uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 with: name: draft-release-assets path: release-assets - name: Resolve release tag id: release_tag env: MANUAL_TAG_NAME: ${{ github.event.inputs.tag_name }} run: | set -euo pipefail official_tag_re='^v2\.[0-9]+\.[0-9]+$' manual_tag_re='^(v2\.[0-9]+\.[0-9]+|test-release-.*)$' if [ "${GITHUB_EVENT_NAME}" = "push" ]; then release_tag="${GITHUB_REF_NAME}" if [[ ! "$release_tag" =~ $official_tag_re ]]; then echo "ERROR: pushed tag '$release_tag' does not match" \ "^v2\\.[0-9]+\\.[0-9]+$" >&2 exit 1 fi else release_tag="${MANUAL_TAG_NAME:-}" if [ -z "$release_tag" ]; then echo "ERROR: workflow_dispatch requires inputs.tag_name" \ "when create_draft_release=true" >&2 exit 1 fi if [[ ! "$release_tag" =~ $manual_tag_re ]]; then echo "ERROR: manual tag '$release_tag' must be an official" \ "v2.x.y tag or start with test-release-" >&2 exit 1 fi fi printf 'release_tag=%s\n' "$release_tag" >> "$GITHUB_OUTPUT" - name: Validate downloaded assets id: asset_paths run: | set -euo pipefail cd release-assets shopt -s nullglob tarballs=(liblognorm-*.tar.gz) checksums=(liblognorm-*.tar.gz.sha256) if [ "${#tarballs[@]}" -ne 1 ] || [ "${#checksums[@]}" -ne 1 ]; then echo "ERROR: expected exactly one tarball and one checksum file" >&2 find . 
-maxdepth 1 -type f -printf '%P\n' | sort exit 1 fi sha256sum -c "${checksums[0]}" printf 'tarball_path=%s\n' \ "release-assets/${tarballs[0]}" >> "$GITHUB_OUTPUT" printf 'checksum_path=%s\n' \ "release-assets/${checksums[0]}" >> "$GITHUB_OUTPUT" - name: Create or update draft release env: GH_TOKEN: ${{ github.token }} GH_REPO: ${{ github.repository }} RELEASE_TAG: ${{ steps.release_tag.outputs.release_tag }} TARGET_COMMITISH: ${{ github.event.inputs.target_commitish }} TARBALL_PATH: ${{ steps.asset_paths.outputs.tarball_path }} CHECKSUM_PATH: ${{ steps.asset_paths.outputs.checksum_path }} run: | set -euo pipefail cat > release-notes.md <<'EOF' This draft release includes the maintainer-built `liblognorm-*.tar.gz` source tarball produced by `make dist`. These uploaded assets are the official source-release artifacts for this draft and are distinct from GitHub's auto-generated source snapshots. EOF if gh release view "$RELEASE_TAG" >/dev/null 2>&1; then gh release edit "$RELEASE_TAG" \ --draft \ --title "$RELEASE_TAG" \ --notes-file release-notes.md else create_args=( "$RELEASE_TAG" --draft --title "$RELEASE_TAG" --notes-file release-notes.md ) if gh api \ "repos/$GITHUB_REPOSITORY/git/ref/tags/$RELEASE_TAG" \ >/dev/null 2>&1; then create_args+=(--verify-tag) else if [ "${GITHUB_EVENT_NAME}" != "workflow_dispatch" ]; then echo "ERROR: expected pushed tag '$RELEASE_TAG' to" \ "exist remotely" >&2 exit 1 fi if [ -z "${TARGET_COMMITISH:-}" ]; then echo "ERROR: inputs.target_commitish is required when" \ "creating a manual release for a missing tag" >&2 exit 1 fi create_args+=(--target "$TARGET_COMMITISH") fi gh release create "${create_args[@]}" fi gh release upload "$RELEASE_TAG" \ "$TARBALL_PATH" \ "$CHECKSUM_PATH" \ --clobber liblognorm-2.1.0/.github/workflows/run_checks.yml000066400000000000000000000356041520037563000221330ustar00rootroot00000000000000# Copyright 2026 Rainer Gerhards and Others # # https://github.com/rsyslog/rsyslog-pkg-ubuntu # # Licensed under 
the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. --- name: check on: pull_request: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true permissions: {} jobs: compile: name: compile (${{ matrix.config }}) runs-on: ubuntu-latest timeout-minutes: 20 permissions: contents: read strategy: fail-fast: true matrix: config: [gcc, clang] steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false - name: Run compile check env: MATRIX_CONFIG: ${{ matrix.config }} run: | chmod -R go+rw . export LIBLOGNORM_CONTAINER_UID="" export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_ubuntu:24.04' export CFLAGS='-g' case "$MATRIX_CONFIG" in gcc) export CC='gcc' ;; clang) export CC='clang' ;; *) echo "unknown configuration" exit 1 ;; esac chmod +x devtools/run-build.sh devtools/devcontainer.sh --rm devtools/run-build.sh turbo_compile: name: turbo compile (${{ matrix.config }}) runs-on: ubuntu-latest timeout-minutes: 20 permissions: contents: read strategy: fail-fast: true matrix: config: [gcc, clang] steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false - name: Run Turbo compile check env: MATRIX_CONFIG: ${{ matrix.config }} run: | chmod -R go+rw . 
export LIBLOGNORM_CONTAINER_UID="" export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_ubuntu:24.04' export LIBLOGNORM_CONFIGURE_OPTIONS_EXTRA='--enable-turbo' export CFLAGS='-g' case "$MATRIX_CONFIG" in gcc) export CC='gcc' ;; clang) export CC='clang' ;; *) echo "unknown configuration" exit 1 ;; esac chmod +x devtools/run-build.sh devtools/devcontainer.sh --rm devtools/run-build.sh CI: needs: compile if: ${{ needs.compile.result == 'success' }} permissions: contents: read runs-on: ubuntu-latest timeout-minutes: 30 strategy: fail-fast: false matrix: # TODO: re-enable ubuntu_24_ubsan after the remaining sanitizer-related # fixes have landed in the outstanding PRs. # Disabled for now: # ubuntu_24_ubsan config: [centos_8, debian_13, fedora_43, ubuntu_24, ubuntu_24_asan, ubuntu_24_distcheck] name: ${{ matrix.config }} steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false - name: Run container CI pipeline env: MATRIX_CONFIG: ${{ matrix.config }} run: | chmod -R go+rw . 
export LIBLOGNORM_CONTAINER_UID="" export CFLAGS='-g' export LDFLAGS='' export CC='gcc' export USE_AUTO_DEBUG='off' export CI_MAKE_CHECK_EXTRA='TESTSUITEFLAGS=--stop' export CI_CHECK_CMD='check' case "$MATRIX_CONFIG" in centos_8) export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_centos:8' ;; debian_13) export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_debian:13' ;; fedora_43) export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_fedora:43' ;; ubuntu_24) export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_ubuntu:24.04' ;; ubuntu_24_asan) export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_ubuntu:24.04' export CC='clang' export CFLAGS="-fstack-protector -D_FORTIFY_SOURCE=2 \ -fsanitize=address -fsanitize-address-use-after-scope \ -g -O1 -fno-omit-frame-pointer -fno-color-diagnostics" export LDFLAGS='-fsanitize=address' export ASAN_OPTIONS='abort_on_error=1:symbolize=1:detect_leaks=0' ;; # Disabled for now. Re-enable after the remaining sanitizer-related # fixes have landed in the outstanding PRs. 
# ubuntu_24_ubsan) # export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_ubuntu:24.04' # export CC='clang' # export CFLAGS="-fstack-protector -D_FORTIFY_SOURCE=2 \ # -fsanitize=undefined,nullability,unsigned-integer-overflow \ # -fno-sanitize-recover=undefined,nullability,unsigned-integer-overflow \ # -g -O1 -fno-omit-frame-pointer -fno-color-diagnostics" # export LDFLAGS='-fsanitize=undefined,nullability,unsigned-integer-overflow' # export UBSAN_OPTIONS='print_stacktrace=1' # ;; ubuntu_24_distcheck) export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_ubuntu:24.04' export CI_CHECK_CMD='distcheck' ;; *) echo "unknown configuration" exit 1 ;; esac devtools/devcontainer.sh --rm devtools/run-ci.sh - name: Show error logs if: ${{ failure() || cancelled() }} run: | devtools/gather-check-logs.sh cat failed-tests.log turbo_CI: needs: turbo_compile if: ${{ needs.turbo_compile.result == 'success' }} permissions: contents: read runs-on: ubuntu-latest timeout-minutes: 30 strategy: fail-fast: false matrix: config: [ubuntu_24_turbo, ubuntu_24_asan_turbo] name: ${{ matrix.config }} steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false - name: Run Turbo container CI pipeline env: MATRIX_CONFIG: ${{ matrix.config }} run: | chmod -R go+rw . 
export LIBLOGNORM_CONTAINER_UID="" export LIBLOGNORM_CONFIGURE_OPTIONS_EXTRA='--enable-turbo' export CFLAGS='-g' export LDFLAGS='' export CC='gcc' export USE_AUTO_DEBUG='off' export CI_MAKE_CHECK_EXTRA='TESTSUITEFLAGS=--stop' export CI_CHECK_CMD='check' case "$MATRIX_CONFIG" in ubuntu_24_turbo) export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_ubuntu:24.04' ;; ubuntu_24_asan_turbo) export LIBLOGNORM_DEV_CONTAINER='rsyslog/rsyslog_dev_base_ubuntu:24.04' export CC='clang' export CFLAGS="-fstack-protector -D_FORTIFY_SOURCE=2 \ -fsanitize=address -fsanitize-address-use-after-scope \ -g -O1 -fno-omit-frame-pointer -fno-color-diagnostics" export LDFLAGS='-fsanitize=address' export ASAN_OPTIONS='abort_on_error=1:symbolize=1:detect_leaks=0' ;; *) echo "unknown configuration" exit 1 ;; esac devtools/devcontainer.sh --rm devtools/run-ci.sh - name: Show error logs if: ${{ failure() || cancelled() }} run: | devtools/gather-check-logs.sh cat failed-tests.log arm_CI: needs: compile if: ${{ needs.compile.result == 'success' }} permissions: contents: read runs-on: ${{ matrix.runs_on }} timeout-minutes: 120 strategy: fail-fast: false matrix: include: - arch: armhf runs_on: ubuntu-24.04 platform: linux/arm/v7 qemu_platform: arm use_qemu: true - arch: arm64 runs_on: ubuntu-24.04-arm platform: linux/arm64 qemu_platform: aarch64 use_qemu: false name: ${{ matrix.arch }} CI (${{ matrix.use_qemu && 'QEMU' || 'native, asan' }}) steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false - name: Set up QEMU for ${{ matrix.arch }} emulation if: ${{ matrix.use_qemu }} uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3 with: platforms: ${{ matrix.qemu_platform }} - name: Set up Docker Buildx uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 - name: Build ${{ matrix.arch }} dev image uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # 
v6 with: context: . file: devtools/ci/Dockerfile.arm platforms: ${{ matrix.platform }} tags: liblognorm-arm-dev:${{ matrix.arch }} load: true cache-from: type=gha,scope=arm-dev-${{ matrix.arch }} cache-to: type=gha,mode=max,scope=arm-dev-${{ matrix.arch }} - name: Run ${{ matrix.arch }} CI pipeline env: MATRIX_ARCH: ${{ matrix.arch }} MATRIX_PLATFORM: ${{ matrix.platform }} WORKSPACE_PATH: ${{ github.workspace }} run: | chmod -R go+rw . docker run --rm --platform "$MATRIX_PLATFORM" \ -e ARCH="$MATRIX_ARCH" \ --cap-add SYS_ADMIN \ --cap-add SYS_PTRACE \ --security-opt seccomp=unconfined \ -v "$WORKSPACE_PATH:/rsyslog" \ -w /rsyslog \ "liblognorm-arm-dev:$MATRIX_ARCH" bash -c ' set -e export CC=gcc export CFLAGS="-g -O1 -fno-omit-frame-pointer" export LDFLAGS="" export CI_MAKE_CHECK_EXTRA="TESTSUITEFLAGS=--stop" export CI_MAKE_CHECK_OPT="-j1" export CI_CHECK_CMD="check" if [ "$ARCH" = "arm64" ]; then export CFLAGS="-g -O1 -fno-omit-frame-pointer -fsanitize=address -fsanitize-address-use-after-scope" export LDFLAGS="-fsanitize=address" export ASAN_OPTIONS="abort_on_error=1:symbolize=1:detect_leaks=0:disable_coredump=0" fi devtools/run-ci.sh ' - name: Show error logs if: ${{ failure() || cancelled() }} run: | devtools/gather-check-logs.sh cat failed-tests.log clang_analyzer_CI: needs: compile if: ${{ needs.compile.result == 'success' }} permissions: contents: read runs-on: ubuntu-latest timeout-minutes: 30 name: clang static analyzer steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false fetch-depth: 0 - name: Run clang static analyzer id: run-clang env: LIBLOGNORM_CONTAINER_UID: "" LIBLOGNORM_DEV_CONTAINER: rsyslog/rsyslog_dev_base_ubuntu:24.04 SCAN_BUILD: scan-build SCAN_BUILD_CC: clang SCAN_BUILD_REPORT_DIR: scan-build-report DOCKER_RUN_EXTRA_OPTS: >- -e SCAN_BUILD -e SCAN_BUILD_CC -e SCAN_BUILD_REPORT_DIR run: | chmod -R go+rw . 
chmod +x devtools/run-static-analyzer.sh set +e devtools/devcontainer.sh --rm devtools/run-static-analyzer.sh 2>&1 | tee clang-analyzer.log echo "exitcode=${PIPESTATUS[0]}" >> "$GITHUB_OUTPUT" - name: Upload clang static analyzer report if: ${{ always() }} id: upload-report uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: clang-static-analyzer-report path: scan-build-report retention-days: 7 if-no-files-found: ignore - name: Show clang static analyzer report link if: ${{ always() }} env: ARTIFACT_URL: ${{ steps.upload-report.outputs.artifact-url }} run: | artifact="$ARTIFACT_URL" cat >> "$GITHUB_STEP_SUMMARY" <&2 tail -n 200 clang-analyzer.log >&2 || true echo >&2 echo "Clang static analyzer HTML report (download): $artifact" >&2 exit 1 rsyslog_integration_CI: needs: [compile, CI, clang_analyzer_CI] if: >- ${{ needs.compile.result == 'success' && needs.CI.result == 'success' && needs.clang_analyzer_CI.result == 'success' }} permissions: contents: read runs-on: ubuntu-latest timeout-minutes: 45 name: rsyslog integration steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false - name: Run rsyslog integration tests env: LIBLOGNORM_CONTAINER_UID: "" LIBLOGNORM_DEV_CONTAINER: rsyslog/rsyslog_dev_base_ubuntu:24.04 run: | # This is a downstream-consumer check, not another liblognorm unit test. # We only run it after the core compile, static-analyzer, and liblognorm # check jobs have all succeeded so failures here are much more likely to be # real integration regressions than basic build/test breakage. # # The helper script intentionally keeps rsyslog narrow: build against the # candidate liblognorm and run only the mmnormalize/pmnormalize tests. chmod -R go+rw . 
chmod +x devtools/run-rsyslog-integration.sh devtools/devcontainer.sh --rm devtools/run-rsyslog-integration.sh liblognorm-2.1.0/.github/workflows/shellcheck.yml000066400000000000000000000025631520037563000221120ustar00rootroot00000000000000--- name: shellcheck on: pull_request: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true permissions: contents: read jobs: lint: name: shellcheck runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false fetch-depth: 2 - name: Identify changed shell files id: changed uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46 with: separator: "\n" files: | ai/*.sh tests/*.sh - name: Install shellcheck if: steps.changed.outputs.any_changed == 'true' run: sudo apt-get update && sudo apt-get install --yes shellcheck - name: Run shellcheck if: steps.changed.outputs.any_changed == 'true' env: CHANGED_SHELL_FILES: ${{ steps.changed.outputs.all_changed_files }} run: | printf '%s\n' "$CHANGED_SHELL_FILES" | sed 's/\\$//' > changed-shell-files.txt mapfile -t files < changed-shell-files.txt shellcheck "${files[@]}" - name: Skip shellcheck, no shell changes if: steps.changed.outputs.any_changed != 'true' run: echo "No shell files modified" liblognorm-2.1.0/.github/workflows/yaml_lint.yml000066400000000000000000000040261520037563000217710ustar00rootroot00000000000000# Copyright 2026 Rainer Gerhards and Others # # https://github.com/rsyslog/rsyslog-pkg-ubuntu # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. --- name: yamllint check on: pull_request: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true permissions: contents: read jobs: lint: name: yamllint runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false fetch-depth: 2 - name: Identify changed YAML files id: changed uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46 with: separator: "\n" files: | **/*.yml **/*.yaml - name: Install yamllint if: steps.changed.outputs.any_changed == 'true' run: pip install yamllint - name: Run yamllint if: steps.changed.outputs.any_changed == 'true' env: CHANGED_YAML_FILES: ${{ steps.changed.outputs.all_changed_files }} run: | printf '%s\n' "$CHANGED_YAML_FILES" | sed 's/\\$//' > changed-yaml-files.txt mapfile -t files < changed-yaml-files.txt yamllint \ -d '{extends: relaxed, rules: {line-length: {max: 120}}}' \ "${files[@]}" - name: Skip yamllint, no YAML changes if: steps.changed.outputs.any_changed != 'true' run: echo "No YAML files modified" liblognorm-2.1.0/.github/workflows/zizmor.yml000066400000000000000000000016311520037563000213320ustar00rootroot00000000000000--- name: GitHub Actions Security Analysis with zizmor on: pull_request: push: branches: [ "main" ] paths: - ".github/**" schedule: - cron: "37 3 * * 2" concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true permissions: {} jobs: zizmor: name: zizmor 
runs-on: ubuntu-latest timeout-minutes: 10 permissions: contents: read steps: - name: Checkout repository uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 with: persist-credentials: false - name: Run zizmor uses: zizmorcore/zizmor-action@b1d7e1fb5de872772f31590499237e7cce841e8e # v0.5.3 with: advanced-security: false annotations: true inputs: .github online-audits: false persona: auditor version: v1.24.1 liblognorm-2.1.0/.gitignore000066400000000000000000000012541520037563000156510ustar00rootroot00000000000000INSTALL Makefile Makefile.in aclocal.m4 autom4te.cache compile config.guess config.h config.h.in config.h.in~ config.log config.status config.sub configure depcomp install-sh libtool lognorm.pc ltmain.sh m4/libtool.m4 m4/ltoptions.m4 m4/ltsugar.m4 m4/ltversion.m4 m4/lt~obsolete.m4 missing stamp-h1 test-driver *.trs *.log test.out src/ln_test src/lognormalizer src/lognorm-features.h tests/options.sh tests/json_eq tests/err_callback_cookie tests/turbo_test_arena tests/turbo_test_result tests/turbo_test_simd tests/turbo_test_vm tests/turbo_test_json configure~ .deps/ .libs/ compat/.deps compat/.libs doc/_build/ *.la *.lo *.o # Local machine-specific Codex notes AGENTS.local.md liblognorm-2.1.0/.travis.yml000066400000000000000000000044421520037563000157740ustar00rootroot00000000000000#group: deprecated language: c compiler: - gcc - clang matrix: exclude: - compiler: gcc - compiler: clang include: - sudo: required compiler: gcc - sudo: required compiler: gcc dist: trusty # We disable 12.04 because we have a header issue here w/ clang #- sudo: required # compiler: clang - sudo: required compiler: clang dist: trusty before_install: - travis_retry sudo apt-get update -qq - travis_retry sudo apt-get install -qq libpcre3-dev libpcre3-dbg valgrind python-pip - travis_retry sudo pip install -U sphinx - travis_retry sudo add-apt-repository ppa:adiscon/v8-stable -y - travis_retry sudo apt-get update -qq install: - travis_retry sudo apt-get install -qq 
libestr-dev libfastjson-dev - travis_retry sudo apt-get install -qq clang # the following is a work-around to solve the # "too old autoconf-archive" problem - mkdir tmp - cd tmp - git clone git://git.sv.gnu.org/autoconf-archive.git - sudo cp autoconf-archive/m4/* /usr/share/aclocal - cd .. - rm -rf tmp script: - CI/check_codestyle.sh - autoreconf --force --verbose --install # note: we enable regexp to check the v1 compatibility layer. v2 does # not have it, nor is it recommended to enable it. - if [ "$CC" == "gcc" ] ; then ./configure --prefix=/opt/liblognorm --build=x86_64-pc-linux-gnu --host=x86_64-pc-linux-gnu --mandir=/usr/share/man --infodir=/usr/share/info --datadir=/usr/share --sysconfdir=/etc --localstatedir=/var/lib --disable-dependency-tracking --libdir=/usr/lib64 --enable-debug --enable-testbench --enable-docs --enable-regexp --enable-valgrind; fi - if [ "$CC" == "gcc" ] ; then make && make dist && make check && sudo make install; fi # here come the clang test. So far, we just call the static analyzer - if [ "$CC" == "clang" ] ; then export CFLAGS="-Werror -Wfatal-errors -std=c99" ; fi - if [ "$CC" == "clang" ] ; then scan-build ./configure --prefix=/opt/liblognorm --build=x86_64-pc-linux-gnu --host=x86_64-pc-linux-gnu --mandir=/usr/share/man --infodir=/usr/share/info --datadir=/usr/share --sysconfdir=/etc --localstatedir=/var/lib --disable-dependency-tracking --libdir=/usr/lib64 --enable-debug --enable-testbench --enable-regexp ; fi - if [ "$CC" == "clang" ] ; then scan-build --status-bugs make ; fi liblognorm-2.1.0/AGENTS.md000066400000000000000000000073221520037563000151660ustar00rootroot00000000000000# AI Agent Context for liblognorm ## Local Overlay Before starting work in this repository, read `AGENTS.local.md` if it exists. That file contains machine- and workflow-specific instructions that are not duplicated here. ## Project Overview **liblognorm** is a fast, samples-based log normalization library. 
It parses log messages (strings) into structured JSON objects based on a set of rules. ## Critical Context > [!IMPORTANT] > **READ THIS FIRST**: The core engine logic is documented in `doc/pdag_implementation_model.rst`. This file contains critical "implementation deltas" that explain how the actual C code differs from the theoretical design (Master's thesis). **Ignoring this file will lead to incorrect assumptions about the parser's behavior.** ## Key Directories - `src/`: Source code. - `doc/`: Documentation (Sphinx reStructuredText). - `tests/`: Test suite. ## Architecture Map For a detailed breakdown of which files implement which concepts, see: [doc/ai_architecture_map.md](doc/ai_architecture_map.md) ## Coding Standards & Constraints 1. **C99**: The codebase is written in C99. 2. **Memory Management**: Be careful with memory. The PDAG engine relies on specific ownership rules (see `pdag_implementation_model.rst`). 3. **Thread Safety**: The library is designed to be thread-safe. 4. **Error Handling**: Uses `goto done` pattern for cleanup and error propagation. ## Common Tasks - **Adding a new parser**: See `src/parser.c`. Use the `PARSER_Parse`, `PARSER_Construct`, etc. macros. - **Debugging**: The library has a debug callback system (`ln_setDebugCB`). ## Parser Change Guidance 1. **Check the Whole Parser Family**: If you touch a parser implementation in `src/parser.c` or the legacy v1 parser files, search for the parser name in `tests/` and run all matching tests. This includes `*_jsoncnf.sh`, `*_v1.sh`, and terminator/edge-case variants when present. 2. **Watch for v1/v2 Split**: Some parser names exist in both the modern PDAG parser code (`src/parser.c`) and the legacy v1 parser code (`src/v1_parser.c`, `src/v1_samp.c`, or `src/v1_parser.h`). When changing parser behavior, ensure you have identified and validated all relevant code paths if they exist. 3. 
**Do Not Test During Relink**: Never run parser tests against `src/ln_test` while `make -C src ln_test` is still compiling or relinking. Wait for the build command to finish before executing any tests. 4. **Prefer Family Validation Over Single Repros**: A local reproduction for one failing sample is not sufficient validation for parser work. Run the full parser-specific test family before committing or opening a PR. Prefer `ai/run-parser-family.sh ` when it fits the change. ## Validation Ladder 1. **Direct Repro**: Use a direct `ln_test` reproduction while isolating one specific parser failure or edge case. 2. **Parser Family**: For parser behavior changes, run `ai/run-parser-family.sh ` when a matching family exists. 3. **Broader Coverage**: If the change touches shared parser plumbing, shell harness logic, or build/test infrastructure, run a broader targeted subset or `make check` before opening a PR. ## Commit Rules 1. **Contextual Messages**: Commit messages must explain *why* a change was made, not just *what* changed. Relate changes to the project strategy (e.g., "AI first strategy") where applicable. 2. **Attribution**: All AI-assisted commits must include the following footer: `With the help of AI Agent: ` 3. **Atomic Changes**: Separate documentation updates from code changes when possible, or group them logically if they are tightly coupled. 4. **Relative Paths**: Always use relative paths in documentation (e.g., `doc/file.md`, not `file:///...`) to ensure portability across environments. 
liblognorm-2.1.0/AUTHORS000066400000000000000000000000661520037563000147310ustar00rootroot00000000000000Rainer Gerhards , Adiscon GmbH liblognorm-2.1.0/CI/000077500000000000000000000000001520037563000141525ustar00rootroot00000000000000liblognorm-2.1.0/CI/check_codestyle.sh000077500000000000000000000004041520037563000176370ustar00rootroot00000000000000#!/bin/bash mkdir _tmp_stylecheck cd _tmp_stylecheck git clone https://github.com/rsyslog/codestyle cd codestyle gcc --std=c99 stylecheck.c -o stylecheck cd ../.. find . -name "*.[ch]" | xargs _tmp_stylecheck/codestyle/stylecheck -w -f -l 120 rm -rf codestyle liblognorm-2.1.0/COPYING000066400000000000000000000626201520037563000147200ustar00rootroot00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. 
Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. 
For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. 
For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. 
d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. 
However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. 
If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. 
For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. 
You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. 
It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. 
If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. 
You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. liblognorm, a fast samples-based log normalization library Copyright (C) 2010 Rainer Gerhards This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA liblognorm-2.1.0/COPYING.ASL20000066400000000000000000000216611520037563000155000ustar00rootroot00000000000000Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: 1. You must give any other recipients of the Work or Derivative Works a copy of this License; and 2. You must cause any modified files to carry prominent notices stating that You changed the files; and 3. You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and 4. If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. 
You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. liblognorm-2.1.0/ChangeLog000066400000000000000000000445061520037563000154420ustar00rootroot00000000000000---------------------------------------------------------------------- Version 2.1.0, 2026-05-07 - add TurboVM bytecode engine for high-performance log parsing Compiles rulebases into optimized bytecode executed by a linear VM with SIMD-accelerated parsing primitives (SSE4.2 on x86-64, NEON on ARM64, scalar fallback for other architectures). 
Key features: - arena-based allocation (single malloc per message, O(1) reset) - fast result structure (typed fields, zero JSON overhead) - snapshot support for async consumers (rsyslog worker queues) - automatic fallback to recursive walker on compilation failure New API: ln_turbo_normalize(), ln_fast_result_get_string/int(), ln_turbo_snapshot_result(). New context option: LN_CTXOPT_TURBO. Enabled via: ./configure --enable-turbo Contributed by Jérémie Jourdin / Advens. - pcre2 used instead of pcre (which is no longer supported) - add a parameter skipempty to the json field type. If skipempty is set as follows, empty json objects are dropped from the parsed result. %field_name:json:skipempty% If any parameter other than "skipempty" is given ("bogus" in this example), an error message "invalid flag for JSON parser: bogus" is issued. Thanks to Noriko Hosoi for the patch. - json parser: add optional "skipempty" flag This removes empty strings, empty arrays and empty objects from parsed JSON content before it is attached to the result tree. Thanks to Noriko Hosoi for the original patch. Follow-up work to rebase, document, and align validation tests was done by Adiscon. NOTE: This is a potential backward-compatibility change for malformed rulebases that used extra json parser flags such as "%field:json:bogus%". Such invalid flags were previously ignored and are now rejected during rule loading. - Op-quoted-string : Add support for escaped characters Thanks to Kevin Guillemot for the patch. Some follow up work was done by Adiscon. - parser(quoted-string): return the extracted value without the surrounding quote characters. - parser(name-value-list): add ignore_whitespaces option and accept trailing empty trimmed values. - parser(repeat): allow one named dot among unnamed parsers. - fixed error callback cookie - the wrong one was provided - hardening: fix empty-tag rulebase crash, repeat while validation crash, parser EOF handling, and harden GitHub Actions security. 
- improved CI with additional checks, including sanitizers and an rsyslog integration test - introduced doc for AI agents, e.g. AGENTS.md ---------------------------------------------------------------------- Version 2.0.9, 2025-12-16 - fix build issue on some platforms - guard compile pragma use - remove unneeded variable - fix memory leak when a custom type in rules does not match Thanks to Meric Sentunali for the fix and Julian Thomas for alerting me of the missing merge. NOTE: Release of 2.0.8, accidentally did not include this fix. Thanks for Julien Thomas for alerting us! ---------------------------------------------------------------------- Version 2.0.8, 2025-12-04 - fix potential segfault on some platforms Thanks to Julian Thomas for a fix ---------------------------------------------------------------------- Version 2.0.7, 2025-10-14 - parser(name-value-list): add separator character option adds new option "separator" for custom key/value pair separation (replaces whitespace when defined) - parser(name-value-list): add assignator option adds new option "assignator" for custom key/value assignment character (replaces '=' when defined, disables key name validation) - parser(name-value-list): add support for quoting and escaped characters values can now be quoted and include escaped characters such as backslash, separator, or double-quote - parser(name-value-list): fix parsing of escaped characters fixes incorrect handling of backslash, double-quote, and separator characters in quoted values - parser(name-value-list): add test for quoted values - parser(name-value-list): improve handling of invalid pairs stops parsing and sets offset if name/value is not valid, so remaining text can still be parsed - parser(CEF): fix empty last value handling if the last field is empty, it is now set to an empty string instead of failing - parser(CEF): fix header and escape sequence handling improved handling of slashes and trailing spaces in header parsing - CI: add GitHub 
Actions workflow for CI runs - code cleanup: removed problematic language from code base - maintenance: fixed various misspellings in code and comments - string rulebase bugfix: segfault when using LF in json rule If a json rule used a LF inside a string rule (one not loaded from a rulebase file), liblognorm segfaults. closes https://github.com/rsyslog/liblognorm/issues/333 ---------------------------------------------------------------------- Version 2.0.6, 2018-11-06 - implement Checkpoint LEA transfer format ... at least if we guess right that this is the format name. This type of format seems to be seen in syslog messages. Checkpoint does not provide a spec, so everything is guesswork... :-( closes https://github.com/rsyslog/liblognorm/issues/309 - made build on AIX Thanks to Philippe Duveau for the patch. - fixes and improvements in bash scripting mostly based on shellcheck recommendations (via CodeFactor.com) - string parser: add "lazy" matching mode This introduces parameter "matching.lazy". See doc for details. - bugfix: suppress invalid param error for field name "-" Suppress invalid param error for name for hexnumber, float, number, date-rfc3164 and date-rfc5424. It will just check if name is "-" to make sure that we only suppress the error message in case we do not want to capture something. Thanks to Sol Huebner for the patch. closes https://github.com/rsyslog/liblognorm/issues/270 - bugfix: cisco-interface-spec did not succeed when at end of line Thanks to Sol Huebner for the patch. closes https://github.com/rsyslog/liblognorm/issues/229 ---------------------------------------------------------------------- Version 2.0.5, 2018-04-26 - bugfix: es_str2cstr leak in string-to v1 parser Thanks to Harshvardhan Shrivastava for the patch. - make "make check" "succeed" on solaris 10 actually, we just ignore the CI failures so that OpenCSW can build new packages. The problems actually exist on that platform, but testing has shown they always existed.
We currently run out of time to really fix this, plus we never had any bug report on Solaris (I assume nobody uses it on Solaris 10). However, that issue is a blocker to make new rsyslog versions available on OpenCSW for Solaris 10, so we go the dirty way of pretending there is no problem. Note: the issue was originally not seen, as the failing tests have been added later on. So the problem was always there, just not visible. - some mostly cosmetic fixes detected by Coverity Scan e.g. memory leak if and only if system was completely out of memory ---------------------------------------------------------------------- Version 2.0.4, 2017-10-04 - added support for native JSON number formats supported by parsers: number, float, hex - added support for creating unix timestamps supported by parsers: date-rfc3164, date-rfc5424 - fixed build problems on Solaris ... but there still seem to be some code issues, manifested in testbench failures. So use with care! ---------------------------------------------------------------------- Version 2.0.3, 2017-03-22 - add ability to load rulebase from a string introduces new API: int ln_loadSamplesFromString(ln_ctx ctx, const char *string); closes https://github.com/rsyslog/liblognorm/issues/239 - bugfix: string parser did not correctly parse word at end of line - bugfix: literal parser does not always store value if name is specified if rule=:%{"type":"literal", "text":"a", "name":"var"}% is used and matching message is provided, variable var is not persisted.
see also http://lists.adiscon.net/pipermail/rsyslog/2016-December/043985.html ---------------------------------------------------------------------- Version 2.0.2, 2016-11-15 - bugfix: no error was emitted on invalid "annotate" line - "annotate": permit inline comments - fix a problem with cross-compilation see also: https://github.com/rsyslog/liblognorm/pull/221 Thanks to Luca Boccassi for the patch - testbench: add test for "annotate" functionality - bugfix: abort in literal path compaction when using "alternative" parser When using the "alternative" parser, literal nodes could be created with a reference count greater than one. This is valid. However, literal path compaction did not consider this case, and so "merged" these nodes, which led to pdag corruption and quickly to segfault. closes https://github.com/rsyslog/liblognorm/issues/220 closes https://github.com/rsyslog/liblognorm/issues/153 - bugfix: lognormalizer could loop This also caused the testbench to fail on some platforms. due to incorrect data type Thanks to Michael Biebl for this fix. - fix misleading compiler warning Thanks to Michael Biebl for this fix. - testbench: add test for "annotate" functionality ---------------------------------------------------------------------- Version 2.0.1, 2016-08-01 - fix public headers, which invalidly contained a strndup() definition Thanks to Michael Biebl for this fix. - fix some issues in pkgconfig file Thanks to Michael Biebl for this fix. - enhance build system to natively support systems with older autoconf versions and/or missing autoconf-archive. In this case we gracefully degrade functionality, but the build still is possible. Among others, this enables builds on CentOS 5.
---------------------------------------------------------------------- Version 2.0.0, 2016-07-21 - completely rewritten, much feature-enhanced version - requires libfastjson instead of json-c - big improvements to testbench runs, especially on travis among others, the static analyzer is now run and testbench throws an error if the static analyzer (via clang) is not clean - lognormalizer tool can now handle lines larger 10k characters Thanks to Janmejay Singh for the patch ---------------------------------------------------------------------- Version 1.1.3, 2015-??-?? [no official release] - make work on Solaris - check for runaway rules. A runaway rule is one that has unmatched percent signs and thus is not terminated properly at its end. This also means we no longer accept "rule=" at the first column of a continuation line, which is no problem (see doc for more information). - fix: process last line if it misses the terminating LF This problem occurs with the very last line of a rulebase (at EOF). If it is not properly terminated (LF missing), it is silently ignored. Previous versions did obviously process lines in this case. While technically this is invalid input, we can't outrule that such rulebases exist. For example, they do in the rsyslog testbench, which made us aware of the problem (see https://github.com/rsyslog/rsyslog/issues/489 ) I think the proper way of addressing this is to process such lines without termination, as many other tools do as well. 
closes https://github.com/rsyslog/liblognorm/issues/135 ---------------------------------------------------------------------- Version 1.1.2, 2015-07-20 - permit newline inside parser definition - new parser "cisco-interface-spec" - new parser "json" to process json parts of the message - new parser "mac48" to process mac layer addresses - new parser "name-value-list" (currently inofficial, experimental) - some parsers did incorrectly report success when an error occurred this was caused by inconsistencies between various macros. We have changed the parser-generation macros to match the semantics of the broader CHKN/CHKR macros and also restructured/simplified the parser generation macros. closes https://github.com/rsyslog/liblognorm/issues/41 - call "rest" parser only if nothing else matches. Versions prior to 1.1.2 did execute "rest" during regular parser processing, and thus parser matches have been more or less random. With 1.1.2 this is now always the last parser called. This may cause problems with existing rulesets, HOWEVER, adding any other rule or changing the load order would also have caused problems, so there really is no compatibility to preserve. see also: https://rainer.gerhards.net/2015/04/liblognorms-rest-parser-now-more-useful.html - new API to support error callbacks This permits callers to forward messages in regard to e.g. wrong rule bases to their users, which is very useful and actually missing in the previous code base. So far, we only have few error messages. However, we will review the code and add more. The important part is that callers can begin to use the new API and thus will benefit when we add more error messages. 
- testbench is now enabled by default - bugfix: misaddressing on some constant values see also https://github.com/rsyslog/liblognorm/pull/67 Thanks to github user ontholerian for the patch - bugfix: add missing function prototypes This could potentially lead to problems on some platforms, especially those with 64 bit pointers. ---------------------------------------------------------------------- Version 1.1.1, 2015-03-09 - fixed library version numbering Thanks to Tomas Heinreich for reporting the problem. - added new parser syntaxes Thanks to Janmejay Singh for implementing most of them. - bugfix: function ln_parseFieldDescr() returns state value due to uninitialized variable. This can also lead to invalidly returning no sample node where one would have to be created. ---------------------------------------------------------------------- Version 1.1.0, 2015-01-08 - added regular expression support use this feature with great care, as it thrashes performance Thanks to Janmejay Singh for implementing this feature. - fix build problem when --enable-debug was set closes: https://github.com/rsyslog/liblognorm/issues/5 ---------------------------------------------------------------------- Version 1.0.1, 2014-04-11 - improved doc (via RST/Sphinx) - bugfix: unparsed fields were copied incorrectly from non-terminated string. Thanks to Josh Blum for the fix. - bugfix: mandatory tag did not work in lognormalizer ---------------------------------------------------------------------- Version 1.0.0, 2013-11-28 - WARNING: this version has an incompatible interface and older programs will not compile with it. For details see http://www.liblognorm.com/news/on-liblognorm-1-0-0/ - libestr is not used any more in interface functions. Traditional C strings are used instead. Internally, libestr is still used, but scheduled for removal. - libee is not used any more. JSON-C is used for object handling instead. Parsers and formatters are now part of liblognorm.
- added new field type "rest", which simply sinks everything up to the end of the string. - added support for glueing two fields together, without literal between them. It allows for constructs like: %volume:number%%unit:word% which matches string "1000Kbps" - Fix incorrect merging of trees with empty literal at end Thanks to Pavel Levshin for the patch - this version has survived many bugfixes ---------------------------------------------------------------------- ================================================================================ The versions below are liblognorm0, which has a different API ================================================================================ ---------------------------------------------------------------------- Version 0.3.7, 2013-07-17 - added support to load single samples Thanks to John Hopper for the patch ---------------------------------------------------------------------- Version 0.3.6, 2013-03-22 - bugfix: uninitialized variable could lead to rulebase load error ---------------------------------------------------------------------- Version 0.3.5 (rgerhards), 2012-09-18 - renamed "normalizer" tool to "lognormalizer" to solve name clashes Thanks to the Fedora folks for pointing this out. ---------------------------------------------------------------------- Version 0.3.4 (rgerhards), 2012-04-16 - bugfix: normalizer tool had a memory leak Thanks to Brian Know for alerting me and helping to debug ---------------------------------------------------------------------- Version 0.3.3 (rgerhards), 2012-02-06 - required header file was not installed, resulting in compile error closes: http://bugzilla.adiscon.com/show_bug.cgi?id=307 Thanks to Andreis Vinogradovs for alerting us to this bug. ---------------------------------------------------------------------- Version 0.3.2 (rgerhards), 2011-11-21 - added rfc5424 parser (requires libee >= 0.3.2) - added "-" to serve as name for filler fields.
Value is extracted, but no field is written - special handling for iptables log via %iptables% parser added (currently experimental pending practical verification) - normalizer tool on its way to a full-blown stand-alone tool - support for annotations added, for the time being see https://rainer.gerhards.net/2011/11/log-annotation-with-liblognorm.html ---------------------------------------------------------------------- Version 0.3.1 (rgerhards), 2011-04-18 - added -t option to normalizer so that only messages with a specified tag will be output - bugfix: abort if a tag was assigned to a message without any fields parsed out (uncommon scenario) - bugfix: mem leak on parse tree destruct -- associated tags were not deleted - bugfix: potential abort in normalizer due to misaddressing in debug message generation ---------------------------------------------------------------------- Version 0.3.0 (rgerhards), 2011-04-06 - support for message classification via tags added - bugfix: partial messages were invalidly matched closes: http://bugzilla.adiscon.com/show_bug.cgi?id=247 ---------------------------------------------------------------------- Version 0.2.0 (rgerhards), 2011-04-01 - added -E option to normalizer tool, permits specifying data for encoders - support for new libee parsers: * Time12hr * Time24hr * ISODate * QuotedString - support for csv encoding added - added -p option to normalizer tool (output only correctly parsed entries) - bugfix: segfault if a parse tree prefix had exactly buffer size, in which case it was invalidly assumed that an external buffer had been allocated ---------------------------------------------------------------------- Version 0.1.0 (rgerhards), 2010-12-09 Initial public release.
liblognorm-2.1.0/Doxyfile000066400000000000000000002110251520037563000153660ustar00rootroot00000000000000# Doxyfile 1.7.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # http://www.gnu.org/software/libiconv for the list of possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded # by quotes) that should identify the project. PROJECT_NAME = liblognorm # The PROJECT_NUMBER tag can be used to enter a project or revision number. # This could be handy for archiving the generated documentation or # if some version control system is used. PROJECT_NUMBER = 1.0.0 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = /home/rger/proj/liblognorm/doc # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. 
# Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to JavaDoc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. 
# If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. 
STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful is your file systems # doesn't support long names like on DOS, Mac, or CD-ROM. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a JavaDoc-style # comment as the brief description. If set to NO, the JavaDoc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 8 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. 
An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for # Java. For instance, namespaces will be presented as packages, qualified # scopes will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources only. Doxygen will then generate output that is more tailored for # Fortran. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for # VHDL. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given extension. # Doxygen has a built-in mapping, but you can override or extend it using this # tag. The format is ext=language, where ext is a file extension, and language # is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, # C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make # doxygen treat .inc files as Fortran files (default is PHP), and .f files as C # (default is Fortran), use: inc=Fortran f=C. 
Note that for custom extensions # you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. EXTENSION_MAPPING = # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate getter # and setter methods for a property. Setting this option to YES (the default) # will make doxygen to replace the get and set methods by a property in the # documentation. This will only work if the methods are indeed getting or # setting a simple type. If this is not the case, or you want to show the # methods anyway, you should set this option to NO. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the default) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). 
Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum # is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically # be useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. TYPEDEF_HIDES_STRUCT = NO # The SYMBOL_CACHE_SIZE determines the size of the internal cache use to # determine which symbols to keep in memory and which to flush to disk. # When the cache is full, less often used symbols will be written to disk. # For small to medium size projects (<1000 input files) the default value is # probably good enough. For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time # causing a significant performance penality. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. Note that the value works on # a logarithmic scale so increasing the size by one will rougly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols SYMBOL_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. 
# Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base # name of the file that contains the anonymous namespace. By default # anonymous namespace are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. 
HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = YES # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen # will list include files with double quotes in the documentation # rather than with sharp brackets. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. 
INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen # will sort the (brief and detailed) documentation of class members so that # constructors and destructors are listed first. If set to NO (the default) # the constructors will appear in the respective orders defined by # SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. # This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO # and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the # hierarchy of group names into alphabetical order. If set to NO (the default) # the group names will appear in their defined order. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. 
This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if sectionname ... \endif. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or define consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and defines in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # If the sources in your project are distributed over multiple directories # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy # in the documentation. The default is NO. SHOW_DIRECTORIES = NO # Set the SHOW_FILES tag to NO to disable the generation of the Files page. # This will remove the Files entry from the Quick Index and from the # Folder Tree View (if specified). The default is YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the # Namespaces page. 
This will remove the Namespaces entry from the Quick Index # and from the Folder Tree View (if specified). The default is YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command <command> <input-file>, where <command> is the value of # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. # You can optionally specify a file name after the option, if omitted # DoxygenLayout.xml will be used as the name of the layout file. LAYOUT_FILE = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled.
WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr. WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = /home/rger/proj/liblognorm/src # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is # also the default input encoding.
Doxygen uses libiconv (or the iconv built # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for # the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.d \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.idl \ *.odl \ *.cs \ *.php \ *.php3 \ *.inc \ *.m \ *.mm \ *.dox \ *.py \ *.f90 \ *.f \ *.vhd \ *.vhdl # The RECURSIVE tag can be used to specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output.
The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain images that are included in the documentation (see # the \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command <filter> <input-file>, where <filter> # is the value of the INPUT_FILTER tag, and <input-file> is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER # is applied to all files.
FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C and C++ comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES # then for each documented function all documented # functions referencing it will be listed. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. 
The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = YES # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. 
HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # If the HTML_TIMESTAMP tag is set to YES then the generated HTML # documentation will contain the timestamp. HTML_TIMESTAMP = YES # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own # stylesheet in the HTML output directory as well, or it will be erased! HTML_STYLESHEET = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. # Doxygen will adjust the colors in the stylesheet and background images # according to this color. Hue is specified as an angle on a colorwheel, # see http://en.wikipedia.org/wiki/Hue for more information. # For instance the value 0 represents red, 60 is yellow, 120 is green, # 180 is cyan, 240 is blue, 300 purple, and 360 is red again. # The allowed range is 0 to 359. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of # the colors in the HTML output. For a value of 0 the output will use # grayscales only. A value of 255 will produce the most vivid colors.
HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to # the luminance component of the colors in the HTML output. Values below # 100 gradually make the output lighter, whereas values above 100 make # the output darker. The value divided by 100 is the actual gamma applied, # so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, # and 100 does not change the gamma. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting # this to NO can help when comparing the output of multiple runs. HTML_TIMESTAMP = YES # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, # files or namespaces will be aligned in HTML using tables. If set to # NO a bullet list will be used. HTML_ALIGN_MEMBERS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. For this to work a browser that supports # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). HTML_DYNAMIC_SECTIONS = NO # If the GENERATE_DOCSET tag is set to YES, additional index files # will be generated that can be used as input for Apple's Xcode 3 # integrated development environment, introduced with OSX 10.5 (Leopard). # To create a documentation set, doxygen will generate a Makefile in the # HTML output directory. Running make will produce the docset in that # directory and running "make install" will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find # it at startup. # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. GENERATE_DOCSET = NO # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the # feed. 
A documentation feed provides an umbrella under which multiple # documentation sets from a single provider (such as a company or product suite) # can be grouped. DOCSET_FEEDNAME = "Doxygen generated docs" # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that # should uniquely identify the documentation set bundle. This should be a # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen # will append .docset to the name. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compiled HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO).
GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING # is used to encode HtmlHelp index (hhk), content (hhc) and project file # content. CHM_INDEX_ENCODING = # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated # that can be used as input for Qt's qhelpgenerator to generate a # Qt Compressed Help (.qch) of the generated HTML documentation. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can # be used to specify the file name of the resulting .qch file. # The path specified is relative to the HTML output folder. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#namespace QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating # Qt Help Project output. For more information please see # http://doc.trolltech.com/qthelpproject.html#virtual-folders QHP_VIRTUAL_FOLDER = doc # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to # add. For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see # http://doc.trolltech.com/qthelpproject.html#custom-filters # (Qt Help Project / Custom Filters). QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's # filter section matches.
# For more information please see # http://doc.trolltech.com/qthelpproject.html#filter-attributes # (Qt Help Project / Filter Attributes). QHP_SECT_FILTER_ATTRS = # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can # be used to specify the location of Qt's qhelpgenerator. # If non-empty doxygen will try to run qhelpgenerator on the generated # .qhp file. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files # will be generated, which together with the HTML files, form an Eclipse help # plugin. To install this plugin and make it available under the help contents # menu in Eclipse, the contents of the directory containing the HTML and XML # files needs to be copied into the plugins directory of eclipse. The name of # the directory within the plugins directory should be the same as # the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before # the help appears. GENERATE_ECLIPSEHELP = NO # A unique identifier for the eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have # this name. ECLIPSE_DOC_ID = org.doxygen.Project # The DISABLE_INDEX tag can be used to turn on/off the condensed index at # top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. DISABLE_INDEX = NO # This tag can be used to set the number of enum values (range [1..20]) # that doxygen will group on one line in the generated HTML documentation. ENUM_VALUES_PER_LINE = 4 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. # If the tag value is set to YES, a side panel will be generated # containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). # Windows users are probably better off using the HTML help feature.
GENERATE_TREEVIEW = NO # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, # and Class Hierarchy pages using a tree view instead of an ordered list. USE_INLINE_TREES = NO # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 # When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open # links to external symbols imported via tag files in a separate window. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of Latex formulas included # as images in the HTML documentation. The default is 10. Note that # when you change the font size after a successful doxygen run you need # to manually remove any form_*.png images from the HTML output directory # to force them to be regenerated. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are # not supported properly for IE 6.0, but are supported on all modern browsers. # Note that when changing this option you need to delete any form_*.png files # in the HTML output before the changes have effect. FORMULA_TRANSPARENT = YES # When the SEARCHENGINE tag is enabled doxygen will generate a search box # for the HTML output. The underlying search engine uses javascript # and DHTML and should work on any modern browser. Note that when using # HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets # (GENERATE_DOCSET) there is already a search function so this one should # typically be disabled. For large projects the javascript based search engine # can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. SEARCHENGINE = NO # When the SERVER_BASED_SEARCH tag is enabled the search engine will be # implemented using a PHP enabled web server instead of at the web client # using Javascript.
Doxygen will generate the search PHP script and index # file to put on the web server. The advantage of the server # based approach is that it scales better to large projects and allows # full text search. The disadvantage is that it is more difficult to set up # and does not have live searching capabilities. SERVER_BASED_SEARCH = NO #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = NO # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. If left blank `latex' will be used as the default command name. # Note that when enabling USE_PDFLATEX this option is only used for # generating bitmaps for formulas in the HTML output, but not in the # Makefile that is written to the output directory. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for LaTeX. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # LaTeX documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = NO # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = a4wide # The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX # packages that should be included in the LaTeX output.
EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal LaTeX header for # the generated latex document. The header should contain everything until # the first chapter. If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated LaTeX files. This will instruct LaTeX to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO # If LATEX_SOURCE_CODE is set to YES then doxygen will include # source code with syntax highlighting in the LaTeX output. # Note that which sources are shown also depends on other settings # such as SOURCE_BROWSER. LATEX_SOURCE_CODE = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. 
GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load stylesheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. 
MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # The XML_SCHEMA tag can be used to specify an XML schema, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_SCHEMA = # The XML_DTD tag can be used to specify an XML DTD, # which can be used by a validating XML parser to check the # syntax of the XML files. XML_DTD = # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. 
XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an AutoGen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and LaTeX code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. This is useful # if you want to understand what is going on. On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. 
PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = NO # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # in the INCLUDE_PATH (see below) will be search if a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. 
PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition. EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all function-like macros that are alone # on a line, have an all uppercase name, and do not end with a semicolon. Such # function macros are typically used for boiler-plate code, and will confuse # the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. # Optionally an initial location of the external documentation # can be added for each tagfile. The format of a tag file without # this location is as follows: # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths or # URLs. If a location is present for each tag, the installdox tool # does not have to be run to correct the links. # Note that each tag file must have a unique name # (where the name does NOT include the path) # If a tag file is not located in the directory in which doxygen # is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. 
ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. It is recommended to install and use dot, since it yields more # powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the # documentation. The MSCGEN_PATH tag allows you to specify the directory where # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = NO # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is # allowed to run in parallel. 
When set to 0 (the default) doxygen will # base this on the number of processors available in the system. You can set it # explicitly to a value larger than 0 to get control over the balance # between CPU load and processing speed. DOT_NUM_THREADS = 0 # By default doxygen will write a font called FreeSans.ttf to the output # directory and reference it in all dot files that doxygen generates. This # font does not include all possible unicode characters however, so when you need # these (or just want a differently looking font) you can specify the font name # using DOT_FONTNAME. You need need to make sure dot is able to find the font, # which can be done by putting it in a standard location or by setting the # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory # containing the font. DOT_FONTNAME = FreeSans.ttf # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. # The default size is 10pt. DOT_FONTSIZE = 10 # By default doxygen will tell dot to use the output directory to look for the # FreeSans.ttf font (which doxygen will put there itself). If you specify a # different font using DOT_FONTNAME you can set the path where dot # can find it using this tag. DOT_FONTPATH = # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # the CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. 
COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH and HAVE_DOT options are set to YES then # doxygen will generate a call dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable call graphs # for selected functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then # doxygen will generate a caller dependency graph for every global function # or class method. Note that enabling this option will significantly increase # the time of a run. So in most cases it will be better to enable caller # graphs for selected functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will graphical hierarchy of all classes instead of a textual one. 
GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are png, jpg, or gif # If left blank png will be used. DOT_IMAGE_FORMAT = png # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that doxygen if the # number of direct children of the root node in a graph is already larger than # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. 
MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. This is disabled by default, because dot on Windows does not # seem to support this out of the box. Warning: Depending on the platform used, # enabling this option may lead to badly anti-aliased labels on the edges of # a graph (i.e. they become hard to read). DOT_TRANSPARENT = NO # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES liblognorm-2.1.0/Makefile.am000066400000000000000000000003641520037563000157160ustar00rootroot00000000000000SUBDIRS = compat src tools if ENABLE_DOCS SUBDIRS += doc endif EXTRA_DIST = rulebases \ COPYING.ASL20 pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = lognorm.pc ACLOCAL_AMFLAGS = -I m4 if ENABLE_TESTBENCH SUBDIRS += tests endif liblognorm-2.1.0/NEWS000066400000000000000000000000771520037563000143620ustar00rootroot00000000000000This file has been superseeded by ChangeLog. Please see there. liblognorm-2.1.0/README000066400000000000000000000037571520037563000145530ustar00rootroot00000000000000Liblognorm is a fast-samples based normalization library. More information on liblognorm can be found at http://www.liblognorm.com Liblognorm evolves since several years and was initially meant to be used primarily with the Mitre CEE effort. 
Consequently, the initial version of liblognorm (0.x) uses the libee CEE support library in its API. As time evolved, the initial CEE schema underwent considerable change. Even worse, Mitre lost funding for CEE. While the CEE ideas survived as part of Red Hat-driven "Project Lumberjack", the data structures became greatly simplified and JSON based. That effectively made libee obsolete (and also in parts libestr, which was specifically written to support CEE's initial requirement of embedded NUL chars in strings). In 2013, Pavel Levshin converted liblognorm to native JSON, which helped improve performance and simplicity for many client applications. Unfortunately, this change broke interface compatibility (and there was no way to avoid that, obviously...). In 2015, most parts of liblognorm were redesigned and rewritten as part of Rainer Gerhards' master thesis. For full technical details of how liblognorm operates, and why it is so fast, please have a look at https://www.researchgate.net/publication/310545144_Efficient_Normalization_of_IT_Log_Messages_under_Realtime_Conditions The current library is the result of that effort. Application developers are encouraged to switch to this version, as it provides the benefit of a simpler API. This version is now being tracked by the git default branch. However, if you need to stick to the old API, there is a git branch liblognorm0, which contains the previous version of the library. This branch is also maintained for important bug fixes, so it is safe to use. We recommend that packagers create packages both for liblognorm0 and liblognorm1. Note that liblognorm's development packages cannot coexist on the same system as the PKGCONFIG system would get into trouble. Adiscon's own packages follow this schema. 
liblognorm-2.1.0/ai/000077500000000000000000000000001520037563000142505ustar00rootroot00000000000000liblognorm-2.1.0/ai/README.md000066400000000000000000000027701520037563000155350ustar00rootroot00000000000000# AI Workflow Helpers This directory contains workflow helpers for developers and AI agents. These files are not part of the liblognorm runtime or testbench itself. They exist to make local validation more consistent and less error-prone. ## Current Helpers ### `run-parser-family.sh` Run the full shell-test family for one parser name. Examples: ```bash ai/run-parser-family.sh checkpoint-lea ai/run-parser-family.sh json ai/run-parser-family.sh field_cef.sh ``` What it does: 1. normalizes the parser name to the `field_*` shell-test prefix 2. rebuilds `src/ln_test` 3. ensures `tests/json_eq` is available 4. runs every matching parser-family shell test in `tests/` This is intended for parser work where a single direct repro is not sufficient validation. ## Validation Ladder Use the smallest validation step that is appropriate, but do not stop too early for parser changes. 1. direct repro Useful while debugging a single failing sample or parser edge case. 2. parser-family helper Required for parser behavior changes when a matching family exists: `ai/run-parser-family.sh ` 3. broader test run Use `make check` or a larger targeted subset when the change touches shared parser plumbing, shell harness logic, or build/test infrastructure. ## Notes - Wait for `make -C src ln_test` to finish before running tests. - Parser families often include `*_jsoncnf.sh`, `*_v1.sh`, and terminator or edge-case variants. The helper is meant to catch those automatically. liblognorm-2.1.0/ai/run-parser-family.sh000077500000000000000000000032201520037563000201610ustar00rootroot00000000000000#!/bin/bash # added 2026-03-25 by Codex # This file is part of the liblognorm project, released under ASL 2.0 # # Helper for developers and AI agents validating parser changes. 
# # This is intentionally kept outside of ./tests because it is not part of the # project testbench itself. It is workflow tooling that enforces the local # validation policy documented in AGENTS.md: # - rebuild src/ln_test before running parser tests # - build tests/json_eq when JSON comparison helpers are needed # - run the whole parser-family test set for a parser name, including # *_jsoncnf.sh, *_v1.sh, and edge-case variants when present # # Usage: # ai/run-parser-family.sh checkpoint-lea # ai/run-parser-family.sh field_checkpoint-lea.sh # # The parser name is normalized to the shell test prefix field_* and # every matching shell test in ./tests is executed sequentially. set -euo pipefail usage() { printf 'Usage: %s \n' "$0" >&2 printf 'Example: %s checkpoint-lea\n' "$0" >&2 exit 1 } [ $# -eq 1 ] || usage parser_name="$1" parser_name="${parser_name#field_}" parser_name="${parser_name%.sh}" script_dir="$(cd "$(dirname "$0")" && pwd)" repo_root="$(cd "${script_dir}/.." && pwd)" tests_dir="${repo_root}/tests" mapfile -t tests < <( cd "${tests_dir}" printf '%s\n' field_"${parser_name}"*.sh | sort ) if [ "${#tests[@]}" -eq 0 ] || [ "${tests[0]}" = "field_${parser_name}*.sh" ]; then printf 'No parser-family tests found for %s\n' "${parser_name}" >&2 exit 1 fi make -C "${repo_root}/src" ln_test make -C "${tests_dir}" json_eq for test_name in "${tests[@]}"; do ( cd "${tests_dir}" srcdir=. top_builddir=.. 
bash "./${test_name}" ) done liblognorm-2.1.0/compat/000077500000000000000000000000001520037563000151425ustar00rootroot00000000000000liblognorm-2.1.0/compat/Makefile.am000066400000000000000000000003341520037563000171760ustar00rootroot00000000000000noinst_LTLIBRARIES = compat.la compat_la_SOURCES = strndup.c asprintf.c compat_la_CPPFLAGS = -I$(top_srcdir) $(PTHREADS_CFLAGS) $(RSRT_CFLAGS) compat_la_LDFLAGS = -module -avoid-version compat_la_LIBADD = $(IMUDP_LIBS) liblognorm-2.1.0/compat/asprintf.c000066400000000000000000000025071520037563000171400ustar00rootroot00000000000000/* compatibility file for systems without asprintf. * * Copyright 2015 Rainer Gerhards and Adiscon * * This file is part of rsyslog. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * -or- * see COPYING.ASL20 in the source distribution * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "config.h" #ifndef HAVE_ASPRINTF #include #include #include int asprintf(char **strp, const char *fmt, ...) { va_list ap; int len; va_start(ap, fmt); len = vsnprintf(NULL, 0, fmt, ap); va_end(ap); *strp = malloc(len+1); if (!*strp) { return -1; } va_start(ap, fmt); vsnprintf(*strp, len+1, fmt, ap); va_end(ap); (*strp)[len] = 0; return len; } #else /* XLC needs at least one method in source file even static to compile */ #ifdef __xlc__ static void dummy() {} #endif #endif /* #ifndef HAVE_ASPRINTF */ liblognorm-2.1.0/compat/strndup.c000066400000000000000000000024241520037563000170070ustar00rootroot00000000000000/* compatibility file for systems without strndup. 
* * Copyright 2015 Rainer Gerhards and Adiscon * * This file is part of liblognorm. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * -or- * see COPYING.ASL20 in the source distribution * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "config.h" #ifndef HAVE_STRNDUP #include #include char * strndup(const char *s, size_t n) { const size_t len = strlen(s); if(len <= n) return strdup(s); char *const new_s = malloc(n+1); if(new_s == NULL) return NULL; memcpy(new_s, s, n); new_s[n] = '\0'; return new_s; } #else /* #ifndef HAVE_STRNDUP */ /* Solaris must have at least one symbol inside a file, so we provide * it here ;-) */ void dummy_dummy_required_for_solaris_do_not_use(void) { } #endif /* #ifndef HAVE_STRNDUP */ liblognorm-2.1.0/compatibility-v2000066400000000000000000000032701520037563000170020ustar00rootroot00000000000000- the lognormalizer tool now defaults to json output format - API changes * ln_loadSample() does no longer exist loading of samples at runtime is no longer possible. This is due to the fact that the parse DAG is compiled into the most efficient form and thereafter read-only. If urgently needed, we could change the code so that the unoptimized form is kept (memory intense) and a re-compile happens after each addition (time intense). - the plain old iptables parser is no longer supported. Sorry for that, but keeping it would have caused violation of layers. 
It can be replaced by the new one - the rulebase format for v2 has changed to support the enhancements It is mostly compatible with v1, which means it understands mosts of it's constructs. However, some are NOT understood: - the "tokenized" parser is no longer supported. Use the more capable "repeat" parser instead. - the "recursive" and "descent" parsers are no longer supported. Use user-defined data types instead. - the suffixed parser and friends are no longer supported. A replacement is currently being developed. - the regexp parser is no longer supported and needs to be replaced by other liblognorm features for details on this decision see: https://github.com/rsyslog/liblognorm/issues/143 To signify that a rule base file contains v2 format, it must contain the line version=2 as the very first line of the file, in exactly this format (no comments, no whitespace, nothing else in between the words or after them). If that line is missing or not given **exactly** as above, the old v1 engine is used, with all of its restrictions. If it is present, the v2 engine is used. liblognorm-2.1.0/configure.ac000066400000000000000000000163431520037563000161540ustar00rootroot00000000000000 -*- Autoconf -*- # Process this file with autoconf to produce a configure script. AC_PREREQ(2.61) AC_INIT([liblognorm], [2.1.0], [rgerhards@adiscon.com]) AM_INIT_AUTOMAKE m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_CONFIG_SRCDIR([src/lognorm.c]) AC_CONFIG_HEADER([config.h]) AC_USE_SYSTEM_EXTENSIONS # Checks for programs. 
AC_PROG_CC
AM_PROG_CC_C_O
AC_PROG_CC_C99
AC_PROG_LIBTOOL

# Compiler warning flags: use AX_COMPILER_FLAGS when both AX_IS_RELEASE and
# AX_COMPILER_FLAGS are known at autoreconf time. Otherwise fall back to a
# fixed warning set that is applied for GCC only.
m4_ifdef([AX_IS_RELEASE], [
	AX_IS_RELEASE([git-directory])
	m4_ifdef([AX_COMPILER_FLAGS], [
		AX_COMPILER_FLAGS()
	], [
		if test "$GCC" = "yes"
		then
			CFLAGS="$CFLAGS -W -Wall -Wformat-security -Wshadow -Wcast-align -Wpointer-arith -Wmissing-format-attribute -g"
		fi
		AC_MSG_WARN([missing AX_COMPILER_FLAGS macro, not using it])
	])
], [
	if test "$GCC" = "yes"
	then
		CFLAGS="$CFLAGS -W -Wall -Wformat-security -Wshadow -Wcast-align -Wpointer-arith -Wmissing-format-attribute -g"
	fi
	AC_MSG_WARN([missing AX_IS_RELEASE macro, not using AX_COMPILER_FLAGS macro because of this])
])

# Checks for libraries.
# Probe for clock_gettime and look into librt if it is not found without it.
# LIBS is saved and restored around the probe, so the probe itself leaves
# the global LIBS value unchanged.
save_LIBS=$LIBS
LIBS=
AC_SEARCH_LIBS(clock_gettime, rt)
LIBS=$save_LIBS

# Checks for header files.
AC_HEADER_STDC
#AC_CHECK_HEADERS([])

# Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
AC_TYPE_SIZE_T
#AC_HEADER_TIME
#AC_STRUCT_TM

# Checks for library functions.
AC_FUNC_SELECT_ARGTYPES
AC_TYPE_SIGNAL
AC_FUNC_STRERROR_R
AC_CHECK_FUNCS([asprintf strdup strndup strtok_r])

# Flags for building against the in-tree library.
LIBLOGNORM_CFLAGS="-I\$(top_srcdir)/src"
LIBLOGNORM_LIBS="\$(top_builddir)/src/liblognorm.la"
AC_SUBST(LIBLOGNORM_CFLAGS)
AC_SUBST(LIBLOGNORM_LIBS)

# modules we require
PKG_CHECK_MODULES(LIBESTR, libestr >= 0.0.0)
PKG_CHECK_MODULES(JSON_C, libfastjson,, )

# add libestr flags to pkgconfig file for static linking
AC_SUBST(pkg_config_libs_private, $LIBESTR_LIBS)

# Regular expressions
# PCRE2 is looked up only when regexp support is requested. FEATURE_REGEXP is
# substituted as 1 or 0 accordingly.
AC_ARG_ENABLE(regexp,
	[AS_HELP_STRING([--enable-regexp],[Enable regular expressions support @<:@default=no@:>@])],
	[case "${enableval}" in
	 yes) enable_regexp="yes" ;;
	  no) enable_regexp="no" ;;
	   *) AC_MSG_ERROR(bad value ${enableval} for --enable-regexp) ;;
	 esac],
	[enable_regexp="no"]
)
AM_CONDITIONAL(ENABLE_REGEXP, test x$enable_regexp = xyes)
if test "$enable_regexp" = "yes"; then
	PKG_CHECK_MODULES(PCRE, [libpcre2-8 >= 10.00])
	AC_DEFINE(FEATURE_REGEXP, 1, [Regular expressions support enabled.])
	FEATURE_REGEXP=1
else
	FEATURE_REGEXP=0
fi
AC_SUBST(FEATURE_REGEXP)

# debug mode settings
# enable_debug is always yes or no here, so exactly one of DEBUG and NDEBUG
# ends up being defined.
AC_ARG_ENABLE(debug,
	[AS_HELP_STRING([--enable-debug],[Enable debug mode @<:@default=no@:>@])],
	[case "${enableval}" in
	 yes) enable_debug="yes" ;;
	  no) enable_debug="no" ;;
	   *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;;
	 esac],
	[enable_debug="no"]
)
if test "$enable_debug" = "yes"; then
	AC_DEFINE(DEBUG, 1, [Defined if debug mode is enabled.])
fi
if test "$enable_debug" = "no"; then
	AC_DEFINE(NDEBUG, 1, [Defined if debug mode is disabled.])
fi

# advanced statistics
# The option is spelled --enable-advanced-stats. The result is kept in the
# shorter shell variable enable_advstats.
AC_ARG_ENABLE(advanced-stats,
	[AS_HELP_STRING([--enable-advanced-stats],[Enable advanced statistics @<:@default=no@:>@])],
	[case "${enableval}" in
	 yes) enable_advstats="yes" ;;
	  no) enable_advstats="no" ;;
	   *) AC_MSG_ERROR(bad value ${enableval} for --enable-advanced-stats) ;;
	 esac],
	[enable_advstats="no"]
)
if test "$enable_advstats" = "yes"; then
	AC_DEFINE(ADVANCED_STATS, 1, [Defined if advanced statistics are enabled.])
fi

# docs (html) build settings
# When docs are requested, configure fails unless one of the listed
# sphinx-build programs is found.
AC_ARG_ENABLE(docs,
	[AS_HELP_STRING([--enable-docs],[Enable building HTML docs (requires Sphinx) @<:@default=no@:>@])],
	[case "${enableval}" in
	 yes) enable_docs="yes" ;;
	  no) enable_docs="no" ;;
	   *) AC_MSG_ERROR(bad value ${enableval} for --enable-docs) ;;
	 esac],
	[enable_docs="no"]
)
if test "$enable_docs" = "yes"; then
	AC_CHECK_PROGS([SPHINXBUILD], [sphinx-build sphinx-build3 sphinx-build2], [no])
	if test "$SPHINXBUILD" = "no"; then
		AC_MSG_ERROR([sphinx-build is required to build documentation, install it or try --disable-docs])
	fi
fi
AM_CONDITIONAL([ENABLE_DOCS], [test "$enable_docs" = "yes"])

# testbench, enabled unless explicitly disabled
AC_ARG_ENABLE(testbench,
	[AS_HELP_STRING([--enable-testbench],[testbench enabled @<:@default=yes@:>@])],
	[case "${enableval}" in
	 yes) enable_testbench="yes" ;;
	  no) enable_testbench="no" ;;
	   *) AC_MSG_ERROR(bad value ${enableval} for --enable-testbench) ;;
	 esac],
	[enable_testbench=yes]
)
AM_CONDITIONAL(ENABLE_TESTBENCH, test x$enable_testbench = xyes)

# valgrind support for the testbench
# The plain yes or no value is also substituted as VALGRIND, so it has to be
# assigned with a single equals sign.
AC_ARG_ENABLE(valgrind,
	[AS_HELP_STRING([--enable-valgrind],[valgrind enabled @<:@default=no@:>@])],
	[case "${enableval}" in
	 yes) enable_valgrind="yes" ;;
	  no) enable_valgrind="no" ;;
	   *) AC_MSG_ERROR(bad value ${enableval} for --enable-valgrind) ;;
	 esac],
	[enable_valgrind=no]
)
AM_CONDITIONAL(ENABLE_VALGRIND, test x$enable_valgrind = xyes)
VALGRIND="$enable_valgrind"
AC_SUBST(VALGRIND)

# lognorm tools, enabled unless explicitly disabled
AC_ARG_ENABLE(tools,
	[AS_HELP_STRING([--enable-tools],[lognorm toolset enabled @<:@default=yes@:>@])],
	[case "${enableval}" in
	 yes) enable_tools="yes" ;;
	  no) enable_tools="no" ;;
	   *) AC_MSG_ERROR(bad value ${enableval} for --enable-tools) ;;
	 esac],
	[enable_tools=yes]
)
AM_CONDITIONAL(ENABLE_TOOLS, test x$enable_tools = xyes)

# TurboVM bytecode engine with SIMD acceleration
# SIMD compiler flags are selected by host CPU. CPUs not listed below get an
# empty TURBO_CFLAGS. FEATURE_TURBO is substituted as 1 or 0.
AC_ARG_ENABLE(turbo,
	[AS_HELP_STRING([--enable-turbo],[Enable TurboVM bytecode engine (SIMD-accelerated) @<:@default=no@:>@])],
	[case "${enableval}" in
	 yes) enable_turbo="yes" ;;
	  no) enable_turbo="no" ;;
	   *) AC_MSG_ERROR(bad value ${enableval} for --enable-turbo) ;;
	 esac],
	[enable_turbo=no]
)
if test "x$enable_turbo" = "xyes"; then
	AC_DEFINE(ENABLE_TURBO, 1, [Enable TurboVM bytecode engine.])
	FEATURE_TURBO=1
	case "$host_cpu" in
	x86_64|amd64)
		TURBO_CFLAGS="-msse4.2"
		AC_DEFINE([LN_SIMD_SSE42], [1], [x86-64 SSE4.2 available])
		;;
	aarch64|arm64)
		TURBO_CFLAGS="-march=armv8-a+simd"
		AC_DEFINE([LN_SIMD_NEON], [1], [ARM64 NEON available])
		;;
	*)
		TURBO_CFLAGS=""
		;;
	esac
	AC_SUBST(TURBO_CFLAGS)
else
	FEATURE_TURBO=0
fi
AC_SUBST(FEATURE_TURBO)
AM_CONDITIONAL(ENABLE_TURBO, test x$enable_turbo = xyes)

AC_CONFIG_FILES([Makefile \
		lognorm.pc \
		compat/Makefile \
		doc/Makefile \
		src/Makefile \
		src/lognorm-features.h \
		tools/Makefile \
		tests/Makefile \
		tests/options.sh])
AC_OUTPUT
AC_CONFIG_MACRO_DIR([m4])

# Summary of the selected configuration.
echo "*****************************************************"
echo "liblognorm will be compiled with the following settings:"
echo
echo "Regex enabled: $enable_regexp"
echo "Advanced Statistics enabled: $enable_advstats"
echo "Testbench enabled:
$enable_testbench" echo "Valgrind enabled: $enable_valgrind" echo "Debug mode enabled: $enable_debug" echo "Tools enabled: $enable_tools" echo "Docs enabled: $enable_docs" echo "TurboVM enabled: $enable_turbo" liblognorm-2.1.0/devtools/000077500000000000000000000000001520037563000155165ustar00rootroot00000000000000liblognorm-2.1.0/devtools/ci/000077500000000000000000000000001520037563000161115ustar00rootroot00000000000000liblognorm-2.1.0/devtools/ci/Dockerfile.arm000066400000000000000000000006131520037563000206610ustar00rootroot00000000000000FROM ubuntu:24.04 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update \ && apt-get install -y --no-install-recommends \ autoconf \ automake \ build-essential \ ca-certificates \ clang \ curl \ gdb \ libestr-dev \ libfastjson-dev \ libtool \ libtool-bin \ pkg-config \ && rm -rf /var/lib/apt/lists/* liblognorm-2.1.0/devtools/default_dev_container000066400000000000000000000000461520037563000217650ustar00rootroot00000000000000rsyslog/rsyslog_dev_base_ubuntu:20.04 liblognorm-2.1.0/devtools/devcontainer.sh000077500000000000000000000042751520037563000205460ustar00rootroot00000000000000#!/bin/bash # This scripts uses an rsyslog development container to execute given # command inside it. # Note: command line parameters are passed as parameters to the container, # with the notable exception that -ti, if given as first parameter, is # passed to "docker run" itself but NOT the container. # # use env var DOCKER_RUN_EXTRA_OPTS to provide extra options to docker run # command. # # # TO MODIFIY BEHAVIOUR, use # LIBLOGNORM_CONTAINER_UID, format uid:gid, # to change the users container is run under # set to "" to use the container default settings # (no local mapping) set -e if [ "$1" == "--rm" ]; then optrm="--rm" shift 1 fi if [ "$1" == "-ti" ]; then ti="-ti" shift 1 fi # check in case -ti was in front... 
if [ "$1" == "--rm" ]; then optrm="--rm" shift 1 fi if [ "$LIBLOGNORM_HOME" == "" ]; then export LIBLOGNORM_HOME=$(pwd) echo info: LIBLOGNORM_HOME not set, using $LIBLOGNORM_HOME fi if [ -z "$LIBLOGNORM_DEV_CONTAINER" ]; then LIBLOGNORM_DEV_CONTAINER=$(cat $LIBLOGNORM_HOME/devtools/default_dev_container) fi printf '/rsyslog is mapped to %s \n' "$LIBLOGNORM_HOME" printf 'using container %s\n' "$LIBLOGNORM_DEV_CONTAINER" printf 'pulling container...\n' printf 'user ids: %s:%s\n' $(id -u) $(id -g) printf 'container_uid: %s\n' ${LIBLOGNORM_CONTAINER_UID--u $(id -u):$(id -g)} printf 'container cmd: %s\n' $* printf '\nNote: we use the RSYSLOG CONTAINERS, as such project home is /rsyslog!\n\n' docker pull $LIBLOGNORM_DEV_CONTAINER docker run $ti $optrm $DOCKER_RUN_EXTRA_OPTS \ -e LIBLOGNORM_CONFIGURE_OPTIONS_EXTRA \ -e LIBLOGNORM_CONFIGURE_OPTIONS_OVERRIDE \ -e CC \ -e CFLAGS \ -e LDFLAGS \ -e ASAN_OPTIONS \ -e LSAN_OPTIONS \ -e TSAN_OPTIONS \ -e UBSAN_OPTIONS \ -e CI_MAKE_OPT \ -e CI_MAKE_CHECK_OPT \ -e CI_MAKE_CHECK_EXTRA \ -e CI_CHECK_CMD \ -e CI_BUILD_URL \ -e CI_CODECOV_TOKEN \ -e CI_VALGRIND_SUPPRESSIONS \ -e CI_SANITIZE_BLACKLIST \ -e ABORT_ALL_ON_TEST_FAIL \ -e USE_AUTO_DEBUG \ -e LIBLOGNORM_STATSURL \ -e VCS_SLUG \ --cap-add SYS_ADMIN \ --cap-add SYS_PTRACE \ ${LIBLOGNORM_CONTAINER_UID--u $(id -u):$(id -g)} \ $DOCKER_RUN_EXTRA_FLAGS \ -v "$LIBLOGNORM_HOME":/rsyslog $LIBLOGNORM_DEV_CONTAINER $* liblognorm-2.1.0/devtools/gather-check-logs.sh000077500000000000000000000036001520037563000213430ustar00rootroot00000000000000#!/bin/bash # gather logs generated by "make [dist]check" # this also limits log size so that buildbot does not abort # Copyright (C) 2020 by Rainer Gerhards, released under ASL 2.0 show_log() { if grep -q ":test-result: FAIL" "$1"; then printf "\nFAIL: ${1%%.trs} \ ########################################################\ ################################\n\n" logfile="${1%%trs}log" if [ -f "$logfile" ]; then lines="$(wc -l < $logfile)" if (( lines > 
4000 )); then
				# Very large log: list the file, then show only the first and
				# last 2000 lines so the CI output stays bounded.
				ls -l "$logfile"
				printf 'file is very large (%d lines), showing parts\n' "$lines"
				head -n 2000 < "$logfile"
				printf '\n\n... snip ...\n\n'
				tail -n 2000 < "$logfile"
			else
				cat "$logfile"
			fi
		else
			printf 'log FILE MISSING!\n'
		fi
	fi
}

# Print the name of the file given as $1 followed by its first 12 lines.
append_summary() {
	echo "file: $1" # emit file name just in case we have multiple!
	head -n12 "$1"
}

# find logs from tests which are potentially aborted. The main indication is
# that no matching .trs file exists
# $1 is the path of a .log file. Paths matching .dep_wrk, rstb_ or config.log
# are ignored.
check_incomplete_logs() {
	if grep -q "\.dep_wrk\|rstb_\|config.log" <<<"$1"; then
		return
	fi
	# we emit info only for test log files - this means there must
	# be a matching .sh file by our conventions
	if [ -f "${1%%log}sh" ]; then
		trsfile="${1%%log}trs"
		if [ ! -f "$trsfile" ]; then
			printf '\n\nNo matching .trs file for %s\n' "$1"
			# Quote the stem so that only the trailing * is subject to globbing.
			ls -l "${1%%.log}"*
			cat "$1"
		fi
	fi
}

# The functions are exported because they are called from the child bash
# processes started by find -exec below.
export -f show_log
export -f append_summary
export -f check_incomplete_logs

############################## MAIN ENTRY POINT ##############################
printf 'find failing tests\n'
rm -f failed-tests.log
find . -name "*.trs" -exec bash -c 'show_log "$1" >> failed-tests.log' _ {} \;
find . -name "*.log" -exec bash -c 'check_incomplete_logs "$1" >> failed-tests.log' _ {} \;

if [ -f failed-tests.log ]; then
	# show summary stats so that we know how many failed
	find .
-name test-suite.log -exec bash -c 'append_summary "$1" >>failed-tests.log' _ {} \; fi liblognorm-2.1.0/devtools/run-build.sh000066400000000000000000000015001520037563000177470ustar00rootroot00000000000000#!/bin/bash # script for build-only CI runs via container printf 'running build with\n' printf 'container: %s\n' "$LIBLOGNORM_DEV_CONTAINER" printf 'CC:\t%s\n' "$CC" printf 'CFLAGS:\t%s\n' "$CFLAGS" printf 'LDFLAGS:\t%s\n' "$LDFLAGS" printf 'working directory: %s\n' "$(pwd)" printf 'user ids: %s:%s\n' "$(id -u)" "$(id -g)" set -e if [ -n "$LIBLOGNORM_CONFIGURE_OPTIONS_OVERRIDE" ]; then CONFIGURE_OPTS="$LIBLOGNORM_CONFIGURE_OPTIONS_OVERRIDE" else CONFIGURE_OPTS="$LIBLOGNORM_CONFIGURE_OPTIONS_EXTRA" fi printf 'CONFIGURE_OPTS:\t%s\n' "$CONFIGURE_OPTS" printf 'STEP: autoreconf / configure ===============================================\n' autoreconf -fvi # shellcheck disable=SC2086 ./configure $CONFIGURE_OPTS printf 'STEP: make =================================================================\n' make ${CI_MAKE_OPT:-} liblognorm-2.1.0/devtools/run-ci.sh000077500000000000000000000043721520037563000172600ustar00rootroot00000000000000#!/bin/bash # script for generic CI runs via container printf 'running CI with\n' printf 'container: %s\n' "$LIBLOGNORM_DEV_CONTAINER" printf 'CC:\t%s\n' "$CC" printf 'CFLAGS:\t%s\n' "$CFLAGS" printf 'LDFLAGS:\t%s\n' "$LDFLAGS" printf 'CI_MAKE_CHECK_EXTRA:\t%s\n' "$CI_MAKE_CHECK_EXTRA" printf 'working directory: %s\n' "$(pwd)" printf 'user ids: %s:%s\n' "$(id -u)" "$(id -g)" if [ "$SUDO" != "" ]; then printf 'check sudo' $SUDO echo sudo works! 
fi

# Optional valgrind suppressions file, looked up below tests/CI.
if [ "$CI_VALGRIND_SUPPRESSIONS" != "" ]; then
	export RS_TESTBENCH_VALGRIND_EXTRA_OPTS="--suppressions=$(pwd)/tests/CI/$CI_VALGRIND_SUPPRESSIONS"
fi

# Optional sanitizer blacklist, appended to CFLAGS.
if [ "$CI_SANITIZE_BLACKLIST" != "" ]; then
	export CFLAGS="$CFLAGS -fsanitize-blacklist=$(pwd)/$CI_SANITIZE_BLACKLIST"
	printf 'CFLAGS changed to: %s\n' "$CFLAGS"
fi

# The OVERRIDE variable, when set, replaces the EXTRA configure options.
if [ -n "$LIBLOGNORM_CONFIGURE_OPTIONS_OVERRIDE" ]; then
	CONFIGURE_OPTS="$LIBLOGNORM_CONFIGURE_OPTIONS_OVERRIDE"
else
	CONFIGURE_OPTS="$LIBLOGNORM_CONFIGURE_OPTIONS_EXTRA"
fi
printf 'CONFIGURE_OPTS:\t%s\n' "$CONFIGURE_OPTS"
set -e

printf 'STEP: autoreconf / configure ===============================================\n'
autoreconf -fvi
# shellcheck disable=SC2086
./configure $CONFIGURE_OPTS

# The separate build step is skipped when the check command is distcheck.
if [ "$CI_CHECK_CMD" != "distcheck" ]; then
	printf 'STEP: make =================================================================\n'
	make $CI_MAKE_OPT
fi

printf 'STEP: make %s ==============================================================\n' \
	"$CI_CHECK_CMD"
# From here on a failing command must not abort the script: the exit status
# of the check run is kept in rc while logs are gathered and uploaded.
set +e
echo CI_CHECK_CMD: $CI_CHECK_CMD
# shellcheck disable=SC2086
make $CI_MAKE_CHECK_OPT ${CI_MAKE_CHECK_EXTRA:-} ${CI_CHECK_CMD:-check}
rc=$?

printf 'STEP: find failing tests ====================================================\n'
echo calling gather-check-logs
devtools/gather-check-logs.sh

printf 'STEP: Codecov upload =======================================================\n'
if [ "$CI_CODECOV_TOKEN" != "" ]; then
	# NOTE(review): this downloads a script and executes it without any
	# integrity check - consider pinning and verifying the uploader.
	curl -s https://codecov.io/bash >codecov.sh
	chmod +x codecov.sh
	./codecov.sh -t "$CI_CODECOV_TOKEN" -n 'rsyslog buildbot PR' &> codecov_log
	rm codecov.sh
	lines="$(wc -l < codecov_log)"
	# Show only the head and tail of an oversized upload log.
	if (( lines > 3000 )); then
		printf 'codecov log file is very large (%d lines), showing parts\n' $lines
		head -n 1500 < codecov_log
		printf '\n\n...
snip ...\n\n' tail -n 1500 < codecov_log else cat codecov_log fi rm codecov_log fi exit $rc liblognorm-2.1.0/devtools/run-rsyslog-integration.sh000077500000000000000000000045741520037563000227140ustar00rootroot00000000000000#!/bin/bash set -e # Build liblognorm from the current checkout, install it into a temporary # prefix, then build rsyslog against that exact library and run only the # normalize-related downstream tests. This keeps the job focused on the # liblognorm integration surface instead of the full rsyslog matrix. LIBLOGNORM_SRC=/rsyslog WORKDIR=$(mktemp -d) LIBLOGNORM_PREFIX="$WORKDIR/liblognorm-install" RSYSLOG_SRC="$WORKDIR/rsyslog" RSYSLOG_REF="${RSYSLOG_INTEGRATION_REF:-main}" trap 'rm -rf "$WORKDIR"' EXIT # Keep this focused on liblognorm-backed normalize modules. We skip the # valgrind wrappers, regex-only tests, faketime tests, and mmdblookup. RSYSLOG_TESTS=( 'mmnormalize_rule_from_string.sh' 'mmnormalize_rule_from_array.sh' 'mmnormalize_parsesuccess.sh' 'mmnormalize_variable.sh' 'mmnormalize_tokenized.sh' 'pmnormalize-basic.sh' 'pmnormalize-invld-rulebase.sh' 'pmnormalize-rule.sh' 'pmnormalize-rule_and_rulebase.sh' 'pmnormalize-neither_rule_rulebase.sh' 'pmnormalize-rule_invld-data.sh' ) dump_rsyslog_logs() { if [ ! 
-d "$RSYSLOG_SRC/tests" ]; then return fi find "$RSYSLOG_SRC/tests" -maxdepth 1 -name '*.log' -print | while read -r logfile; do printf '\n===== %s =====\n' "$logfile" tail -n 200 "$logfile" || true done if [ -f "$RSYSLOG_SRC/tests/test-suite.log" ]; then printf '\n===== %s =====\n' "$RSYSLOG_SRC/tests/test-suite.log" tail -n 200 "$RSYSLOG_SRC/tests/test-suite.log" || true fi } printf 'building liblognorm from %s\n' "$LIBLOGNORM_SRC" printf 'using rsyslog ref %s\n' "$RSYSLOG_REF" printf 'working directory %s\n' "$WORKDIR" cd "$LIBLOGNORM_SRC" autoreconf -fvi ./configure --prefix="$LIBLOGNORM_PREFIX" make make install export PKG_CONFIG_PATH="$LIBLOGNORM_PREFIX/lib/pkgconfig:$LIBLOGNORM_PREFIX/lib64/pkgconfig:${PKG_CONFIG_PATH:-}" export LD_LIBRARY_PATH="$LIBLOGNORM_PREFIX/lib:$LIBLOGNORM_PREFIX/lib64:${LD_LIBRARY_PATH:-}" export ABORT_ALL_ON_TEST_FAIL=YES git clone --depth 1 --branch "$RSYSLOG_REF" https://github.com/rsyslog/rsyslog.git "$RSYSLOG_SRC" cd "$RSYSLOG_SRC" RSYSLOG_CONFIGURE_ARGS=( --enable-testbench --enable-mmnormalize --enable-pmnormalize --enable-imptcp --disable-default-tests --disable-libfaketime --without-valgrind-testbench ) ./autogen.sh "${RSYSLOG_CONFIGURE_ARGS[@]}" set +e make -j1 check TESTSUITEFLAGS=--stop TESTS="${RSYSLOG_TESTS[*]}" rc=$? 
set -e if [ "$rc" -ne 0 ]; then dump_rsyslog_logs fi exit "$rc" liblognorm-2.1.0/devtools/run-static-analyzer.sh000077500000000000000000000015341520037563000217740ustar00rootroot00000000000000#!/bin/bash set -e cd /rsyslog echo "SCAN_BUILD_CC: $SCAN_BUILD_CC" echo "SCAN_BUILD: $SCAN_BUILD" if [ -n "$SCAN_BUILD_REPORT_DIR" ]; then export CURR_REPORT="$(date +%y-%m-%d_%H-%M-%S)" export REPORT_DIR="$SCAN_BUILD_REPORT_DIR/$CURR_REPORT" fi autoreconf -fvi export CC="${SCAN_BUILD_CC:-clang}" ./configure \ ${LIBLOGNORM_CONFIGURE_OPTIONS_OVERRIDE:-} \ ${LIBLOGNORM_CONFIGURE_OPTIONS_EXTRA:-} set +e if [ -n "$REPORT_DIR" ]; then "${SCAN_BUILD:-scan-build}" -o "$REPORT_DIR" --use-cc "$CC" --status-bugs make else "${SCAN_BUILD:-scan-build}" --use-cc "$CC" --status-bugs make fi RESULT=$? set -e if [ "$RESULT" -eq 1 ]; then echo "scan-build failed" if [ -n "$SCAN_BUILD_REPORT_DIR" ]; then echo "scan-build report URL: ${SCAN_BUILD_REPORT_BASEURL}${CURR_REPORT}" > report_url fi fi echo "static analyzer result: $RESULT" exit "$RESULT" liblognorm-2.1.0/doc/000077500000000000000000000000001520037563000144245ustar00rootroot00000000000000liblognorm-2.1.0/doc/.gitignore000066400000000000000000000000341520037563000164110ustar00rootroot00000000000000_build Makefile.in Makefile liblognorm-2.1.0/doc/Makefile.am000066400000000000000000000026121520037563000164610ustar00rootroot00000000000000EXTRA_DIST = _static _templates conf.py \ index.rst introduction.rst installation.rst \ configuration.rst sample_rulebase.rst internals.rst \ contacts.rst changes.rst libraryapi.rst \ lognormalizer.rst license.rst graph.png htmldir = $(docdir) built_html = _build/html #html_DATA = $(built_html)/index.html # Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = -n -W -c $(srcdir) #SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. 
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(SPHINXOPTS) $(srcdir) .PHONY: clean-local html-local man-local all-local dist-hook install-data-hook dist-hook: find $(distdir)/ -name .gitignore | xargs rm -f clean-local: -rm -rf $(BUILDDIR)/* html-local: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." man-local: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." all-local: html-local install-data-hook: find $(built_html) -type f -printf "%P\n" | \ while read file; do \ echo " $(INSTALL_DATA) -D $(built_html)/$$file '$(DESTDIR)$(htmldir)/$$file'"; \ $(INSTALL_DATA) -D $(built_html)/$$file "$(DESTDIR)$(htmldir)/$$file" || exit $$?; \ done uninstall-local: -rm -rf "$(DESTDIR)$(htmldir)" liblognorm-2.1.0/doc/Makefile.sphinx000066400000000000000000000127341520037563000174030ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
# None of these targets produce a file of the same name; texinfo and info are
# listed as well because this Makefile defines them further down.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext

# Print the list of supported documentation targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"

# Remove everything below the build directory; errors are ignored.
clean:
	-rm -rf $(BUILDDIR)/*

# Each builder target below runs sphinx-build with the matching -b builder
# and writes into its own subdirectory of $(BUILDDIR).
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."
htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Testprojectlpk.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Testprojectlpk.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/Testprojectlpk" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Testprojectlpk" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 
@echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." liblognorm-2.1.0/doc/_static/000077500000000000000000000000001520037563000160525ustar00rootroot00000000000000liblognorm-2.1.0/doc/_static/.gitignore000066400000000000000000000000001520037563000200300ustar00rootroot00000000000000liblognorm-2.1.0/doc/_templates/000077500000000000000000000000001520037563000165615ustar00rootroot00000000000000liblognorm-2.1.0/doc/_templates/.gitignore000066400000000000000000000000001520037563000205370ustar00rootroot00000000000000liblognorm-2.1.0/doc/ai_architecture_map.md000066400000000000000000000040001520037563000207300ustar00rootroot00000000000000# liblognorm Architecture Map This document maps high-level concepts to specific files and functions in the codebase. Use this to navigate the source code efficiently. ## Core Components | Concept | Description | Key Files | | :--- | :--- | :--- | | **Context** | The main library context, holding global state and configuration. 
| `src/liblognorm.c` (`ln_initCtx`), `src/liblognorm.h` (`struct ln_ctx_s`) | | **PDAG** | **Parse DAG**. The core data structure for the rulebase. A directed acyclic graph where edges are "motifs" (parsers). | `src/pdag.c`, `src/pdag.h` | | **Parser / Motif** | A specific matcher (e.g., "word", "number", "literal"). Edges in the PDAG. | `src/parser.c` (implementations), `src/parser.h` (interface) | | **Sample** | A loaded rule or sample line. | `src/samp.c`, `src/samp.h` | | **Annotation** | Metadata attached to parsed fields. | `src/annot.c`, `src/annot.h` | ## Data Flow 1. **Initialization**: User calls `ln_initCtx()`. 2. **Loading**: User calls `ln_loadSamples()`. * `src/samp.c` parses the rule file. * `src/pdag.c` builds the PDAG from the samples. 3. **Normalization**: User calls `ln_normalize()`. * `src/lognorm.c` is the entry point. * It delegates to `ln_pdagParse()` in `src/pdag.c`. * `ln_pdagParse()` traverses the graph, calling `ln_v2_parse*` functions in `src/parser.c`. ## Critical Data Structures ### `ln_ctx` (`src/liblognorm.h`) The "world" object. Contains: - `pdag`: The root of the rulebase. - `debugCB`: Callback for debug logging. ### `ln_pdag` (`src/pdag.h`) Represents the rulebase graph. ### `ln_parser` (`src/parser.h`) Represents a single node/edge type in the graph. ## Testing - **Tests Directory**: `tests/` - **Running Tests**: `make check` - **New Tests**: Add a new `.sh` file in `tests/` and add it to `TESTS` in `tests/Makefile.am`. ## Documentation - **User Guide**: `doc/configuration.rst` (Rule syntax) - **Internals**: `doc/internals.rst` (General concepts), `doc/pdag_implementation_model.rst` (Deep dive into the engine) liblognorm-2.1.0/doc/changes.rst000066400000000000000000000001311520037563000165610ustar00rootroot00000000000000ChangeLog ========= See below for a list of changes. .. 
literalinclude:: ../ChangeLog liblognorm-2.1.0/doc/conf.py000066400000000000000000000172261520037563000157330ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Liblognorm documentation build configuration file, created by # sphinx-quickstart on Mon Dec 16 13:12:44 2013. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8-sig' # General information about the project. project = u'Liblognorm' # pylint: disable=W0141 copyright = u'Adiscon' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '2.1' # The full version, including alpha/beta/rc tags. release = '2.1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'haiku' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None html_title = "A fast log normalization library" # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None html_short_title = project + " " + release + " documentation" # The name of an image file (relative to this directory) to place at the top # of the sidebar. 
#html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. html_use_index = False # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. html_show_copyright = False # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Output file base name for HTML help builder. 
htmlhelp_basename = 'Liblognormdoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'Liblognorm.tex', u'Liblognorm Documentation', u'Pavel Levshin', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'liblognorm', u'Liblognorm Documentation', [u'Pavel Levshin'], 1) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ('index', 'Liblognorm', u'Liblognorm Documentation', u'Pavel Levshin', 'Liblognorm', 'Fast log normalization library.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. 
#texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' liblognorm-2.1.0/doc/configuration.rst000066400000000000000000001323231520037563000200310ustar00rootroot00000000000000How to configure ================ To use liblognorm, you need 3 things. 1. An installed and working copy of liblognorm. The installation process has been discussed in the chapter :doc:`installation`. 2. Log files. 3. A rulebase, which is heart of liblognorm configuration. Log files --------- A log file is a text file, which typically holds many lines. Each line is a log message. These are usually a bit strange to read, thus to analyze. This mostly happens, if you have a lot of different devices, that are all creating log messages in a different format. Rulebase -------- The rulebase holds all the schemes for your logs. It basically consists of many lines that reflect the structure of your log messages. When the normalization process is started, a parse-tree will be generated from the rulebase and put into the memory. This will then be used to parse the log messages. Each line in rulebase file is evaluated separately. Rulebase Versions ----------------- This documentation is for liblognorm version 2 and above. Version 2 is a complete rewrite of liblognorm which offers many enhanced features but is incompatible to some pre-v2 rulebase commands. For details, see compatibility document. Note that liblognorm v2 contains a full copy of the v1 engine. As such it is fully compatible to old rulebases. In order to use the new v2 engine, you need to explicitly opt in. To do so, you need to add the line:: version=2 to the top of your rulebase file. Currently, it is very important that * the line is given exactly as above * no whitespace within the sequence is permitted (e.g. "version = 2" is invalid) * no whitespace or comment after the "2" is permitted (e.g. 
"version=2 # comment" is invalid) * this line **must** be the **very** first line of the file; this also means there **must** not be any comment or empty lines in front of it Only if the version indicator is properly detected, the v2 engine is used. Otherwise, the v1 engine is used. So if you use v2 features but got the version line wrong, you'll end up with error messages from the v1 engine. The v2 engine understands almost all v1 parsers, and most importantly all that are typically used. It does not understand these parsers: * tokenized * recursive * descent * regex * interpret * suffixed * named_suffixed The recursive and descent parsers should be replaced by user-defined types. The tokenized parsers should be replaced by repeat. The interpret functionality is provided via the parser's "format" parameters. For the others, currently there exists no replacement, but with the exception of regex, replacements will be added based on demand. If you think regex support is urgently needed, please read our `related issue on github, `_ where you can also cast your ballot in favor of it. If you need any of these parsers, you need to use the v1 engine. That of course means you cannot use the v2 enhancements, so converting as much as possible makes sense. Commentaries ------------ To keep your rulebase tidy, you can use commentaries. Start a commentary with "#" like in many other configurations. It should look like this:: # The following prefix and rules are for firewall logs Note that the comment character MUST be in the first column of the line. Empty lines are just skipped, they can be inserted for readability. User-Defined Types ------------------ If the line starts with ``type=``, then it contains a user-defined type. You can use a user-defined type wherever you use a built-in type; they are equivalent. That also means you can use user-defined types in the definition of other user-defined types (they can be used recursively).
The only restriction is that you must define a type **before** you can use it. This line has the following format:: type=<typename>:<match description> Everything before the colon is treated as the type name. User-defined types must always start with "@". So "@mytype" is a valid name, whereas "mytype" is invalid and will lead to an error. After the colon, a match description should be given. It is exactly the same as the one given in rule lines (see below). A generic IP address type could look as follows:: type=@IPaddr:%ip:ipv4% type=@IPaddr:%ip:ipv6% This creates a type "@IPaddr", which consists of either an IPv4 or IPv6 address. Note how we use two different lines to create an alternative representation. This is how things generally work with types: you can use as many "type" lines for a single type as you need to define your object. Note that pure alternatives could also be defined via the "alternative" parser - which option to choose is left to the user. They are equivalent. The ability to use multiple type lines for definition, however, brings more power than just to define alternatives. Includes -------- Especially with user-defined types includes come in handy. With an include, you can include definitions already made elsewhere into the current rule set (just like the "include" directive works in many programming languages). An include is done by a line starting with ``include=`` where the rest of the line is the actual file name, just like in this example:: include=/var/lib/liblognorm/stdtypes.rb The definition is included right at the position where it occurs. Processing of the original file is continued when the included file has been fully processed. Includes can be nested. To facilitate repositories of common rules, liblognorm honors the :: LIBLOGNORM_RULEBASES environment variable.
If it is set, liblognorm tries to locate the file inside the path pointed to by ``LIBLOGNORM_RULEBASES`` in the following case: * the provided file cannot be found * the provided file name is not an absolute path (does not start with "/") So assuming we have:: export LIBLOGNORM_RULEBASES=/var/lib/liblognorm The above example can be re-written as follows:: include=stdtypes.rb Note, however, that if ``stdtypes.rb`` exists in the current working directory, that file will be loaded instead of the one from ``/var/lib/liblognorm``. This use facilitates building a library of standard type definitions. Note that the liblognorm project also ships type definitions for common scenarios. Rules ----- If the line starts with ``rule=``, then it contains a rule. This line has the following format:: rule=[<tag1>[,<tag2>...]]:<match description> Everything before the colon is treated as a comma-separated list of tags, which will be attached to a match. After the colon, a match description should be given. It consists of string literals and field selectors. String literals should match exactly, whereas field selectors may match variable parts of a message. A rule could look like this (in legacy format):: rule=:%date:date-rfc3164% %host:word% %tag:char-to:\x3a%: no longer listening on %ip:ipv4%#%port:number%' This excerpt is a common rule. A rule always contains several different "parts"/properties and reflects the structure of the message you want to normalize (e.g. Host, IP, Source, Syslogtag...). Literals -------- A literal is just a sequence of characters, which must match exactly. Percent sign characters must be escaped to prevent them from starting a field accidentally. Replace each "%" with "\\x25" or "%%", when it occurs in a string literal. Fields ------ There are different formats for field specification: * legacy format * condensed format * full json format Legacy Format ############# Legacy format is exactly identical to the v1 engine.
This permits you to use existing v1 rulebases without any modification with the v2 engine, except for adding the ``version=2`` header line to the top of the file. Remember: some v1 types are not supported - if you are among the few who use them, you need to do some manual conversion. For almost all users, manual conversion should not be necessary. Legacy format is not documented here. If you want to use it, see the v1 documentation. Condensed Format ################ The goal of this format is to be as brief as possible, permitting you an as-clear-as-possible view of your rule. It is very similar to legacy format and recommended to be used for simple types which do not need any parser parameters. Its structure is as follows:: %<field name>:<field type>{<parameters>}% **field name** -> that name can be selected freely. It should be a description of what kind of information the field is holding, e.g. SRC if the field contains the source IP address of the message. These names should also be chosen carefully, since the field name can be used in every rule and therefore should fit for the same kind of information in different rules. Some special field names exist: * **dash** ("-"): this field is matched but not saved * **dot** ("."): this is useful if a parser returns a set of fields. Usually, it does so by creating a json subtree. If the field is named ".", then no subtree is created but instead the subfields are moved into the main hierarchy. * **two dots** (".."): similar to ".", but can be used at the lower level to denote that a field is to be included with the name given by the upper-level object. Note that ".." is only acted on if a subelement contains a single field. The reason is that if there were more, we could not assign all of them to the *single* name given by the upper-level-object. The prime use case for this special name is in user-defined types that parse only a single value. Without "..", they would always become a JSON subtree, which seems unnatural and is different from built-in types.
So it is suggested to name such fields as "..", which means that the user can assign a name of his liking, just like in the case of built-in parsers. **field type** -> selects the corresponding parser; the available parsers are described below. Special characters that need to be escaped when used inside a field description are "%" and ":". It is strongly recommended **not** to use them. **parameters** -> This is an optional set of parameters, given in pure JSON format. Parameters can be generic (e.g. "priority") or specific to a parser (e.g. "extradata"). Generic parameters are described below in their own section, parser-specific ones in the relevant type documentation. As an example, the "char-to" parser accepts a parameter named "extradata" which describes up to which character it shall match (the name "extradata" stems back to the legacy v1 system):: %tag:char-to{"extradata":":"}% Whitespace, including LF, is permitted inside a field definition after the opening percent sign and before the closing one. This can be used to make complex rules more readable. So the example rule from the overview section above could be rewritten as:: rule=:% date:date-rfc3164 % % host:word % % tag:char-to{"extradata":":"} %: no longer listening on % ip:ipv4 %#% port:number %' When doing this, note well that whitespace IS important inside the literal text. So e.g. in the second example line above "% %" we require a single SP as literal text. Note that any combination of your liking is valid, so it could also be written as:: rule=:%date:date-rfc3164% %host:word% % tag:char-to{"extradata":":"} %: no longer listening on % ip:ipv4 %#% port:number %' To prevent a typical user error, continuation lines are **not** permitted to start with ``rule=``. There are some obscure cases where this could be a valid rule, and it can be re-formatted in that case. More often, this is the result of a missing percent sign, as in this sample:: rule=:test%field:word ... missing percent sign ...
rule=:%f:word% If we permitted ``rule=`` at the start of a continuation line, these kinds of problems would be very hard to detect. Full JSON Format ################ This format is best for complex definitions or if there are many parser parameters. Its structure is as follows:: %JSON% Where JSON is the configuration expressed in JSON. To get you started, let's rewrite the above sample in pure JSON form:: rule=:%[ {"type":"date-rfc3164", "name":"date"}, {"type":"literal", "text":" "}, {"type":"char-to", "name":"host", "extradata":":"}, {"type":"literal", "text":": no longer listening on "}, {"type":"ipv4", "name":"ip"}, {"type":"literal", "text":"#"}, {"type":"number", "name":"port"} ]% A couple of things to note: * we express everything in this example in a *single* parser definition * this is done by using a **JSON array**; whenever an array is used, multiple parsers can be specified. They are executed one after the other in the given order. * literal text is matched here via explicit parser call; as specified below, this is recommended only for specific use cases with the current version of liblognorm * parser parameters (both generic and parser-specific ones) are given on the main JSON level * the literal text shall not be stored inside an output variable; for this reason no name attribute is given (we could also have used ``"name":"-"`` which achieves the same effect but is more verbose). With the literal parser calls replaced by actual literals, the sample looks like this:: rule=:%{"type":"date-rfc3164", "name":"date"} % % {"type":"char-to", "name":"host", "extradata":":"} %: no longer listening on % {"type":"ipv4", "name":"ip"} %#% {"type":"number", "name":"port"} % Which format you use and how you exactly use it is up to you. Some guidelines: * using the "literal" parser in JSON should be avoided currently; the experimental version does have some rough edges where conflicts in literal processing will not be properly handled.
This should not be an issue in "closed environments", like "repeat", where no such conflict can occur. * otherwise, JSON is perfect for very complex things (like nesting of parsers) - it is **not** suggested to use any other format for these kinds of things. * if a field needs to be matched but the result of that match is not needed, omit the "name" attribute; specifically avoid using the more verbose ``"name":"-"``. * it is a good idea to start each definition with ``"type":"..."`` as this provides a good quick overview of what is being defined. Mandatory Parameters .................... type ~~~~ The field type, selects the parser to use. See "Field types" below for a description. Optional Generic Parameters ........................... name ~~~~ The field name to use. If "-" is used, the field is matched, but not stored. In this case, you can simply **not** specify a field name, which is the preferred way of doing this. priority ~~~~~~~~ The priority to assign to this parser. Priorities are numerical values in the range from 0 (highest) to 65535 (lowest). If multiple parsers could match at a given character position of a log line, parsers are tried in priority order. Different priorities can lead to different parsing. For example, if the greedy "rest" type is assigned priority 0, and no other parser is assigned the same priority, no other parser will ever match (because "rest" is very greedy and always matches the rest of the message). Note that liblognorm internally has a parser-specific priority, which is selected by the program developer based on the specificity of a type. If the user assigns equal priorities, parsers are executed based on the parser-specific priority. The default priority value is 30,000. Field types ----------- We have legacy and regular field types. Pre-v2, we did not have user-defined types. As such, there was a relatively large number of parsers that handled very similar cases, for example for strings.
These parsers still work and may even provide best performance in extreme cases. In v2, we focus on fewer, but more generic parsers, which are then tailored via parameters. There is nothing bad about using legacy parsers and there is no plan to phase them out at any time in the future. We just wanted to let you know, especially if you wonder about some "weird" parsers. In v1, parsers could have only a single parameter, which was called "extradata" at that time. This is why some of the legacy parsers require or support a parameter named "extradata" and do not use a better name for it (internally, the legacy format creates a v2 parser definition with "extradata" being populated from the legacy "extradata" part of the configuration). number ###### One or more decimal digits. Parameters .......... format ~~~~~~ Specifies the format of the json object. Possible values are "string" and "number", with string being the default. If "number" is used, the json object will be a native json integer. maxval ~~~~~~ Maximum value permitted for this number. If the value is higher than this, it will not be detected by this parser definition and an alternate detection path will be pursued. float ##### A floating-point number represented in non-scientific form. Parameters .......... format ~~~~~~ Specifies the format of the json object. Possible values are "string" and "number", with string being the default. If "number" is used, the json object will be a native json floating point number. Note that we try to preserve the original string serialization format, but keep in mind that floating point numbers are inherently imprecise, so slight variance may occur depending on how they are processed. hexnumber ######### A hexadecimal number as seen by this parser begins with the string "0x", is followed by 1 or more hex digits and is terminated by white space. Any interleaving non-hex digits will cause non-detection. The rules are strict to avoid false positives. Parameters ..........
format ~~~~~~ Specifies the format of the json object. Possible values are "string" and "number", with string being the default. If "number" is used, the json object will be a native json integer. Note that json numbers are always decimal, so if "number" is selected, the hex number will be converted to decimal. The original hex string is no longer available in this case. maxval ~~~~~~ Maximum value permitted for this number. If the value is higher than this, it will not be detected by this parser definition and an alternate detection path will be pursued. This is most useful if fixed-size hex numbers need to be processed. For example, for byte values the "maxval" could be set to 255, which ensures that invalid values are not misdetected. kernel-timestamp ################ Parses a Linux kernel timestamp, which has the format:: [ddddd.dddddd] where "d" is a decimal digit. The part before the period has to have at least 5 digits as per kernel code. There is no upper limit per se inside the kernel, but liblognorm does not accept more than 12 digits, which seems more than sufficient (we may reduce the max count if misdetections occur). The part after the period has to have exactly 6 digits. whitespace ########## This parses all whitespace until the first non-whitespace character is found. This check is performed using the ``isspace()`` C library function to check for space, horizontal tab, newline, vertical tab, form feed and carriage return characters. This parser is primarily a tool to skip to the next "word" if the exact number of whitespace characters (and type of whitespace) is not known. The current parsing position MUST be on a whitespace character, else the parser does not match. Remember that to just parse but not preserve the field contents, the dash ("-") is used as field name in condensed format or the "name" parameter is simply omitted in JSON format. This is almost always expected with the *whitespace* type.
string ###### This is a highly customizable parser that can be used to extract many types of strings. It is meant to be used for most cases. It is suggested that specific string types are created as user-defined types using this parser. This parser supports: * various quoting modes for strings * escape character processing Parameters .......... quoting.mode ~~~~~~~~~~~~ Specifies how the string is quoted. Possible modes: * **none** - no quoting is permitted * **required** - quotes must be present * **auto** - quotes are permitted, but not required Default is ``auto``. quoting.escape.mode ~~~~~~~~~~~~~~~~~~~ Specifies how quote character escaping is handled. Possible modes: * **none** - there are no escapes, quote characters are *not* permitted in value * **double** - the ending quote character is duplicated to indicate a single quote without termination of the value (e.g. ``""``) * **backslash** - a backslash is prepended to the quote character (e.g ``\"``) * **both** - both double and backslash escaping can happen and are supported Default is ``both``. Note that turning on ``backslash`` mode (or ``both``) has the side-effect that backslash escaping is enabled in general. This usually is what you want if this option is selected (e.g. otherwise you could no longer represent backslash). **NOTE**: this parameter also affects operation if quoting is **turned off**. That is somewhat counter-intuitive, but has traditionally been the case - which means we cannot change it. quoting.char.begin ~~~~~~~~~~~~~~~~~~ Sets the begin quote character. Default is ". quoting.char.end ~~~~~~~~~~~~~~~~ Sets the end quote character. Default is ". Note that setting the begin and end quote character permits you to support more quoting modes. For example, brackets and braces are used by some software for quoting. 
To handle such strings, you can for example use a configuration like this:: rule=:a %f:string{"quoting.char.begin":"[", "quoting.char.end":"]"}% b which matches strings like this:: a [test test2] b matching.permitted ~~~~~~~~~~~~~~~~~~ This allows specifying a set of characters permitted in the to-be-parsed field. It is primarily a utility to extract things like programming-language-like names (e.g. consisting of letters, digits and a set of special characters only), alphanumeric or alphabetic strings. If this parameter is not specified, all characters are permitted. If it is specified, only the configured characters are permitted. Note that this option reliably only works on US-ASCII data. Multi-byte character encodings may lead to strange results. There are two ways to specify permitted characters. The simple one is to specify them directly for the parameter:: rule=:%f:string{"matching.permitted":"abc"}% This only supports literal characters and all must be given as a single parameter. For more advanced use cases, an array of permitted characters can be provided:: rule=:%f:string{"matching.permitted":[ {"class":"digit"}, {"chars":"xX"} ]}% Here, ``class`` is a specifier for the usual character classes, with support for: * digit * hexdigit * alpha * alnum In contrast, ``chars`` permits specifying literal characters. Both ``class`` as well as ``chars`` may be specified multiple times inside the array. For example, the ``alnum`` class could also be permitted as follows:: rule=:%f:string{"matching.permitted":[ {"class":"digit"}, {"class":"alpha"} ]}% matching.mode ~~~~~~~~~~~~~ This parameter controls the strict matching requirement of liblognorm, where each parser must be terminated by a space character. Possible values are: * **strict** - which requires that space * **lazy** - which does not Default is ``strict``, this parameter is available starting with version 2.0.6. In ``lazy`` mode, the parser always matches if at least one character can be matched.
This can lead to unexpected results, so use it with care. Example: assume the following message (without quotes):: "12:34 56" And the following parser definition:: rule=:%f:string{"matching.permitted":[ {"class":"digit"} ]} %%r:rest% This will be unresolvable, as ":" is not a digit. With this definition:: rule=:%f:string{"matching.permitted":[ {"class":"digit"} ], "matching.mode":"lazy"} %%r:rest% it becomes resolvable, and ``f`` will contain "12" and ``r`` will contain ":34 56". This also shows the risk associated, as the result obtained may not necessarily be what was intended. option.dashIsEmpty ~~~~~~~~~~~~~~~~~~ This parameter, if True, permits treating a string consisting only of dash characters ('-') as being empty. The default value is False. This parameter is meant for e.g. processing web log data where a dash indicates a missing value but the user does not want to populate an analysis backend with dashes where "empty value" is meant. word #### One or more characters, up to the next space (\\x20), or up to end of line. string-to ######### One or more characters, up to the next string given in "extradata". alpha ##### One or more alphabetic characters, up to the next whitespace, punctuation, decimal digit or control character. char-to ####### One or more characters, up to the next character(s) given in extradata. Parameters .......... extradata ~~~~~~~~~ This is a mandatory parameter. It contains one or more characters, each of which terminates the match. char-sep ######## Zero or more characters, up to the next character(s) given in extradata. Parameters .......... extradata ~~~~~~~~~~ This is a mandatory parameter. It contains one or more characters, each of which terminates the match. rest #### Zero or more characters until end of line. Must always be at end of the rule, even though this condition is currently **not** checked. In any case, any definitions after *rest* are ignored.
Note that the *rest* syntax should be avoided because it generates a very broad match. If it needs to be used, the user shall assign it the lowest priority among his parser definitions. Note that the parser-specific priority is also lowest, so by default it will only match if nothing else matches. quoted-string ############# Zero or more characters, surrounded by double quote marks. Quote marks are stripped from the match. op-quoted-string ################ Zero or more characters, possibly surrounded by double quote marks. If the first character is a quote mark, operates like quoted-string. Otherwise, operates like "word". Quote marks are stripped from the match. The parser also accepts an optional boolean configuration parameter ``escape``. If set to ``true``, quoted values treat ``\"`` as an escaped quote and ``\\`` as an escaped backslash, and the extracted value is unescaped accordingly. date-iso ######## Date in ISO format ('YYYY-MM-DD'). time-24hr ######### Time of format 'HH:MM:SS', where HH is 00..23. time-12hr ######### Time of format 'HH:MM:SS', where HH is 00..12. duration ######## A duration is similar to a timestamp, except that it tells about time elapsed. As such, hours can be larger than 23 and hours may also be specified by a single digit (this, for example, is commonly done in Cisco software). Examples for durations are "12:05:01", "0:00:01" and "37:59:59" but not "00:60:00" (MM and SS must still be within the usual range for minutes and seconds). date-rfc3164 ############ Valid date/time in RFC3164 format, i.e.: 'Oct 29 09:47:08'. This parser implements several quirks to match malformed timestamps from some devices. Parameters .......... format ~~~~~~ Specifies the format of the json object. Possible values are - **string** - string representation as given in input data - **timestamp-unix** - string converted to an unix timestamp (seconds since epoch) - **timestamp-unix-ms** - a kind of unix-timestamp, but with millisecond resolution.
This format is understood for example by ElasticSearch. Note that RFC3164 does **not** contain subsecond resolution, so this option makes no sense for RFC3164-data only. It is useful, however, if processing mixed sources, some of which contain higher precision. date-rfc5424 ############ Valid date/time in RFC5424 format, i.e.: '1985-04-12T19:20:50.52-04:00'. Slightly different formats are allowed. Parameters .......... format ~~~~~~ Specifies the format of the json object. Possible values are - **string** - string representation as given in input data - **timestamp-unix** - string converted to an unix timestamp (seconds since epoch). If subsecond resolution is given in the original timestamp, it is lost. - **timestamp-unix-ms** - a kind of unix-timestamp, but with millisecond resolution. This format is understood for example by ElasticSearch. Note that a RFC5424 timestamp can contain higher than ms resolution. If so, the timestamp is truncated to millisecond resolution. ipv4 #### IPv4 address, in dot-decimal notation (AAA.BBB.CCC.DDD). ipv6 #### IPv6 address, in textual notation as specified in RFC4291. All formats specified in section 2.2 are supported, including embedded IPv4 address (e.g. "::13.1.68.3"). Note that a **pure** IPv4 address ("13.1.68.3") is **not** valid and as such not recognized. To avoid false positives, there must be either a whitespace character after the IPv6 address or the end of string must be reached. mac48 ##### The standard (IEEE 802) format for printing MAC-48 addresses in human-friendly form is six groups of two hexadecimal digits, separated by hyphens (-) or colons (:), in transmission order (e.g. 01-23-45-67-89-ab or 01:23:45:67:89:ab ). This form is also commonly used for EUI-64. from: http://en.wikipedia.org/wiki/MAC_address cef ### This parses ArcSight Common Event Format (CEF) as described in the "Implementing ArcSight CEF" manual revision 20 (2013-06-15). It matches a format that closely follows the spec.
The header fields are extracted into the field name container, all extensions are extracted into a container called "Extensions" beneath it. Example ....... Rule (condensed format):: rule=:%f:cef%' Data:: CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a value cc=field 3 Result:: { "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a value", "cc": "field 3" } } } checkpoint-lea ############## This supports the LEA on-disk format. Unfortunately, the format is underdocumented, the Checkpoint docs we could get hold of just describe the API and provide a field dictionary. In a nutshell, what we do is extract field names up to the colon and values up to the semicolon. No escaping rules are known to us, so we assume none exists (and as such no semicolon can be part of a value). This format needs to continue until the end of the log message. We have also seen some samples of a LEA format that has data **after** the format described above. So it does not end at the end of log line. We guess that this is LEA when used inside (syslog) messages. We have one sample where the format ends on a closing bracket (`; ]`). To support this, the `terminator` parameter exists (see below). If someone has a definitive reference or a sample set to contribute to the project, please let us know and we will check if we need to add additional transformations. Parameters .......... terminator ~~~~~~~~~~ Must be a single character. If used, LEA format is terminated when the character is hit instead of a field name. Note that the terminator character is **not** part of LEA. If it should be skipped, it must be specified as a literal after the parser. We have implemented it in this way as this provides most options for this format - about which we do not know any details. Example .......
This configures a LEA parser for use with the syslog transfer format (if we guess right). It terminates when a closing bracket is detected. Rule (condensed format):: rule=:%field:checkpoint-lea{"terminator": "]"}%] Data:: tcp_flags: RST-ACK; src: 192.168.0.1; ] Result:: { "field": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } } cisco-interface-spec #################### A Cisco interface specifier, as for example seen in PIX or ASA. The format contains a number of optional parts and is described as follows (in ABNF-like manner where square brackets indicate optional parts): :: [interface:]ip/port [SP (ip2/port2)] [[SP](username)] Samples for such a spec are: * outside:192.168.52.102/50349 * inside:192.168.1.15/56543 (192.168.1.112/54543) * outside:192.168.1.13/50179 (192.168.1.13/50179)(LOCAL\some.user) * outside:192.168.1.25/41850(LOCAL\RG-867G8-DEL88D879BBFFC8) * inside:192.168.1.25/53 (192.168.1.25/53) (some.user) * 192.168.1.15/0(LOCAL\RG-867G8-DEL88D879BBFFC8) Note that the current version of liblognorm does not permit sole IP addresses to be detected as a Cisco interface spec. However, we are reviewing more Cisco messages and need to decide if this is to be supported. The problem here is that this would create a much broader parser which would potentially match many things that are **not** Cisco interface specs. As this object extracts multiple subelements, it creates a JSON structure. Let's for example look at this definition (condensed format):: %ifaddr:cisco-interface-spec% and assume the following message is to be parsed:: outside:192.168.1.13/50179 (192.168.1.13/50179) (LOCAL\some.user) Then the resulting JSON will be as follows:: { "ifaddr": { "interface": "outside", "ip": "192.168.1.13", "port": "50179", "ip2": "192.168.1.13", "port2": "50179", "user": "LOCAL\\some.user" } } Subcomponents that are not given in the to-be-normalized string are also not present in the resulting JSON.
iptables ######## Name=value pairs, separated by spaces, as in Netfilter log messages. Name of the selector is not used; names from the line are used instead. This selector always matches everything till end of the line. Cannot match zero characters. cisco-interface-spec #################### This is an experimental parser. It is used to detect Cisco Interface Specifications. A sample of them is: :: outside:176.97.252.102/50349 Note that this parser does not yet extract the individual parts due to the restrictions in current liblognorm. This is planned for after a general algorithm overhaul. In order to match, this syntax must start on a non-whitespace char other than colon. json #### This parses native JSON from the message. All data up to the first non-JSON is parsed into the field. There may be any other field after the JSON, including another JSON section. Note that any white space after the actual JSON is considered **to be part of the JSON**. So you cannot filter on whitespace after the JSON. Example ....... Rule (compact format):: rule=:%field1:json%interim text %field2:json% Data:: {"f1": "1"} interim text {"f2": 2} Result:: { "field2": { "f2": 2 }, "field1": { "f1": "1" } } Note also that the space before "interim" must **not** be given in the rule, as it is consumed by the JSON parser. However, the space after "text" is required. Parameter ``skipempty`` ....................... The optional ``skipempty`` flag removes empty values from the parsed JSON object before it is attached to the result. Empty strings, empty arrays, and empty objects are removed recursively. If the whole JSON value becomes empty, the parser matches but produces no field value. Example:: rule=:%field:json:skipempty% Input:: {"f1": "1", "f2": 2, "f3": "", "f4": {}, "f5": []} Result:: { "field": { "f1": "1", "f2": 2 } } Unknown flags are rejected as invalid configuration.
This is a potential backward-compatibility change for malformed rulebases that previously used extra ``json`` parser flags which were silently ignored. alternative ########### This type permits specifying alternative ways of parsing within a single definition. This can make writing rule bases easier. It also permits the v2 engine to create a more efficient parsing data structure resulting in better performance (to be noticed only in extreme cases, though). An example explains this parser best:: rule=:a % {"type":"alternative", "parser": [ {"name":"num", "type":"number"}, {"name":"hex", "type":"hexnumber"} ] }% b This rule matches messages like these:: a 1234 b a 0xff b Note that the "parser" parameter here needs to be provided with an array of *alternatives*. In this case, the JSON array is **not** interpreted as a sequence. Note, though, that you can nest definitions by using custom types. repeat ###### This parser is used to extract a repeated sequence with the same pattern. An example explains this parser best:: rule=:a % {"name":"numbers", "type":"repeat", "parser":[ {"type":"number", "name":"n1"}, {"type":"literal", "text":":"}, {"type":"number", "name":"n2"} ], "while":[ {"type":"literal", "text":", "} ] }% b This matches lines like this:: a 1:2, 3:4, 5:6, 7:8 b and will generate this JSON:: { "numbers": [ { "n2": "2", "n1": "1" }, { "n2": "4", "n1": "3" }, { "n2": "6", "n1": "5" }, { "n2": "8", "n1": "7" } ] } As can be seen, there are two parameters to "repeat". The "parser" parameter specifies which type should be repeatedly parsed out of the input data. We could use a single parser for that, but in the example above we parse a sequence. Note the nested array in the "parser" parameter.
If we just wanted to match a single list of numbers like:: a 1, 2, 3, 4 b we could use this definition:: rule=:a % {"name":"numbers", "type":"repeat", "parser": {"type":"number", "name":"n"}, "while": {"type":"literal", "text":", "} }% b Note that in this example we also removed the redundant single-element array in "while". The "while" parameter tells "repeat" how long to do repeat processing. It is specified by any parser, including a nested sequence of parser (array). As long as the "while" part matches, the repetition is continued. If it no longer matches, "repeat" processing is successfully completed. Note that the "parser" parameter **must** match at least once, otherwise "repeat" fails. In the above sample, "while" mismatches after "4", because no ", " follows. Then, the parser terminates, and according to definition the literal " b" is matched, which will result in a successful rule match (note: the "a ", " b" literals are just here for explanatory purposes and could be any other rule element). Sometimes we need to deal with malformed messages. For example, we could have a sequence like this:: a 1:2, 3:4,5:6, 7:8 b Note the missing space after "4,". To handle such cases, we can nest the "alternative" parser inside "while":: rule=:a % {"name":"numbers", "type":"repeat", "parser":[ {"type":"number", "name":"n1"}, {"type":"literal", "text":":"}, {"type":"number", "name":"n2"} ], "while": { "type":"alternative", "parser": [ {"type":"literal", "text":", "}, {"type":"literal", "text":","} ] } }% b This definition handles numbers being delimited by either ", " or ",". For people with programming skills, the "repeat" parser is described by this pseudocode:: do parse via parsers given in "parser" if parsing fails abort "repeat" unsuccessful parse via parsers given in "while" while the "while" parsers parsed successfully if not aborted, flag "repeat" as successful Parameters .......... 
option.permitMismatchInParser ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If set to "True", permits repeat to accept as successful even when the parser processing failed. This by default is false, and can be set to true to cover some border cases, where the while part cannot definitely detect the end of processing. An example of such a border case is a listing of flags, being terminated by a double space where each flag is delimited by single spaces. For example, Cisco products generate such messages (note the flags part):: Aug 18 13:18:45 192.168.0.1 %ASA-6-106015: Deny TCP (no connection) from 10.252.88.66/443 to 10.79.249.222/52746 flags RST  on interface outside option.failOnDuplicate ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If set to "True", causes parsers to fail if the name already exists in the tree. A no-op unless using ``..`` to cause a merger. cee-syslog ########## This parses cee syslog from the message. This format has been defined by Mitre CEE as well as Project Lumberjack. This format essentially is JSON with additional restrictions: * The message must start with "@cee:" * a JSON **object** must immediately follow (whitespace before it permitted, but a JSON array is **not** permitted) * after the JSON, there must be no other non-whitespace characters. In other words: the message must consist of a single JSON object only, prefixed by the "@cee:" cookie. Note that the cee cookie is case sensitive, so "@CEE:" is **NOT** valid. Prefixes -------- Several rules can have a common prefix. You can set it once with this syntax:: prefix=<prefix match description> Prefix match description syntax is the same as rule match description. Every following rule will be treated as an addition to this prefix. Prefix can be reset to default (empty value) by the line:: prefix= You can define a prefix for devices that produce the same header in each message. We assume that you have your rules sorted by device. In such a case you can take the header of the rules and use it with the prefix variable.
Here is an example of a rule for IPTables (legacy format, to be converted later):: prefix=%date:date-rfc3164% %host:word% %tag:char-to:-\x3a%: rule=:INBOUND%INBOUND:char-to:-\x3a%: IN=%IN:word% PHYSIN=%PHYSIN:word% OUT=%OUT:word% PHYSOUT=%PHYSOUT:word% SRC=%source:ipv4% DST=%destination:ipv4% LEN=%LEN:number% TOS=%TOS:char-to: % PREC=%PREC:word% TTL=%TTL:number% ID=%ID:number% DF PROTO=%PROTO:word% SPT=%SPT:number% DPT=%DPT:number% WINDOW=%WINDOW:number% RES=0x00 ACK SYN URGP=%URGP:number% Usually, every rule would hold what is defined in the prefix at its beginning. But since we can define the prefix, we can save that work in every line and just make the rules for the log lines. This saves us a lot of work and even saves space. In a rulebase you can use multiple prefixes obviously. The prefix will be used for the following rules. If then another prefix is set, the first one will be erased, and new one will be used for the following rules. Rule tags --------- Rule tagging capability permits very easy classification of syslog messages and log records in general. So you can not only extract data from your various log sources, you can also classify events, for example, as being a "login", a "logout" or a firewall "denied access". This makes it very easy to look at specific subsets of messages and process them in ways specific to the information being conveyed. To see how it works, let’s first define what a tag is: A tag is a simple alphanumeric string that identifies a specific type of object, action, status, etc. For example, we can have object tags for firewalls and servers. For simplicity, let’s call them "firewall" and "server". Then, we can have action tags like "login", "logout" and "connectionOpen". Status tags could include "success" or "fail", among others. Tags form a flat space, there is no inherent relationship between them (but this may be added later on top of the current implementation). Think of tags like the tag cloud in a blogging system.
Tags can be defined for any reason and need. A single event can be associated with as many tags as required. Assigning tags to messages is simple. A rule contains both the sample of the message (including the extracted fields) as well as the tags. Have a look at this sample:: rule=:sshd[%pid:number%]: Invalid user %user:word% from %src-ip:ipv4% Here, we have a rule that shows an invalid ssh login request. The various fields are used to extract information into a well-defined structure. Have you ever wondered why every rule starts with a colon? Now, here is the answer: the colon separates the tag part from the actual sample part. Now, you can create a rule like this:: rule=ssh,user,login,fail:sshd[%pid:number%]: Invalid user %user:word% from %src-ip:ipv4% Note the "ssh,user,login,fail" part in front of the colon. These are the four tags the user has decided to assign to this event. What now happens is that the normalizer does not only extract the information from the message if it finds a match, but it also adds the tags as metadata. Once normalization is done, one can not only query the individual fields, but also query if a specific tag is associated with this event. For example, to find all ssh-related events (provided the rules are built that way), you can normalize a large log and select only that subset of the normalized log that contains the tag "ssh". Log annotations --------------- In short, annotations allow adding arbitrary attributes to a parsed message, depending on rule tags. Values of these attributes are fixed, they cannot be derived from variable fields. The syntax is as follows:: annotate=<tag>:+<field name>="<field value>" Field value should always be enclosed in double quote marks. There can be multiple annotations for the same tag. Examples -------- Look at :doc:`sample rulebase <sample_rulebase>` for configuration examples and matching log lines. Note that the examples are currently in legacy format, only.
liblognorm-2.1.0/doc/contacts.rst000066400000000000000000000007731520037563000170030ustar00rootroot00000000000000Contacts ======== Mailing list ------------ If you have any questions about the library, you may mail to the rsyslog mailing list rsyslog@lists.adiscon.com. To subscribe: http://lists.adiscon.net/mailman/listinfo/rsyslog Web site -------- http://www.liblognorm.com/ Git repositories ---------------- - https://github.com/rsyslog/liblognorm.git - git://git.adiscon.com/git/liblognorm.git Authors ------- Rainer Gerhards , Adiscon GmbH His blog: https://rainer.gerhards.net/ liblognorm-2.1.0/doc/graph.png000066400000000000000000003267431520037563000162520ustar00rootroot00000000000000‰PNG  IHDR ´X¥YbKGDÿÿÿ ½§“ IDATxœìÝi¼UÁöÿëŠ2:€€¦("ƒ˜ˆ8EÎ3¢8Ò};Ë£¦iÓs[ie=ΉY™3¦æ„Sh¡(â„ ƒ¨¨L†¨ÌŠÀaÿ_tç?KÍÊsÖ–ýý~>¾p¯ÍY¿}Þ—k­]U*•J*Rƒ¢€â ‚ ‚5*:¨µµµ™?~æÏŸŸ… fÍš5Y²dIjkk³îºë¦¦¦&555iÞ¼yÚ´i“æÍ› À@_p|ðAÆ—I“&eòäÉ™2eJ^yå•ÌŸ??µµµŸù笻îºiÛ¶m¶ÝvÛtîÜ9]»vÍŽ;;Öa=P´*ßb _,¥R)ãÆËC=”Ñ£Gç™gžÉû￟–-[¦[·nÙn»í²õÖ[§M›6iݺu6Ùd“4oÞ< 6L“&MR]]åË—gåÊ•yÿý÷³pá¼ùæ›™7o^æÌ™“iÓ¦eÊ”)™6mZV¬X‘Ö­[§oß¾éß¿8à€´iÓ¦è_ð92ÀÄ3Ï<“áÇçÞ{ïÍìÙ³Ó±cÇì¶Ûnéׯ_vÝu×l¶ÙfŸëùjkkóâ‹/fôèÑ=zt{ì±,Z´(={öÌ¡‡š£Ž:ÊXk!”±eË–åæ›oε×^›^x!;ì°C?üðtÐAéÔ©S½¶¬Zµ*£GÎ]wÝ•»ï¾; .Ì“O>9{î¹g½¶Ÿ!”¡¥K—æç?ÿy.½ôÒ¼ÿþû4hPN>ùäì´ÓNE§%ùËsï¹çž\{íµ=ztvÜqÇüÏÿüOöÝwßTUUü „PFÖ¬Y“_þò—ùîw¿›U«VåŒ3ÎÈÙgŸ-Zö‰&L˜üà1bDzôè‘«®º*_ùÊWŠÎ>£E1~üøôêÕ+C‡Í!CòÚk¯å /,ëq0I¾üå/çÞ{ïÍøñã³îºë¦W¯^9ù䓳hÑ¢¢Ó€ÏÀ@+•JùÙÏ~–wÞ9ë®»nþô§?å§?ýiÙƒo‡vȨQ£rã7æ¾ûîË;ì§Ÿ~ºè,àŸ0@-Z”}öÙ'ßùÎwò£ý(?þx:wî\tÖ¿­ªª*G}t&Ožœ.]º¤oß¾¹ôÒK‹Î>E£¢ RÍ;7ûì³O/^œ±cÇ–Í|Zµj•#FäŠ+®È9眓×_=—_~y4p”!`Μ9éÓ§Oš5k–§Ÿ~:­[·.:ésWUU•3Ï<3:tÈG‘E‹å†n0@™ñ-ÆPÏ,X]vÙ%mڴɈ#²á†Tçž}öÙ 0 GuT† Vtð7 „PV­Z•þýûgéÒ¥yòÉ'Ó´iÓ¢“êͨQ£²÷Þ{çâ‹/ΙgžYtð¿\Ûõè‚ .È믿ž‡~¸¢ÆÁ$Ù}÷Ýsýõ×ç¼óÎË„ ŠÎþ—+ žŒ;6ýû÷ÏÃ?œ=öØ£èœÂœxâ‰yâ‰'ò /duÖ):*žêA©TJÏž=³Ë.»ä²Ë.+:§Pï½÷^ºvíšÓN;-gŸ}vÑ9Pñ „Pn»í¶œqÆyå•WÒ¬Y³¢s wÇwä”SNÉ+¯¼’æÍ›Í3 |ÿûßÏСCƒÿëðÃÏÆoœ«®ºªè¨xB¨cO=õT^~ùåüñE§”ªªª|ýë_Ïõ×_Ÿ5kÖÍ@uì–[nI¿~ýÒ¾}û¢SÊÊ1Ç“7Þx#O<ñDÑ)PÑ „PÇFþýûQv6Ùd“tìØÑ@3@z÷Ýw3}úôì¼óÎE§”¥]vÙ%O=õTÑPÑ „P‡^{íµ”J¥l¹å–E§”¥/}éKyíµ×ŠÎ€Šf 
€:ôî»ï&IZ¶lYpIyÚxãóöÛoÍ@uhùòåI’&Mš\RžÖ[o½,]º´è ¨hB¨CÍš5Kòÿ_IÈG½óÎ;®®€‚ m´ÑFI’ \RžÞ~ûíG@1 „P‡¶ÞzëÔÔÔdòäÉE§”¥I“&eûí·/:*šêPãÆÓ£G<õÔSE§”¥±cÇfçw.:*šêØ€2räÈ”J¥¢SÊʸqãòöÛoçk_ûZÑ)PÑ „PÇŽ=öØÌœ93cÆŒ):¥¬üæ7¿I¿~ý²ÕV[­ªä?c@;øàƒ³ÞzëåÖ[o-:¥,,^¼8›o¾y~ùË_fàÀEç@E3@=˜2eJvØa‡<÷ÜséÞ½{Ñ9…ûÖ·¾•G}4Ï?ÿ|ªªªŠÎ€Šf €zrâ‰'fÊ”)yâ‰'Ò°aâs 3uêÔì¸ãŽyà²Ç{Ï@õdÁ‚éÖ­[N:é¤\pÁEçbÅŠéÙ³gºtéâvk(B¨G#GŽÌ~ûí—»îº+p@Ñ9õªT*å˜cŽÉرc3qâÄl¸á†E'ñ-ÆP¯ óÎ;/Gydž~úé¢sêÕ·¿ýíÜsÏ=¹ãŽ;ŒƒPF\Aõ¬T*åä“OÎí·ßž#F¤oß¾E'Õ©R©”oûÛ¹ôÒKsß}÷åk_ûZÑIÀßp!Ô³ªªª 6,ûï¿ ßýîwE'Õ™U«Vå„NÈe—]–[o½Õ8eÈ@hذanºé¦ :4GqD¾õ­oeõêÕEg}®fÍš•ÝvÛ-÷Ýw_~øávØaE'Ã@©ªªÊÅ_œë®».W]uUvÝu×Ìœ9³è¬ÏÅ]wÝ•vØ!‹/ÎØ±c³Ûn»|!ìøãϸqã²bÅŠtéÒ%?øÁ²bÅŠ¢³þ-3gÎÌþûïŸÃ;,‡~xž{î¹têÔ©è,àS  l»í¶yî¹çòýï??ûÙÏÒµk×ÜrË-©­­-:í3Y°`AÎ;ï¼téÒ%3gĮ̂Q£ò‹_ü"ë®»nÑiÀ?a €2Q]]sÏ=7S§NMïÞ½s '¤sçιá†òÁ÷±æÌ™“óÎ;/[n¹en¸á†\tÑE™8q¢[Šà ¤ªT*•ŠŽþÑ+¯¼’ /¼0·Ýv[6Ø`ƒwÜqù¯ÿú¯ÂoÙ­­­ÍÈ‘#síµ×æÁÌFm”sÎ9'§œrJÖ[o½BÛ€ÊÜ[o½•ßüæ7ùÕ¯~•™3g¦K—.9äCrðÁ§[·niРîo zï½÷òØcå®»îÊý÷ߟwÞy'{î¹gN:é¤pÀ©®®®ó nà ¢T*åÙgŸÍÝwß»îº+3gÎL‹-Ò§OŸôíÛ7Ûo¿}ºté’M7Ýô?:Ommm^yå•Lž<9ãÇÏO<‘çŸ>kÖ¬IŸ>}r衇æÀLûöí?§OÉ@_PÓ§OÏèÑ£óä“Oæ‰'žÈìÙ³“$-[¶Ì6Ûl“M7Ý4mÛ¶Í&›l’ 6Ø 555Yo½õ²Î:ëdéÒ¥Y½zu–,Y’¥K—fΜ9ùóŸÿœÙ³ggÆŒùàƒÒ¨Q£tëÖ-}ûöM¿~ýÒ§OŸ´jÕªàO |Þ „°–X¸pa&Ožœ©S§æ¥—^Êüùó3gΜ̟??‹/ÎÊ•+³|ùò¬\¹2Mš4Iuuuš4i’¦M›~8$¶k×.:uJçÎÓ©S§4nܸèÔ1!TáÇgðàÁñ×à¯êþiÆ@Ù2@éÞ½{~ò“Ÿ”·@s!T0!T0!T0!T0!T0!T0!T‰'æüóÏ/:(#B¨ S§NÍÅ_\tPF „PÁªJ¥R©è ~,[¶,o½õV¶ÜrË¢S€2a € æc¨`B¨`B¨`B¨`B¨`B¨`B¨ #FŒÈV[mUtPF „PA–-[–™3g”!T-ZdÇw,:(#U¥R©TtP W@3@3@3Z²dIÑ @=3@…[±bEþßÿûÙm·ÝÒ²eË¢s€zæ[Œ€¬^½:íڵ˟ÿüçü+Exýõ׳Å[Ô]Pç\AäÝwßÍøñãÿáõF¥iÓ¦ÿÒÏš3gNŽ=öØÏ+ (ˆ*Èïÿûì´ÓNÿñÏY°`AöÝwß¼õÖ[ŸCP¤FEõoÙ²eùÞ÷¾—… f“M6É|eË–}ä=S¦LÉ·¾õ­tïÞ=óæÍËĉsÅWd—]vɰaÃ2iÒ¤4mÚ4'Ÿ|r~ñ‹_$I–.]šË.»,³fÍÊŒ3²zõê\~ùåéÑ£Gø <ƒ*ÈÔ©Ssï½÷æÈöÛoŸk®¹&É_ž%رcǬ\¹òÃg¶oß>555™1cFJ¥RÚ¶m›u×]7¯¼òJ’¤ªª*;vÌôéÓ“$kÖ¬ÉþûïŸë®».›nºi’dРAyä‘G2sæÌ4kÖ¬€O ü3® € ²ÝvÛeÔ¨Qyúé§sÝu×}øú[l‘Í7ßöPž „Pa¶ÜrË$¹%øÓüñiÒ¤Iú÷ïŸ${õàß¾Ö¹sç,_¾<Æ ûÈ{Þ|óÍx (n1€ söÙgçÁÌYg•víÚ¥wïÞ™çœsNæÎ›~ýúå7ÞȈ#rçwù‘€Oá B¨0ýû÷σ>˜víÚe·ÝvËFm”›o¾9]ºtÉI'”I“&¥¶¶6_|qª««sÀ¤Y³f9ãŒ3Ò¨Q£œp ©ªªÊ…^˜ÚÚÚüä'?I’¬¿þúyôÑG³çž{æê«¯Î1Ç“çž{.7ß|sš6mZð§>IUéÓž2 ¬Õ\Adâĉ9ÿüó‹Îʈ*ÈÔ©SsñÅ”!T0Ï € 
²lÙ²¼õÖ[ÙrË-‹NÊ„*˜[Œ ‚ ‚ ‚ ‚ ‚ ‚ ‚Œ1"[mµUÑ@1@Y¶lYfΜYtPF „PAZ´h‘wÜ±è  ŒT•J¥RÑ@1\AÌ@¬QÑ@Ýxûí·s÷Ýw¦÷n°Á9òÈ#ë¸(GžAk©•+WfuÖI’TWWâûV­Z•ã?>×_}}¥eÄ-ư–jܸq† ’êêê¬ZµêÿIâêA¨`® €µØ¨Q£²Ç{|ê{š7ož·Þz+yT"WÀZ¬ÿþÙh£>ñxãÆ3xð`ã T0!¬Å4hÁƒ§qãÆ{|åÊ•9ê¨£ê¹ ('n1€µÜsÏ=—ž={~ì±Í6Û,³gÏNUUU=Wå„°–ëÑ£GÚ·oÿ¯WWWç˜cŽ1@…3@8þøãS]]ý‘×V­ZåöbÀ-ÆP ¦M›–í¶Ûî#¯m»í¶™6mZAE@¹p!T€N:¥sçÎÞN\]]ãŽ;®à*  BwÜqiذa’dõêÕ8p`ÁE@9p‹1TˆÙ³ggóÍ7O©TÊN;í”矾è$  ¸‚*D»víÒµk×$q{1ð!WÀZdÕªUy饗2cƌ̙3'sçÎͼyó²|ùò,]º43fÌȬY³Ò·oßÔÔÔ¤iÓ¦iÚ´i6Ûl³´mÛ6íÚµËvÛm—víÚýQ€zb €/°yóæeÔ¨Qyì±Çò§?ý)S¦LÉÊ•+SUU•M7Ý4mÚ´Éf›m–u×]7M›6ÍêÕ«óøãgÏ=÷Lmmm–,Y’Å‹gΜ9™3gN–,Y’$iÑ¢Eºuë–^½ze÷ÝwÏ.»ì’õÖ[¯àO Ô!|ÁŒ?>wÜqGî¿ÿþL›6-Mš4IŸ>}Ò½{÷tïÞ=ݺuK‡R]]ý±~æÌ™ÙrË-?öØ’%K2eÊ”Lš4)/¼ðBž}öÙLœ81ÕÕÕéÝ»w9ävØaÙtÓMëò#õÈ@_óçÏÏ/ùËÜxãyõÕW³í¶Ûæ°ÃË€Ò³gÏO?ï¼óNüñ<øàƒ¹÷Þ{³dÉ’ôíÛ7'tR9ä:=7P÷ „PÆ^|ñÅüìg?Ëí·ßž 7Ü0_ÿú×3hРl¿ýö…ô¬\¹2<òHn¼ñÆÜ{ï½Ùd“Mrê©§æ´ÓNKÓ¦M iþ3B(C3gÎÌ\[o½5]ºtÉ™gž™#<2555E§}höìÙ6lX®½öÚ4lØ0çw^N;í´¬»îºE§ÿ!”‘>ø ^xa~úÓŸfóÍ7ÏøÃ 80UUUE§}¢E‹å§?ýi®¼òÊ´jÕ*×\sMöÞ{€ÏÈ@eâ™gžÉ!C2{öì\tÑE9õÔSÓ¨Q£¢³>³7ß|3gŸ}v~ûÛßfðàÁ¹âŠ+Ò²eË¢³€¢AÑ@rùå—§oß¾iß¾}&Ožœ¡C‡~¡ÆÁ$iݺun»í¶<ðÀ=ztvÚi§Œ7®è,àŸ0@V¬X‘#Ž8"çž{n~ô£åá‡Î[lQtÖdß}÷ÍĉÓ©S§ìºë®¹ñÆ‹N>Åë?IÀZdÙ²e9ðÀ3yòäüáH¿~ýŠNúÜ´lÙ2<ð@~ðƒdÈ!Y¸paÎ<óÌ¢³€a €¼÷Þ{0`@fÍš•'žx"Ûn»mÑIŸ» ä‚ .H›6mrê©§fÕªU9÷Üs‹ÎþŽêYmmmŽ<òÈüùÏΓO>ù…¿¥øŸ9ñijÁäØcM›6m2xð࢓€¿a €zöo|#£GÎ3Ï<³Öƒõ×AtÈ!Ù|óÍÓ§OŸ¢“€ÿUU*•JEG@¥xôÑG³ß~ûåá‡Îî»ï^tN½;å”S2räÈLš4)Mš4):ˆêÍâŋӵk× 2$\pAÑ9…øàƒ²ÓN;¥wïÞ¹öÚk‹Îb €zóo|#=öXž}öÙTWWS˜?ýéOéÙ³gÆŒ“=zÏ@õ`îܹÙzë­óÀTä­Åï¿ÿû¿3kÖ¬Œ9²è¨x Š€Jpá…¦W¯^õ:.Z´(­ZµÊ=÷Üóo¯KßûÞ÷òøãçÉ'Ÿ¬÷se €:¶|ùòÜrË-9餓êõ¼5Ê;3%K–ü[ÇëRûöí³÷Þ{ç¿øE½Ÿø(!Ô±{ï½7 4ÈX¯çmÒ¤IÚ´i“m¶Ùæß:^׎9æ˜Ü{ï½Yºti!çþÂ@uì¾ûîË€RSSSïçîÚµkºvíúo¯Kûî»oV¯^í9„P0!Ô±§Ÿ~:½zõ*äÜguVš4iòo¯K555éÞ½{žyæ™BÎü…êÐܹs3gΜôèÑ£ó0à?:^×vÚi§Œ7®Ð¨tB¨CóæÍK’´iÓ¦à’òÔ¾}ûÌ™3§è ¨hB¨C .L’´lÙ²à’òÔ²e˼óÎ;Eg@E3@Z¾|y’dýõ×/¸¤<­»îºY¶lYÑPÑ „P‡š5k–äÿ¿’z÷ÝwÓ¢E‹¢3 ¢ ýuüz÷Ýw .)OB(žêP‡Ò°aÃ̘1£è”²4}úôtêÔ©è ¨hB¨C믿~ºté’gžy¦è”²4nܸôèÑ£è ¨hB¨c»îºkÆŒStFÙ™7o^^yå•ôéÓ§è¨hB¨cƒ ʘ1c2{öì¢SÊÊðáóùæ›§wïÞE§@E3@ëÝ»w¶Øb‹ >¼è”²rË-·äè£NƒþZEò¿ÄPǪªªrÎ9ç䪫®ÊŠ+ŠÎ) <òHf̘‘ÓN;­è¨xB¨_ÿú×Ó¸qã\sÍ5E§®T*åÿþßÿ›ÓN;-­[·.:*žêAãÆsÑEå‚ 
.ȬY³ŠÎ)Ô/~ñ‹Ì˜1#çw^Ñ)@’ªR©T*:*Å~ûí—+VäÑGMUUUÑ9õîÕW_M÷îÝ3lذsÌ1Eç1@½zóÍ7Ó¥K—œxâ‰ùñ\tN½Z¼xqvÝu×l½õֹ뮻ŠÎþW£¢ ’´nÝ:ÇϾûî›Í7ß<'Ÿ|rÑIõ¢¶¶6‡zhÞÿýüæ7¿):øB¨g ÈUW]•ÓO?=믿þZ«íÊ•+3hРLš4)cÇŽMÓ¦M‹Nþ† pÊ)§dñâÅ9î¸ã²dÉ’œvÚiE'Õ‰÷Þ{/|pÆŸ‘#G¦C‡E'Ç@9ÿüó³á†æôÓOÏôéÓsÉ%—¤qãÆEg}nfΜ™C=4óçÏÏèÑ£Ó¹s碓€Ñ è¨d§žzjî¾ûîÜtÓMéׯ_fÏž]tÒçâþûïÏŽ;î˜Få™gž1@3@Á:è <ÿüóY¶lY:w¯¾:kÖ¬):ëß²`Á‚sÌ19à€røá‡çÉ'ŸLûöí‹Î>…ÊÀ6Ûl“çŸ>gœqFÎ<óÌôë×/Ï<óLÑYŸÙêÕ«síµ×¦sçÎyüñÇ3bĈüò—¿LMMMÑiÀ?a €2QSS“‹.º(ãÇOUUUvÞyçtÐAyñÅ‹NûDkÖ¬Ém·Ý–N:eèС9ꨣ2eÊ”ì¿ÿþE§Ÿ‘ÊL·nÝòÄOäÁÌo¼‘nݺå«_ýjî¿ÿþ²¹õxÑ¢E¹ôÒKÓ¡C‡}ôÑÙyç3}úô\~ùåÙpà ‹ÎþU¥R©TtðñJ¥RFŽ™+¯¼2#GŽLûöí3hР 4(Ûo¿}½¶|ðÁyä‘GrÛm·eĈiذa† ’ÓN;-:t¨×àóc €/ˆ—_~97ÝtSn¿ýö¼üòËÙf›mòÕ¯~5»ï¾{úõë—-Z|îç|饗2jÔ¨<öØcyôÑG³téÒôïß?ƒ ÊG‘ 6Øàs?'P¿ „ð4a„Œ1"£FʳÏ>›•+WæK_úRºwïž®]»f›m¶IûöíÓºuë´mÛöS¿,dáÂ…™;wnfÏž9sæäÅ_̤I“2qâÄ,Z´(­ZµÊn»í–Ýwß=|p6Ùd“zü¤@]3ÀÜ{ï½—§žz*&LÈ /¼I“&åå—_ÎÊ•+?|OUUUš5k– ¤AƒY½zuV­Z•eË–}äg5kÖ,Ûm·]ºwïžnݺ¥W¯^éÖ­[ªªªêûcõÄ@k¡R©”ùóçgÞ¼y™;wnÞ{ï½,^¼8O?ýtn¸á†\{íµiÔ¨Q6Ø`ƒ4oÞ¹ãŽ;ŠÎʈ+ ‚¹‚*˜*˜*˜*˜*˜*˜*Ș1c2pàÀ¢3€2b € 2kÖ¬ÜyçEgeÄ@¬ªT*•ŠŽŠá B¨`B¨`B¨`B¨`B¨`B¨`B¨`B¨ ÇOUUUÑ@1@3@iß¾}?üð¢3€2RU*•JEGÅp!T0!T0!T0!T¨%K–”!T˜Ë/¿<{ì±GZµjUta‰L IDAT P|‹1T˜Õ«W§}ûöyóÍ7S×xýõ׳Å[Ôé9€ÿŒ+ ‚Ìš5+÷ÜsO6ÜpÃ:?ל9srì±ÇÖùy€ÿŒ*Ș1c2pàÀ:?Ï‚ ²ï¾ûæ­·Þªósÿ!T°©S§¦GiÔ¨Qºté’qãÆ}xì­·ÞÊgœ‘³Î:+çž{nz÷î“N:)o¾ùæ‡ïyá…²Ûn»å¢‹.Ê·¿ýí4lØ0K—.ͰaÃ2iÒ¤üùÏÎÉ'Ÿ\ÄG>#Ï € 2kÖ¬<ûì³ùÞ÷¾——^z)ßýîwsÆgdÚ´iéß¿vÜqÇŒ7. 
,H=râ‰'æ[ßúV’dñâÅÙyç³dÉ’<÷ÜsiÓ¦M¶Új«¬^½:o¼ñF’äÄOÌ…^˜7Þ8UUU騱c¦OŸ^äGþ !T m·Ý6/½ôRjkkÓ Á_n,Úb‹-2gΜ¬^½:ßøÆ7r饗æí·ßNË–-?üs·ß~{ ”SO=5W_}uš7ožE‹eذa9餓òÒK/e³Í6ˆnh €/·@ûë8˜$555©­­M’Œ=:IÒ´iÓ¼¿ÿþIþò,Ã$¹üòËÓ°aÜzê©éÑ£G.\X/_€|~ „À?X³fM’äõ×_ÿÈë-Z´H’¬·ÞzI’ãŽ;.Ï?ÿ|öØcŒ?>»ì²K.»ì²zmþ3Bàì±ÇI’‘#G~äõ9sæ$IöÛo¿$ÉÅ_œvØ!øÃr÷Ýw§ªª*ßûÞ÷>|¿'@ùó B¨@[o½u^y啬^½: 6L’tèÐ!¯¾újjkk³páÂôìÙ3«W¯ÎĉÓ¬Y³$Éyç—G}4O>ùdÖ_ýl²É&™>}zš7ož$iß¾}Zµj• &d£6ʪU«2uêÔ´iÓ¦°Ï |º†\pÁEGõcÍš5¹îºërë­·fÍš5iܸq¾üå/禛nÊ-·Ü’R©”šššôë×/ÇsLæÎ›K.¹$/½ôRî¿ÿþTWWçúë¯O“&M’$ßüæ7sï½÷fùòåyðÁS*•rà 7¤yóæiÚ´iFŽ™åË—gï½÷.ø“ŸÄ„PÁ<ƒ*Ș1c2pàÀ¢3€2b € 2kÖ¬ÜyçEgeÄ@Ì3 ‚¹‚*˜*˜*˜*˜*˜*˜*˜*ÈðáÃSUUUtPF „PÁ „PAÚ·oŸÃ?¼è  ŒT•J¥RÑ@1\AÌ@¬QÑ@ÝX½zu–.]ú™Þ[UU•fÍšÕqPŽ „°–Z´hQ6Úh£ÏôÞ½öÚ+<òHåÈ-ư–jÕªUºuëö™Þ;hР:®Ê•ÖbçœsN4øôÿÛ_]]C9¤žŠ€rc €µØA”êêêO<Þ¨Q£ì½÷Þž?Ì@k± 6Ø ûî»o5úøÇ×ÖÖæ˜cŽ©ç* œ`-wôÑG§¶¶öcÕÔÔd¿ýö«ç" œ`-·Ï>ûdýõ×ÿ‡×«««s衇¦¦¦¦€* \`-·Î:ëdàÀÿð,ÂU«VeðàÁUåÂ@`РAYµjÕG^kÞ¼yöÜsÏ‚Š€ra € °ûî»§eË–þ{ãÆsÔQG}â——•Ã@ aÆ9úè£Ó¸qã$ÉÊ•+3hР‚«€rPU*•JEGuï¹çžKÏž=“$mÚ´Éœ9sRUUUpP4W@…øÊW¾’¶mÛ&IŽ=öXã $ñÀX ­Y³&óçÏÏœ9s2oÞ¼¼÷Þ{Yºti6ÞxãÌ™3'7ίýë4mÚ4n¸aÚ¶m›¶mÛ¦iÓ¦E§õÌ-Æð·lÙ²Œ3&&LÈ /¼I“&åÕW_ýÈ·7lØ0n¸aªªª²téÒ4iÒ$µµµY²dÉG~ÖlÎ;gûí·O·nÝÒ³gÏì°ÃiÐÀÍG°¶2ÀÐóÏ?Ÿx üãóì³ÏfÍš5ÙrË-Ó½{÷tíÚ5[o½uÚµk—Í6Û,mÚ´É:ë¬ó៽ÿþû³ÿþûøï‹/Μ9s2wîÜÌž=;S¦LɤI“ò /äí·ßN‹-Ò¿ÿì±Ç9øàƒÓºuë">2PG „ð1uêÔÜ|ó͹ýöÛóÚk¯eÛm·ÍW¿úÕì±Çéׯ_Üüꫯæüc{ì±<òÈ#Y´hQúöí›樣ŽrK2¬ „PÆÖ¬Y“‡z(W^yeþð‡?d«­¶Ê‘G™Ã?<]»v­×–U«VåÑGÍí·ßž{ï½7kÖ¬ÉqÇ—3Î8#;v¬×àóc €25bĈ|ç;ßÉÔ©S3`À€ :4 (‹o^²dIn¸á†üüç?Ï«¯¾šAƒåûßÿ~:tèPtð/ò¤a(3&LHïÞ½sÐA¥cÇŽ™¹l®üW¼ûî»9ûì³sã7fÈ!¹úê«SSSStð)\A»óÎ;Ó£G4kÖ,/¾øbN9å”/ä8˜$-Z´È 7Üûï¿?÷Ýw_z÷î×^{­è,àS @W\qEŽ8∠2$=öXÚµkWtÒçb¿ýöËóÏ?ŸªªªôîÝ;“'O.: øB(È~ô£œ}öÙ6lX.¿üòTWWô¹úÒ—¾”1cÆd‡vHÿþý3nܸ¢“€á„P€k®¹&C‡Í 7ÜÁƒS§V®\™ÁƒgôèÑ;vl:tèPtð7 „PÏî¿ÿþ|ðÁùÕ¯~•N8¡èœzQ[[›8 3fÌÈØ±c³ÑFü/!Ô£yóæ¥K—.9餓òãÿ¸èœzµxñâìºë®Ùj«­rÏ=÷ü/!Ô“R©”}÷Ý7«W¯ÎÈ‘#¿°ßTüŸxíµ×Ò­[·üüç?ÏqÇWt_Rõæ–[nÉØ±csÝu×Uä8˜üå‹K.½ôÒœuÖYY°`AÑ9@\AõbåÊ•éØ±cÎ<óÌüŸÿóŠÎ)T©TJïÞ½Ó»wï\rÉ%Eç@Ås!Ôƒ_ýêW©­­ÍI'TtJ᪪ªòÃþ0×\sMæÎ[tT<!Ô±R©”K.¹$gœqFjjjêí¼‹-J«V­>ñ AþÙñº´çž{¦cÇŽ¹úê«ëýÜÀG 
Ž=õÔSyã7rä‘GÖëy5j”wÞy'K–,ù·Ž×µcŽ9&·ÞzkÖ¬YSÈù€¿0@ûío›]wÝ5mÛ¶­×ó6iÒ$mÚ´É6Ûlóo¯kGydfÍš•±cÇr~à/ „PÇÆŒ“]vÙ¥swíÚ5]»vý·×¥Ö­[§C‡yâ‰' 9?ðB¨CË–-Ë‹/¾˜^½zrþ³Î:+Mš4ù·×µ=zdܸq…0@zõÕWS[[[Øm¼ øŽ×µŽ;fÚ´i…6@¥3@z÷Ýw“$-Z´(¸¤<µhÑâÃßP !Ô¡E‹%Iš7o^pIyjÞ¼¹ f €:´þúë'I–/_^pIyzï½÷ }"` €:õ×+ßyç‚KÊÓÛo¿–-[Í@u¨mÛ¶I’yóæ\RžfÏžýáï(†êPëÖ­Ó¶mÛ<÷ÜsE§”¥ñãÇç+_ùJÑPÑ „PÇvÞyç<ýôÓEg”+VdâĉéÕ«WÑ)PÑ „PÇ:è <òÈ#yï½÷ŠN)+<ð@ª««óÕ¯~µè¨hB¨ctPJ¥RFŒQtJY¹ùæ›sðÁgƒ 6(:*ZU©T*k»ÓN;-/¾øbF]tJYxýõ׳Í6ÛdÔ¨QéÓ§OÑ9PÑ „PæÍ›—:ä¾ûîË^{íUtNá¾þõ¯gÞ¼yyøá‡‹N€Šg €zrÞyçå‘GɳÏ>›ÆS˜ &¤W¯^;vlvÚi§¢s â ž,]º4]ºtɱÇ›þð‡EçbÅŠÙi§²ë®»æšk®):ˆêÕÿøÇì½÷ÞùýïŸÝwß½èœzwòÉ'çøC&Nœ˜&MšÄ·@½Úc=rÖYgåCÉäÉ“‹Î©W_|q®¿þú >Ü8eÄ„PÏjkksÈ!‡ä…^Èc=–/}éKE'Õ¹[n¹%'œpBn¸á† <¸èào¸‚êYÆ sÛm·¥mÛ¶éÓ§O¦M›VtR6lXŽ=öØüøÇ?6@2@Ö[o½Œ92;wή»îšÇ{¬è¤ÏÝš5kòÝï~7§Ÿ~z®¸âŠœsÎ9E'Ã@YýõóÀdÀ€Ùk¯½òÓŸþ4kË€Þyçì³Ï>¹ä’KrÓM7åŒ3Î(: øžAeàÊ+¯Ì9眓Ýwß=×\sÍú¹„#FŒÈi§–êêêÜu×]Ùa‡ŠN>…+  :4cƌɜ9sÒµk×\vÙeY¹reÑYÿ’¹sçfàÀ9ðÀ³Ûn»eܸqÆAø0@™èÑ£G&L˜sÏ=7çŸ~¶Ûn»Üzë­Y³fMÑiŸêwÞÉ7¿ùÍl½õÖ7n\~ÿûß禛nJ‹-ŠN>·@š5kVþçþ'7ß|s:uꔡC‡fðàÁYo½õŠNûÐk¯½–aÆåW¿úU7nœï|ç;9å”SÒ¸qã¢Ó€ÊØ´iÓrÉ%—äÖ[oÍzë­—N8!GuT¾üå/ÒóÁ䡇ÊM7Ý”ûï¿?›m¶YN=õÔœvÚiiÒ¤I!MÀÆ@_o¿ýv~ýë_çÆoÌôéÓ³õÖ[gàÀÙk¯½Ò«W¯¬³Î:uvî äñÇÏ<ûî»/Ë–-Ëž{î™ÿþïÿÎA”† ÖÙ¹€ºg €/˜‰'æöÛoÏý÷ߟ)S¦d½õÖK¯^½²ÓN;eûí·O·nݲõÖ[ÿ[£á¢E‹òâ‹/fÒ¤I™8qbž}öÙLž<9묳NvÙe—zè¡9ì°Ã²ÑFÕÁ'Š` €/°ùóçgÔ¨QyüñÇ3a„¼øâ‹Y±bE’d“M6I›6mÒ¶mÛÔÔÔ¤yóæiذaš4i’Å‹gåÊ•Y¾|yÞ}÷ÝÌ›7/³fÍÊòåË“$m´Q¶ß~ûôìÙ3»ï¾{z÷îššš"?*PG „°Y½zu^~ùå̘1#³fÍʼyó2wîܼÿþûY´hQæÎ›iÓ¦eÏ=÷Luuuš4i’æÍ›g³Í6K»víÒ¶mÛtîÜ9mÚ´)ú£õÄ@døðá¹ãŽ;ŠÎÊHU©T*ÄPÁ „PÁ „PÁ „PÁ „PÁ „PÁ „PAÆŒ“”!TY³fåÎ;ï,:(#B¨`U¥R©TtP W@3@3@3@3@3@3@3@>|xªªªŠÎʈ*Ð /¼ÝvÛ-]tQ¾ýío§aÆYºti’déÒ¥ùÁ~ÿú¯ÿJß¾}Ó»wï<÷ÜsþÙeË–å›ßüfœo}ë[2dH.¼ðÂtêÔ)¥R)>ø`Î8ãŒl¾ùæ™5kVöÚk¯4jÔ(]»vÍøñã“$¿ýíoSSSóáX¹téÒüú׿þÈkË—/Ïï~÷»œp éÓ§On¹å–4oÞ<[n¹ež}öÙŒ=:½zõJuuu:w'ÖóoÖU¥R©TtP?ÆŒ“+¯¼2ãÇÏêÕ«óÆo$IN<ñÄ\xá…iÕªUößÿ\wÝuÙtÓM“$ƒ Ê#<’™3gfuÖIß¾}Ó¥K—üæ7¿IUUUV¬X‘M6Ù$K–,Éš5kòî»ïf›m¶É»ï¾›‹.º(C† ÉK/½”ÝvÛ-Ý»wÏ„ ’$;vÌŒ3ò·%ùÛ×Ö¬Y“·Þz+­[·NóæÍs÷ÝwgÛm·Mûöí³ñÆçÜsÏÍÉ'ŸœY³fe»í¶KïÞ½3zôèúÿ¥Àœ*PóæÍ³hÑ¢ 
6,'tR^zé¥l¶Ùfyê©§²Ï>û|쟹뮻òÊ+¯ä¼óÎËÔ©SÓ©S§ýýØ÷qãßV[m•×^{-kÖ¬I’l»í¶y饗>òž¿­T*¥Aƒ騱c¦OŸž$éСC^}õÕøÙo¾ùfÞ{ï½Ïé7•Ã-ÆP.¿üò4lØ0§žzjzôè‘…ÿ{wWUøü}Ye\iqqr DÌRÓÔDr#3M{¸ŒãLM3mÓô5œ~Sf£6.“K:"¦eËd.¹àf¡â–+²¸Š"—ûû£‰ùúMKË{Ï•óz><pî9çó>÷/}?>ŸÏ)(¯¯¯vìØ¡6mÚÈf³ýàgàÀZ³f¤ïJºÿíÿîkx³}ÝÝÝu§ónv77·›Þ»´´ôŽî ¾CA˜Pbb¢ÒÓÓÕ£G}õÕWêÒ¥‹Þzë-Y­V}ûí·ºvíÚ®±Z­ºté’$©¨¨ÈÑ‘€P&4}útµmÛVëׯתU«d±Xôâ‹/*<<\—/_ÖìÙ³o8?//O³gÏV“&M$IŸ|òÉ Ÿ¿løç°Z­•¿———ÿìû€Ÿç‡sóTyo¾ù¦ÆŽ«ZµjiÀ€ ’¿¿¿ú÷ï¯àà`ýîw¿SNNŽ¢££uêÔ)­Y³F)))jÑ¢…V¯^­©S§ÊÛÛ[-Z´ÐæÍ›uöìÙîÿ}ah³Ù*— _þ}¬qãÆ:|ø°fÏž­G}TŸþ¹ $I{öìQëÖ­+¯ýßK“¿¿·Õj•««ë-Ç·‡„€ ;wN:uÒôéÓõüóÏ«eË–Z¹r¥¼¼¼´nÝ:ÅÅÅiÖ¬Y1b„ÒÒÒ´dÉÕ¬YSqqqZ¼x±|||” Ñ£G+<<\ 4¨¼÷²eË*ߎŽ?®ÐÐPÝêŸý‹EÊÈÈpp2à,˜ATa!!!jÛ¶­,ËM?wuuUbb¢ƒSgBATq#GŽ”««ëM?³Z­JHHpp"àLXb TqgΜQÆ UQQqÃquêÔI©©©%΀„@W¿~}EGGÿ`¡ÅbÑÈ‘# Jœ!`ÇÿÁ1‹Å¢Ç{Ì€4À™P&0pà@¹¸ü÷Ÿÿ®®®êÕ«—j×®m`*à (ðóóÓÃ?,777I’ÍfÓСC Nœ!`#FŒÕj•$U«VMýû÷78p„€IôîÝ[žžž’¤G}T^^^'΀‚0ëׯëÚµkjݺµ$©ÿþ*,,48p›Íf3:€_¦¢¢BÇŽSFF†öîÝ«#GŽ(''GÙÙÙÊÍÍUYYÙ-¯õõõUPP6l¨F),,L­[·VëÖ­àÀ§F  îA6›M™™™Ú°aƒ6lØ Í›7«¨¨HîîîjÖ¬™š6mZYúÊËËK>>>rqqÑÒ¥Kõøã«¢¢B—.]Ò¥K—*ËÄÓ§OëÀ:}ú´$)$$D=zôP÷îÝ«zõêüäàn£ î!{öìQrr²V¬X¡“'O*00P±±±êÞ½»Ú¶m«ððpU«VíGïqíÚ5yxxüè9ùùùÚ»w¯vîÜ©7jÛ¶mºzõªºvíª!C†hРA”…T„€“»r劖.]ª™3gjÿþýjÞ¼¹âããõØc)""Â!®]»¦/¿üRÉÉÉúðÃU\\¬þýûkÒ¤IŠŽŽvH`„€“*..Ößþö7½óÎ;ºr劆 ¦gžyFíÚµ34×µk×´fͽóÎ;Úºu«Z·n­W^yEýû÷—Åb14¸s„€“)++Ó¬Y³4mÚ4•——kêÔ©7nœüýýŽö{öìÑôéÓ•’’¢ÈÈH%%%)&&ÆèXà¸À¥¥¥©C‡úÓŸþ¤Ñ£Gëøñãzá…œ²”¤víÚ)99Y»wï–ŸŸŸbccõä“O*??ßèhà6QN ¼¼\øÃÔ¹sghÿþýJJJR­ZµŒŽv[Úµk§Ï?ÿ\«V­ÒÚµk¦O?ýÔèXà6P;sæŒzôè¡Y³fiîܹZ¿~½BBBŒŽõ³<úè£ÊÌÌÔC=¤~ýúéå—_VEE…ѱÀ`BÀ@ûöíS¯^½T³fM­\¹RáááFGºk,X  &èÁTrr²<==Žn‚‚0HZZš~øaEFF*99Y¾¾¾FGºë¾úê+õéÓGáááúè£äíímt$ðPØ·oŸºu릞={jéÒ¥ªV­šÑ‘ìæðáÃzðÁªÿûßUúY¸Q–­N:©k×®zÿý÷åêêjt$»;yò¤ºuë¦èèh-Y²D‹ÅèHà?(*//W§Näêêª7ªFFGr˜ôôtÅÄÄè•W^Ñïÿ{£ã€ÿà-Æ€M›6MyyyúøãMUJRÇŽµxñb½øâ‹Ú¿¿ÑqÀ0ƒpŒŒ EFFê£>ÒC=dtÃ<ñÄÚ·oŸÒÒÒL±¼gGA8H¯^½Ô A-\¸Ðè(†ºté’š5k¦×_]£F2:¦GA8ÀæÍ›Õ«W/9rDÁÁÁFÇ1ÜŒ3ôöÛoëðáüÕƒ±!௾úªFår°°°PþþþZ½zµ]¯ù%Ƨ+W®hñâÅÜ!`g§NÒ—_~©‘#G:d<777]¼xQEEEv½æ—ðôôTBB‚é—[à (;[¶l™7n¬N:9dÒ¶mÛÔµkW£ã€;Ä„€“»zõªþõ¯éwÞÑ×_­ÐÐP 2DƒVëÖ­–aÆ JNNÖG}¤ÒÒR 
0@“'OVçÎ’Ø!pÙ»w¯’““•œœ¬cÇŽ©^½zŠULLŒÚµk§–-[ÊÓÓósþüyeddh×®]Ú¸q£¶oß®²²2=ðÀJHHРAƒäïïž‚¸GeffjãÆúòË/µiÓ&ÈÍÍMMš4QÓ¦M¬ÀÀ@5lØPÕ«W—ŸŸŸÜÝÝååå¥ÂÂB]¿~]%%%*((PNNŽNŸ>­ììleff*77W’Ô´iSÅÆÆª{÷îêÞ½» ~jp·QU€ÍfÓ‰'”‘‘¡ŒŒ }ûí·:}ú´rrr”››««W¯ÞòZ???5lØP5R£F¦Ö­[«uëÖª]»¶Ÿ‚0ŠŠ ]ºtIË—/×3Ï<£üü|¹ººÊ×××èhÀ`nF`...ªU«–jÖ¬)IªU«–Á‰€³`!`b.F` BÀÄ(£ LŒ‚01 BÀÄ(£ LdÙ²e²X,FÇN„‚01 BÀD‚ƒƒ5xð`£c'b±Ùl6£C03£ LŒ‚01 BÀÄ(£ LŒ‚0‘¬¬,¥¤¤8 BÀDRSSot àD(£ L¤k×®Z±b…Ñ1€±Øl6›Ñ!ƒ„€‰Q&FA˜!€;RTTdtpQ¸-3fÌP=äïïotpñc·tòäIýêW¿’$•——+88Xyyyâ¿TÌ L$55Uñññ·unvv¶FŽYù·›››|}}í ÄÍè'++K)))?yÞùóçÕ§O]»vÍ©€‘(:wîœþò—¿ÈÍÍMnnnÚ¶m›"""ôÊ+¯¨Aƒš={¶öîÝ«š5kjܸqz÷Ýwo¸þÀzâ‰'´gÏ5oÞ\ .T‡$IúÍo~£¸¸8]¾|YÓ§OWaa¡|||T\\¬·ÞzKYYY:räˆÊËË5cÆ EFFJ’JJJôꫯ*''GÁÁÁ:{ö¬BBB´téR«I“&IúnB///IÒk¯½&«Õª¤¤$UTThþüù•Ë’“’’TRRrñéÓ§ëÚµk:wîœ:uê¤éÓ§ëùçŸWË–-µråJyyyiݺuŠ‹‹Ó¬Y³4bÄ¥¥¥iÉ’%ªY³¦âââ´xñbùøø(!!A£GVxx¸4h`Ä×€i0ƒ€SûþÍÉü×û`!`"Ë–-ã­Àà„œZYY™¤Ÿ÷ÂðÓ( ÖàÁƒŽq[Ž=ª¤¤$8qB’4mÚ4]¾|ÙàTT=ìA˜3£ LŒ‚01 BÀÄ(£ LŒ‚0‘¬¬,¥¤¤8 BÀDRSSot àD(£ L¤k×®Z±b…Ñ1€±Øl6›Ñ!ƒ„€‰Q&æftöqñâE}ðÁ·u®¯¯¯ìœ8#ö ª¨ëׯ«Zµj’$ww÷=oôèÑš?¾£¢'Âc Šrwwט1cäîî®ëׯßòG’† bpZ`fUØæÍ›ó£çÔ©SGgÏž•«««cB§Â B  ëÖ­›êÕ«wËÏ«U«¦áÇS`b„@æââ¢aÆUîEø•••ièСNœ KŒ€*¾R‡núYPPNŸ>íàDÀ™0ƒ¨âÚ·o¯_ÿú×?8îîî®ÄÄDgBA˜@bb¢ÜÝÝo8výúu––fpøða5oÞ¼òo‹Å¢°°0íß¿ßÀTÀ0ƒ0fÍš©U«V²X,’$7771ÂàTÀP&‘˜˜(WWWIRyy¹ NœKŒ“ÈÉÉQ£Fd³Ù¥;w 8f&ѰaCµnÝZ’x{1¨Ä B  ÉÉÉÑ¡C‡”­ÜÜ\åååéܹs*--Õ•+Wôí·ßêÔ©SŠŽŽ–»»»üüüT½zuÕ¯__ 6Týúõ¢æÍ›ËÇÇÇèÇ@AܣΞ=«­[·jË–-úú믕™™©‚‚IRõêÕ¨ ¨^½zòðð···¬V«6oÞ¬ØØXIRAA®^½ª¼¼<åææêìÙ³²Z­²X,ºï¾ûÔ²eKEFF*::Z;vTõêÕ|d`„À=Âjµ*55U~ø¡>ÿüs:tHêØ±£Ú¶m«–-[*<<\-Z´PíÚµoyŸ“'OêW¿úÕM?«¨¨PVV–8 ÌÌLíß¿_iii•cEEEé‘GÑ€Ô¸qc;=)p$ BÀÉ¥§§kþüùZµj•.\¸ víÚ©_¿~Љ‰Qdd¤Cfõ}?[qݺuZ³fΜ9£V­ZéñÇרQ£T¯^=»göAA8¡«W¯jÑ¢EúÇ?þ¡¯¿þZmÚ´Qbb¢  ûî»ÏÐlÚ¹s§V®\©Å‹«¨¨Hýû÷ׄ mh6pç('RZZª¹sçê7ÞP~~¾†ª±cÇ*22Òèh7uíÚ5}ðÁz÷ÝwµuëVEGG륗^ªÜã8?£øÎ²eËÔ¸qcýñT||¼Ž;¦ùóç;m9(I:t¨¶lÙ¢­[·ÊÃÃC=zôP=tàÀ£ã€Û@AìСCêÞ½»FŒ¡¾}ûêĉzóÍ7Õ A££Ý‘®]»jíÚµÚ¶m› Õ¶m[ýþ÷¿×•+WŒŽ~!` wß}WíÛ·Wqq±vìØ¡¹sçªnݺFÇúE:wî¬ôôt͘1C ,P‡”‘‘at,p „€Š‹‹5pà@M˜0AS§NÕÎ;z)ñrqqÑøñãµoß>Õ¯__QQQš3gŽÑ±ÀM¸0›¼¼<õéÓGçϟצM›ÔµkW£#ÙMÆ µ~ýz½þúëš8q¢Ž;¦7ÞxC‹Åèhà?x‹1à@GU\\œ|}}õÙgŸ)((ÈèH³jÕ* >\ýû÷×ûï¿/WWW£#Q“““£˜˜jÍš5ªY³¦Ñ‘nûöíêÝ»· 
¤ùóç3“'@A8@qq±ºté¢5jhݺuòññ1:’avîÜ©=zhòäÉš6mšÑq0=^R8ÀøñãUVV¦?þØÔå $ÝÿýZ±b…Þxã }üñÇFÇÀô˜AØÙ¢E‹4~üx¥§§+<<Üè8NãþçôÖ[o)##C 64:¦EAØQ~~¾š6mª×^{MãÆ3:ŽS±Z­zðÁåïï¯ääd£ã`Z„€M™2EiiiJMMå…7qôèQµlÙR›6mÒý÷ßotL‰‚°“ãÇ«E‹Ú´i“:uêt×î[XX¨ÐÐPÍ›7O ¸£k­V«Þ}÷]-Z´Hááá:s挢££%I¾¾¾Ú¶m›ÊÊÊ”’’rׯü)“'OÖîÝ»µmÛ¶»z_p{ÜŒTU³gÏV›6mîj9(Innnºxñ¢ŠŠŠîè:«Õª!C†hõêÕÚ°aƒbbbtùòeÍŸ?_“'OV^^ž^|ñE¥§§ßµ1oÇäÉ“ªÝ»w«C‡wýþàÇñcÀÊÊÊ´dÉ=ú®ßÛÛÛ[jÚ´é]7oÞ<}ðÁ9r¤bbb$}73Ð××W’ôÒK/iܸq ¹kcÞŽÅÆÆjÁ‚wýÞà§Qv°iÓ&]¸pAƒ¶Ëý#""qG×¼ÿþû’tCi¹nÝ:EFF*33Sk×®ÕóÏ?WǼ]ƒÖŠ+TQQa—û€[£ ì 55UaaaªU«–]î?eÊy{{K’Ö¬Y#}úé§?zÍåË—%I;v”$•––jãÆ Ó³Ï>«iÓ¦UÞó§Æ¼Ûºvíªüü|>^ß|ó¼½½Õ¶m[ýîw¿SII‰Ñ±îŠýû÷+66VcÆŒÑøñãµmÛ6…„„ Ü!` ûî»O[·nÕ;ï¼£÷Þ{OM›6Õ¼yóTVVft´Ÿ%''G&LPÛ¶muùòeíØ±Cýë_åîînt4p „€Á\\\4nÜ8>|XÔ„ Ô¤IÍ™3GW¯^5:Þm9uê”&L˜ Æë£>Òœ9s´sçNEFF ü‹Íf³Àegg+))I ,P54räH3FaaaFG»ÕjÕgŸ}¦¹sçêßÿþ·‚‚‚ôÜsÏiôèѪV­šÑñÀm¢ œÔ¹sç´hÑ"Í›7Oß~û­"##5pà@ 8PMš41$“ÕjÕæÍ›µzõj­ZµJgΜQÏž=5vìX=òÈ#,%àDA89›Í¦Í›7kåÊ•Z½zµrssÕ¢E ÅÄĨk×®ŠŽŽVÆ í2¶ÕjÕÞ½{µuëVmÙ²E›6mR~~¾:tè jÈ!úõ¯m—±€cP÷›Í¦]»véóÏ?×–-[´sçN•––Êßß_­ZµRXX˜Z´h¡F©Aƒ T½zõäêêzË{–––*77WyyyÊÍÍÕÉ“'µÿ~effêàÁƒ*--Uƒ Ô­[7EGG«oß¾ vàS{¢ îaׯ_Wzzº¾ùæíÛ·O™™™:pà€.^¼xÃynnnòññ‘‹‹‹\]]uýúuUTTèÒ¥K7œçêêªàà`…‡‡+<<\ŠŒŒ4lI3°? B  *--U^^žòòòtöìY]½zU%%%Ú¾}»-Z¤üã²X,òóóSõêÕ+gÖ­[÷Gg€ª‡‚0‘eË–iذaâ¿à{.Fà8]»vÕŠ+ŒŽœ3c!`b„€‰Q&FA˜!`b„€‰Q&’ššªøøx£c'BA˜HVV–RRRŒŽœ!`b›Íf3:c0ƒ01 BÀÄ(£ LŒ‚01 BÀÄ(£ LdÙ²e²X,FÇN„‚01 BÀD‚ƒƒ5xð`£c'b±Ùl6£C03£ LŒ‚01 BÀÄ(£ LêäÉ“FGN€‚0‘¬¬,¥¤¤(;;[#GŽ4:pnFà8©©©6l˜Zµj¥k×®8f&´wï^9sFãÆ«<–™™©~ýú饗^ÒSO=¥:hÛ¶m•ŸWTTèõ×_דO>©I“&ÉËËK‹¥òÜ›,6›ÍftŽ‘••¥]»v)>>^Íš5Ó¡C‡*? –§§§Ž9"›Í¦   U¯^]G•$%%%éå—_VQQ‘<<<4þ|3FÇ×’%KŒz$ð ±Ä0‘àà`ßô³ &ÈÓÓS’d³Ùäéé©ãÇW~¾víZUTTÈÕÕUnÅè_ IDAT’4hÐ 3Fß|óýƒ»¡  IzöÙgUXX¨3fÈÅÅE×®]Óÿ^pÔ¥KmÚ´Ik×®UŸ>}T^^.IŠ‹‹3*2¸ Xb ˜ÅbùÁã7*!!A+V¬PLLŒš7o®Ã‡W–„åååJJJÒÛo¿­±cÇêøñãjÚ´©þô§?©ZµjF= ø…˜A˜Ôÿ+ðÄOÈÛÛ[1117ý¼¢¢B—.]ÒîÝ»uß}÷9*&°3 BÀ„üýýuöìYåææ*00P’TXX¨²²2effjß¾}ºxñ¢$éÛo¿•fΜ©O?ýT­ZµR@@€|}}U³fM…††ÊÃÃÃÈÇ¿€‹Ñ8Þk¯½&«Õª¤¤¤ÊcÓ§O—»»»úõë'???Mœ8Qnnn5j”,‹¢¢¢tîÜ99R?ü°ºt颖-[* @ÿüç? 
|ðK°!€Ÿd³Ùô÷¿ÿ]6›M“&Mª|¸ªU«&I*++SBB‚Á©€3°Øl6›Ñ!Ø_ZZ𢢢$IÊÎΖÅb1803“èØ±£‚‚‚$I#Gޤ’$6ª ŠŠ ={VÙÙÙÊÍÍÕ•+WT\\¬ºuë*;;[ÕªUÓüùóU³fMùúú*((HAAAªY³¦ÑÑ€ƒ±Ä¸Ç•””(55U{öìQFF†öîÝ«cÇŽÝðÖbWWWùúúÊb±¨¸¸XÞÞÞ²Z­***ºá^>>> WëÖ­ÕªU+EEE©mÛ¶rqañU!pJOO×'Ÿ|¢ 6h×®]ª¨¨PHHˆÚ´i£ˆˆ5iÒD5RÆ (Êk?þøc=òÈ#•_ºtIÙÙÙÊÉÉÑéÓ§•™™©½{÷*##C.\PíÚµ£=zhÀ€jР ì„‚¸G8p@K–,Qrr²Nœ8¡æÍ›ëÁT=m—åÁÇŽÓ† ôå—_ê‹/¾Paa¡xàÅÇÇkèС,I    œXEE…>ûì3Íœ9SëׯWãÆõøãkðàÁŠˆˆph–ëׯkݺuJNNÖ‡~¨ŠŠ %&&jâĉjÖ¬™C³€»‡‚pRk֬џþô'8p@½zõÒ¤I“Ô«W/§xûpQQ‘.\¨¿ÿýï:vì˜ôç?ÿY¡¡¡FGwˆ†'³gÏuîÜY>ú¨š5k¦}ûöé³Ï>ÓC=äå $ùúújÒ¤I:tè’““µgÏ………i„ ºté’ÑñÀ  œDii©þð‡?(**JnnnJKKÓÊ•+ft´[rqqÑc=¦}ûöiΜ9JIIQxx¸>üðC££€ÛÄcÀ :tH=ö˜NŸ>­¤¤$7Îif Þ‰üü|ýö·¿Õ¢E‹ôä“OjÖ¬Yòôô4:øÌ  –’’¢ÈÈHùùùiÿþý?~ü=YJRíÚµµpáB}üñÇúè£Ô¹sg8qÂèXàGPzûí·5dÈ=ùä“úòË/Õ¨Q#£#Ý}ûöUzzº,‹:wî¬}ûö Ü!`iӦ鷿ý­fÏž­3fÈÝÝÝèHwÕ¯ýk¥¦¦ªmÛ¶Š‰‰ÑîÝ»Žn‚=Ì™3G“&MÒÂ… 5lØ0£ãØUYY™† ¦Í›7kûöí 5:ø_(ûøã5`ÀÍ›7O£F2:ŽCX­Võë×OGŽÑöíÛ`t$ð„€åææªeË–zúé§õúë¯Ç¡.]º¤nݺ©qãÆZ½zµÑqÀPb³ÙÔ§O•——kíÚµ÷웊‰'N¨U«Vúûßÿ®ÄÄD£ãñ’ÀaÞÿ}mß¾] ,0e9(}÷â’7ß|SS¦LÑùóçŽÄ BÀ!ÊÊÊÔ¬Y3ýæ7¿ÑäÉ“Žc(›Í¦Î;«sçÎúÛßþftL„€Ì›7OV«UO?ý´ÑQ g±Xô—¿üEsæÌQNNŽÑq0= BÀÎl6›þö·¿iâĉòôô´ûx………ò÷÷¿£üœk~‰¸¸85kÖL³fÍrÈxàÖ(;Û¶m›N:¥ÇÜ!ã¹¹¹éâÅ‹***²ë5¿Ôˆ#´téRUTT8lLðC„€-_¾\ݺuSPPCÆóööV`` š6mj×k~©Ç\YYYÚ¾}»ÃÆ?DAØYjjªºtéâÐ1#""a÷k~‰ (44T[¶lqؘà‡(;*))Ñþýûuÿý÷;tÜ)S¦ÈÛÛ[’´fÍùøøèÓO?½ík%22R»wïvè˜àF„€;vLV«Õ¡Kw%©W¯^•¿«¤¤DÅÅÅ·}£4kÖLtø¸à¿ÜŒTeùùù’¤Úµk–aذa0`€jÔ¨aX†[©]»våwŒÁ BÀŽ %IµjÕ24‡3–ƒÒwß !Æ¢ ìÈËËK’tùòeƒ“8§+W®8|ßCp# BÀ޾Ÿ9xñâECs\¹rÅÐñoåÂ… ªS§ŽÑ105 BÀŽ‚‚‚$I¹¹¹†eX¾|¹¼½½•’’bX†[9}útåwŒAAØQƒ ¤´´4Ã2Ô¨QC^^^òðð0,í|õÕWêØ±£Ñ105‹Íf³¨Êâããe³ÙœrŸ‘®^½ªš5kjÙ²e4hÑq0-fvöè£ê‹/¾pÚ}òÉ'ŸÈÝÝ]>ø ÑQ€ÿÏÞ}‡EyæÝ?TQ@Aš…bE£±@ÄF”Þ˜MÁ4ͦo’-IÞd£yÝÍ&Ù”I\uÕD33‘*° h ‚ "R•&u`˜ßù1¯Ä®ÀC9Ÿëš œ™c"<3g¾Ï} j,‰ˆˆˆˆzX`` T*bcc…ŽÒ§üôÓO °aÄŽBDD4¨ñc""""¢^ðÊ+¯ 77iiiBG銊Šàè舔”Ì›7Oè8DDDƒ'‰ˆˆˆˆzAhh(233±ÿ~¡£ô ëÖ­ƒ»»;ËA""¢>€„DDDDD= ¥¥éé鈋‹Cbb".]º„éÓ§Ž?]]] çÔ©S˜;w.Ž=ŠÇ\è8DDDƒ'‰ˆˆˆˆºIii)6oÞŒÀÀ@˜™™ÁÃÃééé‰D8rä<ˆêêj¬[·N訂iiiÁ3Ï<ƒ… ¢ººJ¥RèHDDDƒž¶Ðˆˆˆˆˆú«ŽŽœ8q ˆGVVôõõ±hÑ"|öÙgðññM—ïùïÿ ///,Z´‹/(¹pÞ|óM´´´@OO077Gpp0$ \]]¡©É""¢ÞÆSŒ‰ˆˆˆˆ@]]öíÛ‡ÄÄD$$$ ªª 
vvvðöö†¯¯/-Z}}ý»ÞÇŸÿüglܸ‡ÂÔ©S{)¹ð>ýôS|øá‡8tèfÏž‹/B&“A&“!77ÖÖÖ‰D‰D˜5k–Ðq‰ˆˆ „DDDDD÷pþüyÄÇÇ#11‡BGG\\\àããŸ.ù”J%–-[†ììl> Ø·o0uêTx{{ÃÇÇ®®®ÐÒÒêÖÇlllDPPN:…ˆˆ,Z´¨[ï_høðñ~ýz|ýõ×xíµ×èûU*Ž9™L†ÈÈHTTTàñLJX,Fhhè-ë;ÑÃaAHDDDDƒÖ¹sç‡ØØX;v ºººX´h|}}áíí1cÆôx…BU«VA.—cýúõx÷Ýw¡¡¡ÑãÛÓ®_¿Ž•+W"-- ›6mÂSO=õH÷§T*‘’’¹\Ž]»v¡®®®®®H$X¾|9FÕMɉˆˆ„DDDD4h´··ãСCˆGLL .^¼333øúúÂ××:t¨ Ù¾ùæ¼óÎ;X¼x1¾ÿþû~½.all,^yåèèè ** =öX·Þ¿B¡ÀÞ½{!“É‹ææf,^¼b±AAA011éÖÇ#""èXÑ€VWW‡½{÷"&&III¨®®ÆÄ‰áïï???Ì;·ÛO~X™™™xî¹çPTT„uëÖá•W^®®®Ð±îÛÕ«WñÖ[o!""O?ý4¾úê«G^«ñ^š››™L†ÄÄD¨T*,]ºb±þþþ044ìÑÇ'""XÑ€STT¤>u8==˜7o|}}Ч7ºP(øûßÿŽõë×ÃÆÆü1$ 455…ŽvGׯ_ǧŸ~Šo¿ýøþûïáááÑë9êëë±{÷nÈårìß¿ÚÚÚðóóƒX,†——ôôôz=QÀ‚ˆˆˆˆú=•J…'N 66ñññÈÎÎÆðáÃáéé ???x{{÷ø$[w+..Æ;#¨¨(Lš4 ¯¿þ:V®\ ¡£©â»ï¾Ã¦M› ­­çž{Ÿ|òIŸ˜z¼~ý:¢¢¢ “É––CCCB,ãÉ'Ÿ„ŽŽŽÐ‰ˆˆú „DDDDÔ/577#99qqqˆ‹‹CYYìììàçç,X° OU«¤¤‹-ÂÈ‘#1eÊìܹXµjV¬X3f’«µµ‰‰‰øñÇ+++üáÀÙ³g±gÏì߿ӧO$Û”••!""2™ ÇŽÈ# ‘H„ ôééL""¢ÞÀ‚ˆˆˆˆúÊÊJÄÅÅ!>>ûöíCss3fΜ‰€€øúúö¹bêa•••aáÂ…ÐÑÑÁÁƒaff†k×®aóæÍؾ};òòòààà€ÐÐP,Y²sçÎÅ!Cz,OUURSSÕ›»444àÉ'ŸÄ‹/¾ˆÀÀ@hii¡©© ~~~8sæLŸ, ;A.—C&“!++ ––– …X,Æœ9sÄÒDDDŠ!õigÏžE\\bbb™™ ]]]¸»»Ãßß¾¾¾=z´Ð»UUU.\¥R‰ÔÔTXXXÜò5YYYË刋‹ÃÙ³ga``€¹sçâñÇÇ´iÓàìì ‡‡* kkk‘››‹3gÎ ++ ÇGNN† ‚'žxË—/Gpp0ÌÌÌnùÞþRv:þŸˆˆ¨;± $"""¢>¥££ÇŽÃîÝ» .ÀÜܾ¾¾ðõõ…‡‡GŸZ‡¯;UUUÁÝÝÍÍÍHMM…••Õ=¿§¢¢)))HMMÅ©S§››‹––À¨Q£0zôhX[[COO&&&ÐÒÒ‚¡¡!êêê P(ÐØØˆêêj”––¢¸¸333L›6 sæÌÁâÅ‹áêêz_›|ô·’°Sgé*“ÉPTT„ &@,C$a„ BÇ#""êQ,‰ˆˆˆHp …)))ˆŽŽFll,ÊËËaoo   ÀÅÅeÀ¯W]] wwwÔ××#-- ÖÖÖu?ííí(((@~~>Š‹‹QZZŠ«W¯¢¹¹µµµhkkCcc#Œ¡££CCC˜˜˜ÀÊÊ 666°¶¶ÆäÉ“i2³¿–„ÀoÞ?~2™ ááá(++ÃôéÓÕeá˜1c„ŽHDDÔíX‘ êëë‘””„èèh$&&âÆ˜1c‚‚‚ˆÉ“' ±×ÔÕÕÁÝÝUUUHOO‡Ð‘Y. 
;utt ==2™ ‘‘‘¨®®ÆÜ¹s!‰ KKK¡#u „DDDDÔk***ƒÝ»w#%%J¥óçÏG`` `kk+tÄ^WWWOOO””” 55ãÇ:R·%a§¶¶68p2™ 111¸qã,X±XŒåË—cäÈ‘BG$""zh,‰ˆˆˆ¨G]¼xÑÑÑØ½{7222 §§‡¥K—"00¾¾¾ƒºXihh€§§' ‘šš ¡#u»TvjiiÁž={ “ɇööv<ùä“‹Å ÄðáÃ…ŽHDDô@XQ·;uêvïÞÝ»w#''#FŒ€¯¯/ô&#¢©© ^^^8þ§µµ8yò$’““1mÚ4¡#õšÁPvª¬¬Ddd$¤R)Ž= ###A,cñâÅÐÒÒ:"‘ B""""ºo555HHHÀîÝ»‘””„ææfÌž=[] ::: ±OS( ÂÑ£G‘œœŒ3f©× ¦’°Ó•+W¹\Ž'N`Ô¨Q†X,†««+455…ŽHDDƒ B""""º««W¯"&&ÑÑÑHKKƒ††.\ˆ   øûûcôèÑBGì BCC‘ššŠýû÷cÖ¬YBGÌ`, ;]¸pr¹R©gÏž… BCC!‹ñøã ˆˆ)„DDDDt‹ââbDFF"** ÇŽƒ¼¼¼ooo ±_Q*•‰DØ·oöîÝ ¡# n0—„Ξ= ©T ¹\Ž .ÀÁÁ¡¡¡H$ƒb]J""ê;XàÒ¥KˆŒŒDdd$Nž< ###`Ù²eXºt)ôôô„ŽØ/)•J¬\¹qqqHJJ‚›››Ð‘ú –„ÿçĉËåÇ•+W0eʈÅbˆÅbŒ?^èxDD4À± $"""ÄΟ?¨¨(DFFâôéÓ9r$ wwwîºúˆ”J%V­Z…¨¨($$$`áÂ…BGêsXvÕÑÑ£GB*•"22•••˜5kD"D"¬­­…ŽHDD B"""¢A&77»víBDDrssannŽÀÀ@„„„`áÂ…ÐÖÖ:†R©ðüóÏC&“!&&K–,:RŸÅ’ðö”J%RRR “ɺº:<ñċŠ™™™Ð‰ˆh€`AHDDD4dee!** QQQøõ×_1zôh!88nnnÐÒÒ:•R©°zõjlß¾111ðôô:RŸÇ’ðîZ[[±oß>H¥RÄÅÅ¡¹¹îîî‰DX¶l×%"¢G‚ˆˆˆh€:yò¤z£‘ .ÀÆÆË—/ÇòåËáêê MMM¡#H*• ¯¿þ:þóŸÿ 22~~~BGê7XÞŸææfÄÅÅA&“aÏž=P©Tððð€X,†ŸŸ …ŽHDDý B"""¢B¥RáøñãêR°¨¨cÆŒAHH–-[†9sæ@CCCè˜Þ[o½… 6@&“aÙ²eBÇéwX>˜ºº:ÄÄÄ@*•"99:::ðóóƒH$‚··7† "tD""êXõc8räˆúôá’’ØÛÛ#$$Ë—/ÇÌ™3…Ž8¨¼ûî»øòË/!•J"tœ~‹%áù~ý:"##!“ÉžžŽaÆ! ‰îîîÐÑÑ:"õQ,‰ˆˆˆú¥R‰´´4ìÚµ QQQ(//ÇĉŒàà`8;; qPúàƒðü?þø#V¬X!tœ~%á£)--EDDd2Ž?Ž‘#G"88"‘óçÏçDDÔ B"""¢~ ½½)))ˆŠŠBtt4ªªªàììŒåË—#88“&M:â öñÇcíڵزe „Ž3`°$ìEEEÉdÉdÈÎÎÆèÑ£‰D‚Ù³gsé""bAHDDDÔW) 8p‘‘‘ˆÅõë×1sæLõF#ŽŽŽBG$ëׯÇ|€M›6áùçŸ:΀Ò°{ååå©ËÂóçÏcìØ±‰D‹Å˜6mšÐñˆˆH ,‰ˆˆˆúööv$''#<<»víB]]fÍš…àà`,_¾ãÆ:"ÝäóÏ?ÇŸþô'|÷ÝwX³fÐq,–„=#++ R©ááá(**Âĉ!‰ ‘HøÑ ˆˆˆH`HOOGxx8"##QUU…Ù³g#44!!!°µµ:"ÝÆ×_7ß|_}õÞxã ¡ã x, {ŽJ¥Â±cÇ “ɲ²2LŸ>‰"‘vvvBG$"¢Æ‚ˆˆˆH/Èår9"""PZZŠiÓ¦!44b±˜“‚}Üwß}‡W_}Ÿþ9þøÇ? 
gÐ`IØó”J%ÒÓÓ!“É…êêj¸¸¸@$!$$–––BG$"¢À‚ˆˆˆ¨:u r¹\}Jß„  ‰ ‰0qâD¡ãÑ}ؼy3^zé%¬_¿ùË_„Ž3è°$ì=mmmØ¿?d2bbbÐØØˆùóçC"‘`ùòå1b„Љˆ¨›° $"""êagÏž…\.‡\.G~~¾zS‘HÄr£ŸÙºu+žþyüíoÇ~(tœA‹%aïkiiAbb"d2âããÑÞÞŽ%K–@,# Ç:"=„DDDD=   áááËåÈÉɵµ5BBB ‰0gΡãÑCرcÂÂÂðÞ{ïaíÚµBÇôX §¡¡111Ë娻w/´´´àåå‰Dèëë ‘ˆˆ B"""¢nrùòeDDD@.—ãäÉ“5j–/_‘H„yóæASSSèˆôÂÃñbÅ ¼ýöÛøôÓO…ŽCÿKBáÕÔÔ`×®]Éd8xð ôõõ±XŒ¥K—BWWWèˆDDtX=‚²²2DDD <<G…‰‰ ‚‚‚ ‰°xñbhii ‘QTTÄb1^{í5üë_ÿ:ýK¾£¢¢‘‘‘Éd8räˆú÷¡X,Æ¢E‹øûˆ¨cAHDDDô€®]»†¨¨(Èår¤§§cèС@hh('f˜˜˜„††bÍš5øê«¯ ¡¡!t$º –„}Ï•+WÔk¯vNT‡„„@,ÃÕÕA£M IDAT•?KDD} B"""¢ûPWW‡èèhÈår$''C[[~~~‰Dðòòz 5·”J%æÍ›‡ƒBOO¯Sӣسg†~øá–B£±±ß}÷är9:::`ll •J'''L˜0W¯^ÅgŸ}&PúÁ§§JÂÇcß¾}X·nàùçŸG@@üüüºåþƒ‚‚ÈårÈd2œ={666‰D‹Å˜9s¦ÐñºMFF¶oߎ7>øààñÇ8ѽ± $"""ºƒ††ÄÅÅA.—#)) àéé ‘H???>ÔýîÞ½AAAØ´i^xá…îŒLÝdß¾}ÀŠ+°yóæ[ÊÁ .ÀËË –––ؼy3*• ±±±xá…àïï-[¶ÐêÉI±cÇ¢¨¨---2dH·Ýï`“““£. /^¼ˆÅbˆD"Lž ‘H„ÀÀ@?òcøûû#++ †††ÈÍÍåæ%}LJJ üüü‚-[¶Ü²nZSS¦OŸmmmœ>>HLLDiiiý;¡G×Ý“„œ ìJ¥@xx8víÚ…úúzÌ›7b±ÁÁÁ033:â=q‚ˆú#NÑ RPP€?þNNN˜3g’’’ðòË/£  xýõ×{¬ô©ªª‚R©„­­- ñòË/#%%…N}@ff&¼½½±dÉìܹóŽå \¾|`nnÞ[ñè!p’°ÒÒÒ‚‡‡¶lÙ‚òòrDGGÃÊÊ ï¾û.,--áé鉭[·¢¶¶Vè¨DD B"""ð***ðÍ7ß`îܹpttÄÆáííÌÌLäååáÃ?„½½}çøÏþƒW_}Uýç×^{ C† Á_|ÑãMwöË/¿`éÒ¥˜7od2tuuïúõÚÚÚ€æææÞˆG€%aÿ6dÈøûûãçŸFee%vîÜ ¼üò˰°°@`` ¤R)…ŽJDÔïñc"""Ÿþû÷ïÇСC„+VÀÝÝý®b=A¡P`̘1(++»å6mmmÂÚÚºW3••wwwÌž=111÷, $$‘‘‘øå—_0cÆŒ^HIª;N7æ)Æ}G]]vïÞ ™L†äädèèèÀßß"‘^^^‚ï2ÍSŒ‰¨?â! mmmˆÇŠ+`nnŽ^xºººØ¹s'ÊË˱mÛ6,]º´×ËAˆˆˆÀÛo¿ •JÕå²cÇ´··ãßÿþw¯gìrss±téR̘1QQQ÷U€¯¯/ ..®'ãQ7â$áÀbdd„gŸ}{öìAii)¾øâ ”——cùòå5j””„ööv¡£ÞVUUÚÚÚ„ŽADÔ'‰ˆˆ¨_S©TÈÈÈÀÎ;!—ËQ]]yóæaÅŠ ň#„Ž•JÄÆÆÞ²n]kk+lll P(påÊ 6L ”ƒË¯¿þŠ… bâĉHLL¼ãnÄ·ÓÚÚŠ©S§âÚµk8sæÌm'?;::±XܱéuNfggãÀ4IÈ Â¾ïêÕ«ˆŒŒ„T*Eff&Fމ„††bþüùÐÔìùù•JMMÍ;NªT*¼ð شiS¯ä!"º_üDDDDýÒÅ‹±víZ888à‰'ž@zz:Þ~ûm!==kÖ¬éå ÄÇÇcøðá·ÝÔbÈ! 
E]]6nÜ(@ºÁ'??îîîpttDBB•ƒÀoÿÏbbb`hh777$%%¡££Ào/þOœ8•+Wò”ñ>¨s’pÚ´ixòÉ'h’°¥¥¥ËGê{¬¬¬ðÆoàØ±c¸xñ"þøÇ?âèÑ£X´hlmmñÖ[oáøñã=š¡smÒÛM/677ãý÷ßGkk+ËA"ês8AHDDDýF]]är9~úé'9r£F‚D"Á3Ï<óPkŠõ†˜˜¼üòËÐÒÒÂ'Ÿ|‚gŸ}¶Ëí øôÓOqèÐ!â³Ï>Ú5kJ;ð]¸p‹-‚ ’’’0|øð‡¾¯ºº:|õÕWˆŒŒDUU,,,`bbWWW¼ùæ›033ëÆäÔd’ðÈ‘#HJJÂ'Ÿ| òeËàçç×[qéýúë¯ËåJ¥ÈÏÏÇØ±c!‹!‰0mÚ´n{œŒŒ lÙ²[¶lÌœ9ÆÆÆ~[777ضmÛ-Ç""¡± $""¢>­­­ {÷îÅ?þˆØØXhjj"00O=õ<<<YOú§¢¢"ÌŸ?£FÂ`dd$t$УœnLý×éÓ§!“É —ËqùòeLš4 "‘b±ŽŽŽBÇ#" B"""ê“~ùåüøãÉd¨ªª‚›››zr‡Å=¨+W®`Á‚066Frr2LLL„ŽD}KÂÁ«sýZ¹\Žððp”——ã±ÇƒD"Ahh(ìì섎HDÔ«XQŸqåÊüüóÏøñÇqîÜ9899aåÊ•xöÙgakk+t<ê§JJJ°hÑ" 99¦¦¦BG¢>„%!)•J¤§§C*•"** 555puuEhh(BCCaaa!tD"¢Ç‚ˆˆˆÕÐЀ¨¨(ìØ±)))066ÆŠ+°bÅ ¸¸¸ú¹²²2,\¸:::8xð פÛbIHÚÚÚ°oß>ÈårÄÄÄ ±± ,€D"Á²eËúÌæWDDÝ!õºŽŽ¤¤¤`Û¶mˆ‰‰A[[¼¼¼OOO 2Dèˆ4TUUaáÂ…P*•HMMåÝKBú½ææfìÙ³R© hooÇÒ¥K!‰ˆaÆ ‘ˆ¨Û° $""¢^sñâElß¾Û·oGqq1\\\°råJH$NeP·ªªª‚»»;š››‘šš +++¡#Q?À’îäÆˆ…L&þ}û ¥¥oooˆÅbøøø@___èˆDD„!õ¨††DFFbëÖ­8tè,--ñôÓO#,, &L: @ÕÕÕpwwG}}=ÒÒÒ`mm-t$êGšššàïשּׁ,–„t[ÕÕÕˆŽŽ†T*Ejj* ‘Hèèè‘ˆè± $""¢n§R©pèÐ!lݺ‘‘‘hkkƒ¿¿?ÂÂÂàáá---¡#ÒUWWwww\»v iii܉” KBº_ˆˆˆ€L&ÃÑ£Gabb‚   H$,\¸Ç;"ê7XQ·)..VŸB|ñâEÌœ9aaaX±bO!¦WWW\½z©©©?~¼Ð‘¨cIHª¸¸áááÉdøå—_`aaˆD"¸ººBCCCèˆDDwÄ‚ˆˆˆIss3víÚ…mÛ¶!%%¦¦¦X¹r%V­Z…©S§ ‰††xzz¢°°©©©ppp: , éa@&“A&“áܹs°µµEhh($ f̘!t<"¢[° $""¢‡’‘‘mÛ¶A.—£©© ÞÞÞXµj¼½½¹þõª¦¦&xyyáüùóHMMåÚ–Ô­XÒ£ÊÉÉQ—…—.]‚££#D"Äb1&Mš$t<"",‰ˆˆè”––â§Ÿ~¶mÛ——‡©S§bÕªUX¹r%ÌÍÍ…ŽGƒP箳9998xð &Ož,t$€XRwÉÌÌ„L&CDDJJJàìì ‘H‰D‚±cÇ ˆ1„DDDtWJ¥{öìÁæÍ›‘˜˜ˆaÆaÅŠ ÃÌ™3…ŽGƒXkk+pòäI$''cÚ´iBG¢Œ%!u§ŽŽ>|2™ ‘‘‘¨ªªÂìÙ³!‹ +++¡#Ñ Ã‚ˆˆˆnëâŋغu+¶nÝŠ²²2,^¼«V­Bpp0† "t<ä ‚‚‚pôèQ$''sM/ê, ©'´··#99r¹ÑÑѨ¯¯‡››D"‚ƒƒaff&tD"X‘Zkk+¢¢¢°uëV$''ÃÒÒaaaxî¹ç¸#,õ …¡¡¡HMMÅþýû1kÖ,¡#Ñ Â’zRkk+’’’ “ɇÖÖV¸»»C,#((FFFBG$¢Š!!''›7oÆŽ;P__<ÿüóðöö†–––ÐñˆÔÚÛÛ!‹±ÿ~$%%ÁÅÅEèH4±$¤ÞÐÔÔ„ØØXÈårìÙ³àéé ±X ??? :Tà„D4° $""¤nܸ©TŠ-[¶ 33ãÇÇ‹/¾ˆ§Ÿ~£G:Ñ-”J%V®\‰¸¸8$%%ÁÍÍMèH4ˆ±$¤ÞTWW‡èèhÈår8pC† ŸŸÄb1<==¹ô=2„DDDƒLFFþóŸÿ 22J¥Ë–-à /¼€ @CCCèxD·¥T*±jÕ*DEE!!! 
.:KBĵk×¹\ŽC‡aذa ‚X,†»»;´µµ…ŽHDý B""¢A ººÛ·oǦM›ð믿bÚ´ixá…°råJ˜˜˜è®T*žþyÈd2ÄÄÄ`É’%BG"RcIHBºzõ*""" “Épüøq˜™™!88"‘nnnÐÔÔ:"õ,‰ˆˆ°#GŽ`ãÆˆˆˆ€®®.$ ^|ñEÌœ9SèhD]( èêêÞr½J¥ÂK/½„ü111ðôô Ñݱ$¤¾ °°R©áááÈÎΆ••BCC!‹1{öl¡ãQÇ‚ˆˆh€©««ÃŽ;ðÃ? 773fÌÀš5k ‘H`hh(t<¢[äååáé§ŸÆÞ½{1bÄõõ*• ¯¿þºú”x???SÝKBêKÎ;¹\™L†üü|Œ;‰"‘ÎÎÎBÇ#¢>ˆ!ÑqâÄ lܸ2™ ‹±zõjÌš5KàdDw†íÛ·còäÉ8xð ÌÌÌo½õ6lØ™L†eË– œ’èÞî§$üꫯ°zõjèëë £S§NA&“!<<—/_ƤI“ ‹!‰àèè(t<"ê#Xõc J¥øá‡pêÔ)L™2kÖ¬ÁSO=###¡ãÝSII ÆŽ‹öövèèè`ܸqHKKÃ矎/¿üR©!!!BÇ$ºow+ ?úè#¬]»ß|ó ^{í5SÒ`¤R©pôèQÈårDDD ¼¼3fÌ€D"Ahh(lmm…ŽHDbAHDDÔeggcãÆØ¹s' BBB°zõj<ñÄBG#z o¿ý6þý­  ££SSSTVVâ§Ÿ~‚D"8!у»]IØY€¥¥%ŠŠŠn»î&QoP*•HKKƒT*Å®]»PSSWWWˆD"„„„ÀÂÂBèˆDÔËXõÍÍÍÇ?ü€cÇŽÁÉÉ /½ôº¬ÛFÔ_ÔÔÔÀÊÊ ÍÍÍ]®×ÑÑ™™233aee%P:¢GssI(‘Hðí·ßªoÓÔÔĦM›ðÜsÏ ˜è7mmmØ»w/ÂÃñ{÷n455aáÂ…H$ âs ¢A‚!Q———‡ï¿ÿ?ýô„Õ«WcáÂ…ÐÐÐ:ÑC[·nÖ®]‹ööö[nÓÑÑ……>ÌÓÞ¨ßjjj‚··7ÒÒÒº\¯©© [[[\¸pZZZ¥#ºUss3!•J‘˜˜ˆöövxxx@$! Æ :"õ„DDD}R©D\\6lØ€äädŒ;/¾ø"ž{î9˜›› è‘577côèѨ­­½ã×èèè`ĈÈÈÈÀرc{1Q÷¸ù´âßÓÐÐÀÎ;y=õY7nÜ@LL är9öîÝ mmmøøø@,ÃÛÛû7Ú9}ú4Œùûœ¨Ò:ýŸk×®áÿøìíí±|ùrèèè >>øË_þÂrŒÍ›7ãÆwýš¶¶6TTT ­­­½”Œ¨{Ü­~+×®] ÎkP_5lØ0<õÔSˆ‹‹Cyy9¾ùæÔÔÔ@$aÔ¨Qxæ™g ^Cö^Ö­[‡qãÆ!))©‡“ÑÃà!QpòäIlذ2™ zzzxî¹ç°fÍ888¨Ûµ··ÃÎÎeeew,G´µµ¡¯¯wÞyo¼ñwå¦~å^åàÍvïÞ€€€NDÔ}ÊË˹\Ž£GÂÄÄË–-ƒD"Á‚ n{Ú¼B¡ÀСCÑÞÞMMM¬]»ï½÷—J!êC8AHDD$ÖÖVìØ±...˜5kN:…¯¾ú %%%øâ‹/XÒ€%•JïXjkkÃÈÈ}ôJJJðᇲ¤~¥½½¦¦¦~[kðnˆ––þö·¿õR2¢îaaa×^{ ‡Faa!þú׿âÔ©Spww‡µµ5Þxã 9r¤Ëïø¤¤$(•J@GG>üðCøûû£¾¾^¨¿ý'‰ˆˆzYII ~øálÞ¼ÕÕÕ Ä+¯¼‚ ¨Ç©T*Lœ8èèèP_¯¥¥…aÆáOú^}õU.„OýžB¡À?þˆuëÖ¡¤¤ºü›¿Ù¾}û°dÉ’ÞŒGÔíòóó!“É —ËqîÜ9ØÙÙ!44‰ÿüç?ÕåtdØØØ ..“&M09,‰ˆˆzMjj*6l؀ݻwÃÔÔ/¼ðÖ¬Y+++¡£õš¸¸8øûû«ÿ¬¥¥…áÇã¯ý+^~ùe ˜Ž¨ûµ··C.—cíÚµ(((€¦¦¦z’ ømjvöìÙ8r䈀)‰º×™3g —Ë!•JQXXmmíÛîX¯­­ üôÓOX¾|¹I‰¨ B""¢téÒ%œ;w¾¾¾÷üÚ††ìܹß~û-rssáââ‚W_}ÁÁÁÐÕÕí…´4ØÕÔÔÜõöææf´´´Ür}gqw7†††ÐÑÑy <ãÆCaa!´´´`dd„÷Þ{kÖ¬ÁСCè~ˆú›ŽŽìÞ½ü1Μ9--­.Eá¡C‡0oÞ¼n{¼›ö›ššÐÚÚ «¯:t(EÔãÖ¯_>øàŽkÎvž†ÿî»ïbýúõ·]Ãp ¹Ó±·Scc# Å-×ßë¸;|øðAñßz B""¢P\\ ;;;¿í°ª­­}Û¯+--Å7ß|ƒ7¢µµb±¯¾ú*f̘ћqI uuuP(¸qã†úI~MM  ÑÐÐ…BÚÚZ´¶¶¢©© 7nÜ@{{;êëë¡T*¡R©P[[ à· ¤Î[ZZÐÜÜ à·ºót­ÚÚÚ>³êÍ/`Œ¡¡¡---hjjâüùóGGGXYYaøðáê‚ÂÈÈššš022‚®®.†  
2&&&ÐÕÕÅСCahh]]]cÈ!000À°aÃîøóHÔ×ìÙ³k׮űcÇÔ“UK—.ÅÞ½{oùÚªª*”——ãÊ•+(//ÇÕ«WQQQªª*ÔÕÕu¹ÔÖÖ¢©©éóèèèÀÈÈFFF066V_ŒŒŒ`ii ØØØ`Ô¨Q°¶¶Æ¨Q£øÍ¼˜˜xÛ Â›iiiaÞ¼yˆŒŒT¯ãÙSjjjÔÇÓ›?¯­­EKK šššPWW‡––466ªÍÅ{ç±€ú¶›ÕÇþ›åBÑÑÑQOçwŸ;µ ¯¯===ÿwÌî<ÞvÞfbb===èëëÃØØzzz000€‘‘ôôô0tèP >úúú|ïcAHDD‚jjjB~~>JJJPZZŠÒÒRõµÎ'_ÚÚÚ6l´´´`bb‚Q£FÁÒÒ£G†ƒƒÌÍÍ{%kII æÍ›‡ââb¨T*lݺaaa]¾&''_|ñ¤R)FŒ×^{ «W¯ÆÈ‘#{%#=ºëׯ£¶¶V}©¯¯W¿øîü¼¾¾^}ûï¯oll¼çctNíü¾ôê|»r­s𝳠.Ï€;O ÜkBèN“‚7‘wr§éÄÛ•œmmmˆŠŠ‚©©)Õ/;‹Ñ›ï¯¶¶ … ]ÊÔ{é,9†uá1|øð[>þþvŒ9’¥õª´´4|üñÇ8xð àƒ>€B¡À… ŸŸ .t™2:t¨º¨355Uÿî,öŒŒŒÔ/Î; wêöJ¥²Ë¦?« …¢KÑXSS£þüêÕ«¨¬¬DEE…úû455akk õÅÑÑŽŽŽ7nœúq‰êëëajjÚeíÁ»ÑÑÑ©©)âââ0sæÌ;~]uuõ-Çëû½Ü«Dï<.>zzz044T¿ Õùsõ ¥›‰‰É-ÑùÜönÿn·äÆÝÎ èèè@]]Ý-×w¾9 Ü™Ùù ©© ---¨­­UO=ÞëÌ]Þh¸Ýå÷oFcĈ055U?Ç¡ÞÇ‚ˆˆzMkk+233‘––†“'O"77………êEÛMLL`ii ccc¨Ÿ u>±iooGmm-***PQQ¡þ>SSS8;;ÃÙÙóçχ››[·¿ó\QQWWW\¹rmmmÐÔÔ„ .\¸mmm8pŸþ9öíÛ‡‰'âü#žzê)uyC½O¥RáÚµk¸~ýºúÒù窪ª.×ß|ûí6è|—¼³`ºùÅøï¯ï|Q¡«««~7]OOÆÆÆÐÕÕÔkìµ¶¶>ÒÏÄÍÓ–…F狘úúz477ßRàÞ\ÞÞ\ôÞ©p>|8LMMajjŠ‘#Gª?v^~›©©)OѤrñâEœ>}YYYêËÕ«WüV 8;;w)Þlll`aakkkA( ”——£¤¤eee¸xñ¢ºÈ,((@ii)€ßÊgggLŸ>Ó§OÇÌ™31eÊþœ RÛ·o¿åÍÔûµdÉ899áÚµk¨¨¨Àµk×PUU…k×®Ývñ~J(ccc :´ËÜï'âXpßÛí&.oþ¼¦¦æ–b¶sÊù^e­LMM1jÔ(˜™™©µfff077ïr……75ëF,‰ˆ¨G#::±±±ÈÈÈ@ss3œœœÔ/&OžŒ‰'ÂÚÚúúú÷}¿ííí¨¬¬D~~>Î;‡œœdggãäÉ“hooǤI“àãム  Ì™3G½¾Í說‚››.]ºÔåp ¬^½ÇŽCVV-Z„·ß~ÞÞÞôxtw­­­¨¬¬TŸfWZZªþX^^޲²2”••¡²²ò–‰CCÃ.ÅÎïËŸÎÛFŒѥ䩫ÓÍbmm­º$¾[|ýúõ[îÇÌÌL=ÙÜy:fçÇÑ£GcÔ¨Q°²²Ôåð`¥T*qæÌ¤§§#==‡Fee%ttt0iÒ$u‰6mÚ4Lž<•••prrê—“¬ (((À™3g••…S§N!++ õõõÐ××ǬY³°`Á¸¹¹ÁÕÕ•§!>>>HLL„¾¾>T*T*:::nùüv´´´`kk‹Ç{L=5{sYdjjª.ýn7¥G}_çµµµ¨©©QÀ7—ÂÇåÊÊJTUU¡¡¡¡Ë}ÀÖÖVýFŠ¥¥%¬¬¬0zôhŒ=Z}ߤ¸7„DDÔíêêê°sçNü÷¿ÿÅ©S§0räHøûûcéÒ¥˜?>,--{ì±›šš‘‘””ÄÆÆ"77VVV‹Åx饗àèèø@÷WSS777äççßR6ijjB__¾¾¾x÷Ýwïz* ÝŸ¶¶6\½zÅÅŸ|ù2._¾Œ+W® ¸¸W®\Q?Y¼ÙÈ‘#Õ…Œ¥¥¥ºŒ177ÇèÑ£»”œè¤GÕÑÑqKiXRRÒ¥´¾¹¤¾yÊÅÀÀVVV°°°À˜1c`kk [[[ØØØÀÎÎvvv,M€’’$$$`Ïž=HMME]],--Õî...ƒf¢N¥RáÒ¥K8qâ>Œ´´4œ;wššš˜9s&¼¼¼àãベ3gòµ~ª¬¬ ………(,,DQÈm¢Þ IDATQ‘úRRR‚+W®tYvCOOO]ÜX[[«×¶ì|3åæÉ0NñÑ´´´¨KÃòòrTTT¨×híœp.))ér¶õyvvv3f ÆŽ«þ8vìXN"‚!u£üü||þùçJ¥‰D‰D‚ùóç ¶£Z~~>"##±yófañâÅxë­·àããsÏï­««Ã¢E‹››{ǵs444°uëV<ûì³Ý}@jkkCQQ.\¸€ÂÂB«/—/_FYY™zWO==½.ÊÍ/"ÌÍÍamm sss–~Ôg©T*TTT 
²²R]"–––¢¬¬¬K~ózN#GŽTÿ»·³³SnooN!öQ™™™ˆ‹‹CBBNŸ> xxx`É’%pssƒƒƒƒÐûŒêêj>|D||<.\¸KKKøøøÀ×מžžü½Þ‡´´´àüùóÈÏÏGQQÑ-e`発ºº3fŒúbmmÝåôxKKK®ÇL½ª½½())QoôÔyüíü·Ü¹4ðÛñ·óßï¸qãÔ'L˜[[ÛAQZ³ $"¢Gvþüyüïÿþ/~þùgØÛÛãõ×_ÇSO=uÛ„ÒÑÑ}ûöaÆ HHHÀÌ™3ñÑGÁ××÷¶_ßÐÐwwwœ>}ú® kÿ~-Bú픺ââb¨/‹í©ÿ{šššv)Bììì`cc£¾ÎÂÂBà¿ Qï¸q㊋‹QTT¤.̯\¹¢.KKKÕŹ¥¥%Õ…¡ƒƒƒúóY¦]aa!vìØ;wâüùóprr‚¿¿?|||ðÄOð˜pŸòóó‡ÄÄD¤§§cèС ÅSO=777Nö’ŠŠ üúë¯ÈÏÏG^^žúó¢¢"ttt@[[ÖÖÖ·(SX£G ,---]&__€WUUømÓGGGL˜0NNN˜0a&L˜GGÇ5ùÏ‚ˆˆZCCÖ­[‡/¿üöööøàƒ ‹ûüÄS§NaíÚµˆÅâÅ‹±aÃ899©oojjÂÒ¥KqüøñÛ.‚ý{ƒuŠP¡P //¹¹¹ÈÉÉÁ¯¿þŠóçÏãÒ¥KP(sssØÛÛÃÑѱK‘ÁI(¢ûwóäm熟C©TBCCÖÖÖ°··Ç„ àììŒ)S¦`Ê”)066ú¯0`( ÈårlÚ´ ‡†¥¥%$ žyæ8;; ¯ß«¬¬„\.ÇŽ;™™ [[[¬Zµ kÖ¬á›Fݤ¾¾ÙÙÙÈÊÊBvv6rss‘ŸŸ¯žd611““&Nœ'''899aÒ¤I;vl¿\“èQÔ××ãüùóÈËËC^^žúó‚‚( hhhÀÆÆNNN]6gê¯kɲ $"¢‡²wï^¼ôÒK¨¯¯Çúõë±zõê>_ þ^FF^yåœ={ùË_ð?ÿó?hoo‡··7>|×ÉÁN:::ê¯kkk#*• ………ÈÉÉÁÙ³gqæÌõ ж¶6 2“&M¤I“ÔE`gÈb‚¨g) âüùóêÒ0//999¨®®ØØØ`Ê”)˜:uªº4œ4iOã|•••ظq#¾ûî;\¿~ÁÁÁXµj/^,Ø]^^vî܉M›6¡¦¦"‘o¼ñ×û}ÅÅÅ]vËÎÎÎFaa!T*,,,Ô›ãt'N„¹¹¹Ð±‰ú<¥R©>öv¾Až““ƒœœ455AWWS¦LQ†›Qõ¥³«n‡!=¶¶6¼÷Þ{øâ‹/ ‘Hðå—_öë'“íííøî»ïð׿þÓ§OÇ¥K—P^^®¾]KK ÚÚÚhkkS/t¬¥¥KKKŒ?êÓmÄT\QQŽ?ŽÌÌLdff"++ ÐÔÔĸqãàììŒÉ“'cêÔ©˜:u*ìííd1JÔß•––"77gΜÁÙ³g‘““ƒsçΡ¹¹ÚÚÚprrÂìÙ³1{ölÌ™3S§NåÏò•á“O>Á–-[0lØ0¬^½øÃ0zôh¡£ ­­­J¥øæ›opúôi,X°ëׯ‡«««ÐÑú”ÆÆFœ8qéééÈÈÈ@ff&ª««¡¥¥8;;ã±ÇS—œÈ$ê~J¥ùùù]Jùììl”——CCCãÇÇܹsáêê 777Lš4©O X° $"¢ûV]] ???œ9s6lÀ3Ï<#t¤n“““ƒy󿡾¾`aa±cÇÂÁÁA½»Yç:;VVVfb¤¦¦F]v^*++1dÈLŸ>³gÏÆc=†©S§bÒ¤I000:2=¥R‰K—.áÌ™38sæ Nœ8ÌÌL\¿~úúú˜1cF—ÒpìØ±BGDmm-þùÏâ믿ÆÈ‘#ñþûïãÙgŸ…žžžÐѵÔÔTüíoCZZüüü°~ýzL™2EèX‚(..Æ‘#G‘‘£G";;íííptt„««+æÎ‹éÓ§cêÔ©êÞ£ª««Ã””„½{÷âÊ•+Ð××Çã?WWW¸ººÂÅÅfffBG%¢{P©TÈËËCFF†º0ÌË˃¦¦&f̘OOOxyyaöìÙ½:”À‚ˆˆî©¤¤óæÍƒ¹¹9âããûõ)Å÷¢R©ð§?ý _~ù%víÚ¡#=’¦¦&9rÉÉÉHNNÆéÓ§¡¥¥…9sæÀÝÝ...˜={6× $¢.ÚÚÚãÇ#55Äõë×agg§. /^< NS,**š5k°ÿ~¼ùæ›øŸÿùþnìÃT*~þùg¼õÖ[Ð××Ç?ü///¡cu›ŽŽüòË/HJJ¾}ûpìØ1hkkÃÕÕžžž˜?>f̘Ñ/7B ¢[UWW###ÉÉÉØ¿?rssabb‚Å‹ÃÓÓžžž°¶¶îÑ ,‰ˆè®êêêàææ$''cèСBGê}ô>ûì3ÄÇÇcñâÅBÇy ¥¥¥ˆŽŽÆ®]»päÈ´µµaúôéX¼x1ÜÝÝáææ6hþ?Q÷èèè@VVRRRœœŒC‡¡±±“'O†ŸŸ‚ƒƒûõæR©/½ôììì°yófÌ;WèHtŸªªªðÆo@*•bÍš5øúë¯ûí¼R©DJJ ¤R)âããQUU{{{õ4ÑÂ… yº0Ñ QRR‚¤¤$$%%áÀ¨««ÃÔ©S±lÙ2¬X±ŽŽŽÝþ˜,‰ˆèŽT*üýýQXXˆƒºÓVþð‡? 
<<gΜéó Ò—””`×®]ˆŒŒÄ‘#G`dd„€€øøø`Ñ¢E9r¤Ð‰hQ(8~ü8öîÝ‹ÈÈHœ?ãÆÃòåË‚ÇBǼ'•J…>øÿûßñöÛoã“O>áîÎýÔîÝ»gggDEEõ«ç,'NœÀÏ?ÿ ¹\Žòòr¸ººB$ÁËË öööBÇ#"µ··ãèÑ£ˆ‡\.Gqq1fÍš…+V@$ÁÒÒ²[‡!ÝÑ·ß~‹÷ßÙÙÙ3fŒÐqzR©ÄÒ¥Kû÷ïïS»Œ¿­'¸cÇÈdÿ½ûŽ‹âZÿþ¡ Hg)"E+رÄX0‰%˜Ø\Qb‰^½^£Æ“k⢈]cC#؈5Š¢¢Æ‚ *(*ˆ´E–&u÷üþð»óE@Üe`yÞ¯/–awγ;sæÌ>sæœCøçŸ`dd„‘#Gb̘1pww§ÛŽ!õæþýû AHHbccakk‹1cÆ`ÆŒhÛ¶-ßáU©´´ß~û-Μ9ƒíÛ·câĉ|‡D>Ò£G0bÄ”••áüùó :¹–žžŽ;v`ß¾}HHH@ûöí1~üxŒ?¾IžsBjG*•"""AAAFnn.ˆiÓ¦á믿þ¨óJB©Rrr2Ú¶m‹íÛ·cüøñ|‡Ã›—/_¢S§NX½z5¼½½ùÀ›ž›6mÂáÇ¡¡¡±cÇÂÃĺº:ßáBš¸ØØX„„„`ÿþýHLLÄ'Ÿ|‚Ù³gã믿n0Ç(‰D‚)S¦àï¿ÿFhh(ÝR¬Drrr0zôh<{ö W¯^Uø˜]*::¿ýö‚ƒƒ¡¯¯©S§b„ pqqá;4BH#SZZгgÏbß¾}8qâLMM1kÖ,Ì™3FFF¼>JB©Òĉ‘ŸŸ'Nð ïöìÙƒE‹!11‘×YÃÂÂðóÏ?#,, ]ºtÁ¬Y³ðí·ßBWW—·˜ˆüååå)ýlœ Å«W¯põêUÄÆÆbÉ’%µ.ÚnïbŒáâŋضmŽ= kkk,\¸Ó§Oç}VàÙ³g#((çÏŸG÷îÝy…Èßëׯ1pà@¼~ý—/_®Óey‹‰‰ÁŠ+püøq¸ººÂ××ãÆ£[Ú›8y¶C¤i{ñâ±mÛ6”——ÃÏÏ~~~4ÙVúWŠBHƒp÷î]>|k×®å;”aÒ¤I°´´Äÿþ÷?^ÊŠŠÂçŸwww¨ªª",, ‘‘‘ðôôl´ÉA±X @€cÇŽÉ}ÝR©?ÿü3–.]Š>}ú ]»v¸ÿ>œáéé)÷ò䡸¸ëÖ­«õx‘ï{¤ö=z„_ý#GŽÄž={ê´Žòòr…B 4@Î*¶žÔ 4þù'ñå—_bÑ¢EpvvÆÁƒÁW?…#GŽ`×®]8zôh½$ÂvlÇŒúütttpòäI€ÂË«ŽX,†ºv튧OŸâøñ㈌ŒÄÔ©SUr°ªí×ÛÕÆ@íÐÇŠˆˆÀòåË¡¢¢xzz⯿þâ% é«äÉÚÚkÖ¬ÁÓ§Oñý÷ßcãÆhÛ¶-öîÝ[ûö–B!o™8q">|8ßa4(»wïfÆÆÆ¬¨¨¨ÞÊ,..f‹/fªªªÌÍÍ…‡‡×[ÙŠ–ŸŸÏ°?þøCîëþõ×_™¹¹9“J¥,''‡ 2„]¹r…}òÉ'láÂ…r/O^ÊÊʘ……«ÍéÙûÞ#ù0ååå srrú¨uXZZÖj»}(EÖ¾¼|ù’MŸ>©ªª²O?ý”={ö¬^Ëõê³°°`ëׯ¯·2뺟>}*·Â1ƒýùÑ£GLKK‹‘[ù å˜ÁÇþüóÏ?3VXXX¯å …B¦¦¦Æ<< :t(Ï‘4,Í›7ǰaâð²?~ŒÞ½{ƒ1†{÷îaÆŒPQQQx¹õ­cÇŽèØ±£Ü×ûüùs¹¯³¡i ¡¨z·®]»âÎ;˜0aFމ(¼ÌäädìÞ½¿ÿþ;ÔÔÔ^^E²³²²0lØ0dffÊ­ü†rÌàcž?>`çÎõVæºuë0þ|¬_¿‡þ 1À2e<)¢¾5t¬jzôè»wï¢K—.8p`µ·lS‚BH%×®]C·nÝ ©©Éw( N¯^½pýúu…–ñúõkŒ=¸rå ìììZŸæÍ›Ç¡˜™™ Ì›7ßÿ=z÷î™3g"-- Œ1œ:u >>>hÕª’““1xð`¨««£cÇŽˆŒŒœ|::: zï:+ÖeÓ¬Y3bõêÕ˜>}z­?çºÚ¹s'œœœÐ«W/…–S•yóæ¡yóæµÚ·ÃÕq™÷í›Õ‹¶mÛVåq±  >Ĉ#°|ùrxzz¢[·n¸víW^Mõ¸.u…ýYSS“'OÆöíÛ를ððp,Y²{öìá}üCy“m?‰DR©]ýãö‡ì7Œ1ܾ}K—.…ƒƒbccѧOhhh ]»v8uêTmqMmÁûê[mÚië>”¼ÚÍÚ¨íúèXÕxèëëãèÑ£0`<<<Þ¾©èB—ž={² ðFƒôÏ?ÿ0 3kåÊ•ÌÞÞž‰Åb…•ÑÐdff2[[[¶fÍn™X,fmÛ¶eVVVìÅ‹L$1ccc€ýôÓO,--…‡‡3Ö¹sçJëC·êäåå½³üçŸfšššÜx;Û·ogØÄ‰¹ç8::¾sËlYII ‹ŒŒdúúú  …,<<œyxx°¬¬,6tèP–––ƽî›o¾aFFF,''‡•””°^½z1oooîÿOŸ>ešššµºÅ§â{©.ŽGUûÙ¦¦¦2‰DÂÒÒÒfddÄ.]ºÄÒÒÒ˜††³²²bB¡³„„¦®®ÎúõëWmlùùù¬sçÎlƌܭñÛ¶mcØ¡C‡äº½ííí™ 
÷·——ËÈȨ±ŒÔÔÔ*?K‰DRív{Ù­Y ,`ÿüóÛ³gÓ××gìîÝ»Œ1Æ/^̰¸¸8îuIIIlÔ¨QÕ~žMÅ¿þõ/Ö©S'…§Ð­[7¶hÑ"…­¿&R©´ÖûöÛÇ«êöÍŒŒŒ÷²³³«\cŒµlÙ’µiÓ†‹ÍÒÒ’9880Æj®Çu­+|¹yó¦ÂÛoÆÞl''''6þ|…–ÓTlWk»oè~S^^ÎΟ?ÏíÛ .dÑÑÑìèÑ£ÌÐЩ©©±ëׯTÈØ»õ£6mÈǶuïk‡ª"ïó¤ªT¼Å˜ŽUÊ«  €999±~ø¡ÊÿS‚BH%VVVì·ß~ã;Œéùóç û矲þׯ_3###¶wï^…¬¿¡š?>ÀD"Q¥å‡bØìÙ³cU'ëìí홊ŠJ¥eU\J¥Òw–÷ïߟ©««³²²2ÆØ›É °:pÏ©jLž·—Éâzýú5·ìôéÓ @•?GŽa7ndXlll¥u·iÓæƒ„ÕÅQÛ϶ªÏÇÁÁ¡ÊÏ[[[»ÚØ~øá€%%%qËŠŠŠX@@ËÌÌ”ëö644dX`` “H$,66–åææÖº Æ*–5m·÷‘í¥¥¥Ü²Í›73lüøñŒ1ÆÒÓÓ™––›1c÷œU«VÕëø{ Yvv6ÓÖÖVØçQZZÊ455ÙÁƒ²þQ›}ûíúX›}³ªcÀûÖÇc¿üò ó÷÷gŒ½ùR_1†šêq]ë _ŠŠŠ˜šš;~ü¸BË9yò$ÓÐÐ`YYY -§!¨ªÝ¨iß®ë~#[¯¬½fŒ±ÀÀ@€Mš4©ÒsêÒ¾ý>jó:yµuU•ÿ6yŸ'U¥ªó:V)§;w2@PéœE†n1&„RÉ«W¯`llÌw ’©©)€7c®(Bll,rrrЧO…¬¿¡º|ù2ÀÀÀ Òòþýû""" Êq544À«±Œª^Û§O”——ãܹsÀÝöéîî^ûà+¬[[[›[vãÆ ¸ºº‚½¹[égôèÑ8zô( uëÖ•Ö¥ªZ÷S³ªâø˜ÏV]]ýe(**ª6Ž3gά­­¹eZZZ˜3gLMM庽…B!ÔÔÔ0{ölôèÑ999Ð×ׯuo«i»ÕDCCƒ{üÕW_bbbæææðôôÄÞ½{‘šš Æ.]º„/¾ø¢Æõ6ÆÆÆhÛ¶í{·ÍÇ‹Å(--…¹¹¹BÖÿ!êr,«Í¾YÕ1 :‹-ÂäÉ“! €’’.†šêñÇÖ•ú¦¥¥###¼|ùR¡åܺu mÚ´@ Ph9 AUûqMûv]÷Ùz+¶KÇðf¨‰ŠÏ©Kø¶Ú¼N^m]mÔÇyRUèX¥œzôè‘H„„„„wþG BB!„ðJ*•ž={Vi¹,Q­£££rW¬XüS§NÅþóøúúbùòåøå—_>z݉‰‰‰())©ò²½ksðcññÙÊÞó“'OÓ”)Spûöm 4‘‘‘èÓ§Ö¯__ç2jÚnB– ¨89Á÷߯֝_Û·oÃÍÍ­ÊD,!o“ç¾)sñâE8::ÂÕÕ¾¾¾•ÆÛª©+"¢üä¹ßXXXx3–éûÔµ-¨Íëê³­ãë<©.èXÕ¸Q‚BH%&&&xõêßa4H²žƒ²ž„òÖ®];+¬MC5hÐ àzòɼxñðå—_*¤\©TŠÜÜ\ܹs?ýô<ˆ•+WV9AOÅ“HYOÃê´oß……… ¬´<-- °··ðî{–7>>Ûîݻ֬YS©—H$‘#GäÓ/¿ü‚Î;ãÂ… 8zô(TTTðßÿþ·ÎeÔ´Ý>„¬§ÒÈ‘#¹e666˜8q"¶nÝŠ€€LŸ>ýƒÖ©Ì²³³‡O>ùD!ë744D³fÍ‘‘¡õ+BÅú#Ï}SfêÔ©ÐÕÕåz!U,¯¦z¬ˆx©¨¨¯^½‚¥¥¥BËéÑ£!‰ZNc%Ïý&''@õ½þ?¤-¨¸Ÿ×æuõÙÖñužT[t¬j\nݺ@GGÇwÿùQ7/BQ:4IÉûÉ&)yþü¹ÂÊMRòêÕ+…•ÑЈD"æààÀZµjUiÀèE‹±Î;³‚‚ÆØÿŸ¯â$²qòdËJKKÖºuëJe”——3Ü ×Œ1¶dÉÖ¶m[¶wï^væÌvíÚ5öàÁnÒÆ:t(À6lØÀ’““Ù¶mÛ¸A»###Yyy9Wyy9÷º‚‚fccÃTUUÙ‚ Xhh(Û¸q#stt¤IJ!„ÔΜ9sjœ¥´© …ÌÂÂB¡e¼~ýšuêÔ‰õë×åææ*´¬†$33“Íš5‹õêÕ‹}ÿý÷ÌÇLJ-Z´ˆååå1Æ;pà7Ãï¦M›Xnn.Û»w/SSScØÚµkÙ½{÷ØÊ•+¦¦¦Æ6oÞÌâââXAA[¿~=À455Ù¾}ûX^^;~ü8w’YñGOOíܹ“1ö&ÙÕ»wo¦¦¦Æ:tèÀnݺÅ>ýôSæååÅöîÝËÖ®]Ë444¶|ùröðáCî=ÅÇdzÏ>ûŒiii16iÒ$–žžÎýÿÌ™3¬{÷îLCCƒ™˜˜°… ²¾}û²™3g²°°°wNœeë|û=Þ¹s‡­_¿þ½qÔôÙfff²_~ù…{ïW¯^eW®\aÚÚÚ [³f ËÎÎf{öìá¶ÁÖ­[«Mb>xð€}ùå—LWW—ikk³±cDz—/_Êu{qƒ™¯]»–ýûßÿfC† aOž<©UÏŸ?g«W¯æö‹={ö°œœœ·[UN:Åh”3 
IDATÜÝÝYÏž=™§§'›:u*[¶l+,,¬òù£Fjr“UçÇdÍš5c·nÝRh9?üðk×®B˨Im÷í-[¶0]]]æããýö}û¦ìWÕ1 ªc†l&íÀÀ@¦««ËìííÙ™3gØŠ+˜ºº:ëÓ§KOO¯±×¥®ðeéÒ¥ÌÅÅ¥^ʺrå SWWg»wï®—òøPU»ºeË–ZíÛuÙod Â7²ÜÜ\–’’ÂV¬XÁÒÒÒªÝÿ«¹-`ŒUYßjóºmëÞ×UE^íæÛ"""زe˸s ©S§²ÐÐP:V)¡’’öÕW_1ggg–ŸŸ_åsT«ãˆ•„B”Ò¡C‡0}útäääT;®KS4nÜ8”••!$$D¡å$%%aàÀ000ÀÑ£Gáàà Ðòš"ÆÀƒ¯¯/·¬¨¨çÎäI“>> iš¤R)úô郰°°5nŠ‹‹1wî\ìÚµ ûöí÷ß~«Ðò’““áàà€‹/*ìVfÒð”––ÂÖÖK–,O½”) 1þ|üþûïðóó«—2•™³³3âããë<Ù!MX,ÆøñãqëÖ-\¾|íÛ·¯òy4!!„J† œ>}šïP”‚‚œ:u _ýµÂ˲··Çõëס¡¡lÙ²… šÈÇòåËáëë OOOn™ŠŠ tttУG´jÕŠÇèˆ2Û¹s'úöíÛ䓃·oßF×®]ñçŸ"44TáÉAàÍžžžX¸p! Nß„¬[·˜1cF½•éçç,\¸cÆŒ¡± !¼ùçŸÐ¥KÄÄÄàÒ¥KïM” $„ò}}}|ýõר¹s'ß¡4(ÁÁÁÐÔÔĨQ£ê¥R©çÏŸÇÖ­[ ;;;,\¸Ó¦Mƒ¦¦&¯±ùøøà?þÀ… гgO^c!òWPPwww¼~ýW¯^…ß!!..+W®Dpp0Úµk‡¹sçb„ ÐÖÖæ;4Bˆxþü96mÚ„mÛ¶AMM ~~~˜7otuuk½JB©Rrr2Ú¶m‹mÛ¶a„ |‡Ã›ÔÔT¸¸¸`õêÕðööæ;@tt4pèÐ!¨¨¨ÀÃÃ4hï_º !$&&GŽÁþýûñôéS 0³fͨQ£ ¦¦ÆwxÞ )0iÒ$œ;w'Nœ@ïÞ½ù‰ÈÉ«W¯0räH¤¥¥áÒ¥K°¶¶æ;¤J>|ˆß~û „®®.&Mš„‰'¢K—.|‡FidJJJpúôi®— ¥¥%æÌ™ƒY³fÕ©% !„¼W`` –,Y‚¨¨(ØÛÛóN½“H$pww‡ššþþûo¨ª6¬‘9rssqàÀ:t×®]ƒFŒ¸»»Ó,Ô„z…9rñññ°··Çرc1mÚ48::ò^•JKK1yòd?~[¶lÁÔ©Sù‰|¤ØØXŒ1Œ1œ;w­[·æ;¤÷ÊÊÊÂöíÛ±ÿ~ÄÅÅÁÙÙãÇÇøñãáààÀwx„J*•"<<ÀÑ£G‘ŸŸÁƒcÚ´i=z4ÔÕÕë¼nJBy/ÆFމG!""¢É›5{öl#&&-Z´à;œj¥¥¥áèÑ£ ÁÕ«W¡««‹áÇcذa8p ÌÌÌø‘¢DŠ‹‹qãÆ œ;wGŽÁãÇѺukxxx`̘1èÚµ+ß!Ö c +W®ÄªU«0wî\üüóÏÐÒÒâ;,R!!!˜1cºté‚àà`¾Cªµ¨¨(î‚_jj*ÜÜÜàáá¡C‡ÂÙÙ™ïð!<+--EDDN:…C‡!-- nnn7n¾ùæ¹çS‚BHµrssáææ†fÍš!""âƒÆ±hÌ–.] 
œ9sýúõã;œ’™™‰£GâØ±c¸zõ*Š‹‹Ñ©S' 4ħŸ~Úd¶#!D>$ "##qñâE„……áÚµkܱeøðáøúë¯áââÂw˜u OOO´hÑ»ví¢[Ž‘ÌÌL|÷Ýw Á¬Y³  ¡¡¡ÁwXu"•JqùòeáäÉ“HOO‡­­-¾øâ |ñÅ4hµß„4Ïž=ÃÙ³gqöìY„……¡°°®®®=z4Ư»»(AH!ä½nܸ¡Pˆ£GBGGŽŽŽ8yò$ÌÍÍùMacX°`6lØ€£GbĈ|‡ôQd½|†;wîzö쉢W¯^èÑ£LLLxŽ”Ò””” ::7oÞÄ¥K—±X {{{ 4ˆ»à L=ËSRR0sæLœ;wsæÌÁªU«`hhÈwXä=¤R)öíÛ‡ @OO[·nÅgŸ}ÆwXrÃCtt4— ¸~ý:TUUÑ·o_|þùçèÛ·/ºvíJÉ¢$²²²pãÆ „‡‡ãÌ™3xôèŒ1xð`î"………Bc !!„JÊÊÊ¡Pˆ[·n¡{÷îðó󃛛†Žââbœ:uJ)oyyýú5¦M›†¿þú AAA9r$ß!É]^^._¾Ì% ccc!•JѺukôèуûéܹ3ÝfGHÁC||—lˆ‹‹Cii)455Ñ®];´mÛmÚ´££#Ú´iƒÖ­[ÃØØ˜ïÐ Qj%%%HLLÄãÇñøñc$&&âÑ£G¸ÿ>rrrvvvèС:tè€N:¡}ûöpvv¦Äþ‰Dغu+‘••…Ñ£GcêÔ©E\\âã㘘ųgÏ””„gÏžq?²žøººº\¢¼mÛ¶•çÊ4‰% !D‰]»v þþþ8vì,,,0gÎxyyÁÄÄD!å=~üëÖ­Ã •JñÍ7ß`üøñèß¿?o'qqqÆ®]»’’wwwÌ;—ƒ<‘H$HNNÆãǹDÈóçÏñüùs$''#55eeeMMMXYYq DkkkXXX E‹Üï-ZФ¤Á*//GFFÒÒÒžžŽ´´4î§â¾_PPÀ½ÆÜܼR²¼U«VhÕªìííáèèH³‹7PQQQ Å©S§pçÎèëëcðàÁpwwG¿~ýжm[¾Cl0233K—.áÔ©Sxúô)¬­­ñå—_bøðá4h%»ÒÒR<~üñññ\Ò$))‰K¨ÔÕÕѲeK.yزeKXZZÂÒÒVVVhÑ¢ÌÍÍ)áKêMII ÒÒÒššŠ—/_âåË—HMMå.Æ%%%!++‹{¾¹¹9—ð–%¿íííáää„–-[òøNê% !DÉÈn …¸}û6zö쉹sçâ믿®·ñ¦òóó„?þø7oÞ„±±1FŒwwwôïß––– +»°°ׯ_Gxx8Ž;†¸¸8ØØØ`ܸqðòò‚ƒƒƒÂÊ&O"‘ --K¾ý“™™‰ÌÌLH¥Rî5zzz°²²‚™™¬¬¬`nnÎ%ÍÌÌ`jj @^g†#Ê¡¬¬ "‘ÙÙÙÈÎÎFFFF¥ä_FFRSS¹}µâ©¶¾¾>¬¬¬`aaÁõ”­˜ ´±±¡ HOOÇÉ“'qöìY\ºt ¯^½‚©©)>ùäôë×nnnèØ±c“¸¸!•J‘˜˜ˆÛ·oãÊ•+ˆˆˆ@\\455ѽ{w 2Æ ƒ««+ß¡’:ÊÊʪ²ç•ì¢_nn.÷\ ´hÑÖÖÖhÑ¢¬¬¬¸$¢¬ÍÔ—¼WAA²²²™™‰¬¬¬JÉ¿´´4¼xñiiiÈÌÌä^£ªª sssXYY¡U«V\"P–´³³Sªž€uE BBQPÏÌÌĘ1càçç777^ãzñâŽ;†'Nàúõë(**Bë֭ѵkWn|¬¶mÛÂÚÚúƒ’7eeeÈÈÈ@BB>|ˆ ::wïÞ…D"A‡0tèPŒ3ݺu£+ÖJ¤¼¼™™™HOOÇË—/‘‘‘Áý–%fRSS‘‘‘âââJ¯ÕÒÒâ’…¦¦¦011áþ–=611¡¡!  
¯¯O½Y”T*Enn.Äb1òòò ‹‘ÌÌLdggWJfgg#++ "‘yyy•Ö£¦¦333î‹GÅDµ¥¥e¥„5}ùhz¤R)bccqùòe\½zW¯^ÅË—/¡¦¦†¶mÛrƒ×wéÒNNN ½€¦hyyyHLLĽ{÷…»wï"&&hÞ¼97sg¿~ýгgOªMÄëׯñâÅ ¤§§#%%ééé\§bÏ®·ÛkCCÃJù,,,¸Ç²6ÜÈȈk¯úØä]EEE\[œ““‘HÄ%ÿ233!‰ ‰*ý]TTTiFFF•zªÊî8iÙ²%,,,`mm sssº¾(AH!܃°aÃìß¿ÚÚÚðòòœ9sdWøÒÒR®Add$bbb””‰DàMO0kkkèëëCOOÐÕÕEII ^¿~’’äåå!##\ÏssstìØ:uB¿~ýзo_…ÝFM—¼¼¼*>²NÙOÅDì犚5kÆ% `dd}}}î ¡¥¥mmm.¹¨§§4kÖ FFF<|Ê¡  ¥¥¥‹Å(..FQQòòòPRR‚üü|"//yyyÈÍÍE^^rrr¸Ç—çççWY†‘‘Q¥ÄqÅä±™™Y•ËéK)ùÉÉɈŒŒä&‰¸{÷.^¼xàÍ8Wo1ieeÅõ´âóøQ\\\©§NRR7qN||<×[G__...èÒ¥ 7ƒgûöíéË9©VNN×#L–$ÊÈȨ2A”••Åg\‘žž ¹„¡ìñÛ yóæÐÒÒ‚ttt ­­Í=¦‹‚5ËÏÏGqq1òóó+=.((@QQÄb1Äb1rss‘““Ãý-[&{,›Ô®"]]]˜™™q=JMMMajjZ)aljj sss˜ššÒÅ9¢!!„4B² 7„B!ÂÂÂàä䄹sçbòäÉî–¥ââb$$$ %%…»šœ——Ç% ¡¡¡ððp´k×={ö„¹¹9w5ÐÉɉ’D®rss‘Íõ,“%”*&™*&ÞN>U¼ê}455ѼysèééASSÐÖÖ†––÷?•¾¨èëëCMM ***Ü Ð²$:îõU©)©PÕÿ «üVÓÿ% ×ËN–ÜÀ%ù7‰[‰DÆÄb1€7½‚ ¸×¡¸¸b±¥¥¥•Æé{w’¶{‚Ê~¿ý?ÙcJö¾ˆD"ÄÅÅáñãÇHLLäo‰‰‰ÜoÀ›z.ë%#¸ý·â~¬§§ òñ£Y³fÐÑÑAyy9—¯XÿŠ‹‹¹ãWÅ/ï999Ü-ô²Yn7ã͵jÕªÒìôŽŽŽhÓ¦ ììì¨×>Q¸¼¼<ˆD¢w’Oo' ªúûížàoSUUå’…ZZZ044äêSÅz%k;eËÿßV«««su±yóæUÞ2]]2RMMíè+¶¯U)((¨ò"§l¹¬ç<®±X ÆwÞ ¼IÊŽEEE(((¨”¬ŽššZ¥ä¬‘‘Qµ [ÙßÆÆÆ4Ô(AH!HAAöìÙƒ 6 11Ÿþ9æÎ‹Ï?ÿ\éOÆÛ·o¬X±‚ïP©QUÉ­ÂÂB”””pWÌ_¿~¼¼<”––"//K ÉzÆ•Oöe'ðïKÀՔЫ/µI`êééq½‰ ¡¢¢Â}’}1“õÂ400€¦¦&ôôô¸/YFFFÜ2===4kÖì/R„(‹œœœwÆ×JOO‡H$ª”Ô“ýÔôå½*²^Òç²[7e·ÈW/ÎÜÜœf™'ZAAŠ‹‹‘——‡ÂÂB®GúÛeÉsY+û]—d_>&©©«« mmmèééAWWZZZÐ××ïã÷%CIã@ý¼ !¤xþü96mÚ„íÛ·£´´“&MÂñãǛԬˆæææÈÈÈà; BjE[[ÚÚÚ âvbYϼ÷©ø%ÆÁÁëׯLj#*õd¬JMÿ'„ȇ‘‘ŒŒŒÐ¾}ûzìÃĉ¡­­_ý•KÀËÈô„45ºººÐÕÕ…@ à-†ÜÜÜJ“®UTñba`` Ž;†óçÏÃÀÀà½ÉyÙ…5Bꊄ„Ò€EDDÀßßÇŽƒ¥¥%þýïÃËË ÆÆÆ|‡Vï²²²øƒFGCCフfff°··W`D„ú ë½[XX{{{ª×„40“õÕ155Eyy9Õa¢pt‹1!„40¥¥¥†P(Ä;wàææ???Œ=|‡Ç___DGGãÊ•+|‡B!„4*ÅÅÅ4®!TII MœBêõ $„B$aË–- „H$˜1c€ž={òZƒ ¸ !„R{”$¤ñ¢ä ©/” $„žÝ¿6lÀþýûѼysxyyaΜ9°¶¶æ;´ÅÜÜ"‘ˆï0!„B!DéP‚Bx •Jqúôi…B„……¡mÛ¶ …˜4i7£©ÌÔÔÙÙÙ(++kÒ·ZB!„Bˆ¼ÑÜô„Ròóógggn–ÐsçÎááǘ9s&%«affÔ‹B!„BäŒzBH=xöì°cÇ”——cÒ¤I …³³3ß¡5²aFFZ´hÁs4„B!„¢<(AH! 
tõêUøûûãøñã°´´ÄÒ¥Káåå###¾Ckt ++‹çH!„B!D¹P‚B䬴´þù'üýý‰^½z!((£G†º:vëÊØØššš4“1!„B!„ÈAH!r’••…ü­ZµÂŒ3àèèˆ[·náúõë;v,%åÀÄÄ„„„(ŠŠ ‚‚‚øƒ"'!!!4±!\ll,ÔÔÔpÿþ}¾C!J޾­BÈGЉ‰¿¿?‚‚‚ «« ///Ì™3VVV|‡¦tZ´hA·B!µ$‹¡««Ëw„ ­­ ©Tв²2¾C!JŽ„„RR©§N‚P(ÄÅ‹Ѿ}{øûûcâĉ4±™˜˜ ##ƒï0QZ°±±á; BˆœXZZbìØ±|‡AùÍ›7ÇÈ‘#é;Q8Æã;Bi,òóó±{÷nlܸOž<Á!Càççwww¨¨¨ðžÒ›}Š€€ìر‰S¦L¯¯/œœœø­IHHHà; B!„BQ*” $„j\¹rB¡¡¡¡°²²Â²eËàååCCC¾Ck’ÌÌÌ ‰øƒB!„B” % !ä-%%%øóÏ?áïï»wï¢OŸ>8xð FE3óÌÌÌ ééé|‡A!„B!J…¾éBÈÿÉÌÌÄ–-[°yófdggÃÃÃ[·nE·nÝøüSSS¢¨¨ÚÚÚ|‡C!„B!J„„&ïÞ½{ð÷÷GPPôôô0sæLÌž=–––|‡FÞbnnÈÈÈ€­­-¿ÁB!„Bˆ’På;BáƒT*ʼn'0pà@¸ººâöíÛØ¸q#RRR°zõjJ6P••Ås$„B!„¢<(AHiRòòòàïïGGGŒ=:::8þ¥¦¦ÂÚÚ×®]CïÞ½ù‡¥2vìXøúúÂÖÖöã([ i\V®\ CCCÌ;·N¯—w···‡¦¦&=z$—õÒ”;v ×®]úuëêôzªÏ¤6ècBïîÞ½‹)S¦ÀÖÖ›7oÆÌ™3ñüùsìÝ»÷ƒ“ƒYYY6l233-ᛩ©) ##ƒçHQ>ÁÁÁxðàÁ;ÇQ:¶ÒøDDD 66¶N¯UDþü¹ÜÖEHSààà:½–ê3©-JBx!‘HpìØ1ôïß]»vEtt4ñüùs¬Zµ uZo`` bbbžžooo9GMMMMB$áСCÐÒÒ‚ŠŠ ??;v쨴Œòa.\¸ðÎq´ªcëÁƒѼys¨ªªB(¢¼¼pøðaèèè ((ˆ·÷@ys®e``€Ã‡cÊ”)èׯc8uê|||ЪU+$''cðàÁPWWGÇŽ àýçSùùùXµj<==ѯ_?ôîÝ·nÝ‚D"ADD/^ $%%ÁÅÅ&&&ضm¼½½!•J¹õy{{£  >Ĉ#°|ùrxzz¢[·n¸víW^aa!V¬XÉ“'cÞ¼yèÙ³'V®\ ‰DRm<„(H$ªÏD±!„Ô£ÜÜ\öÛo¿1;;;¦ªªÊ†ÎÂÂÂäZæää$×u’†ÅÑÑ‘­ZµŠ{üvsVÕ2BHíUu­jÙâÅ‹Ç-KJJb£Fª—8 !5ËËËãê¯T*e"‘ˆ3ì§Ÿ~biii,<<œ©¨¨°Î;s¯{»ÎK$6tèP–––Æ-ûæ›o˜‘‘ËÈÈ`‘‘‘L__Ÿ`B¡…‡‡3–]åúc¬eË–¬M›6Œ1ƤR)³´´dŒ1ÆòóóYçÎÙŒ3˜T*eŒ1¶mÛ6€:t¨Úxrrräü)Ò0P}&ŠD“”BêÅãDZqãFìÞ½0mÚ4øøø uëÖE¢!!D¡.^¼œ>!!! 
…®®.6oÞ ---"88˜K*˜››ÃÔÔ زe ÂÃÑ›› 333èêêB ÀÐÐ/^Dhh(ÆŒ+++DDDàÎ;˜5kƇääd\¹r§N‚ŽŽ6oÞ “jã111áù“'Dþ¨>ESat!ääææbÇŽ@rr2†޹sçbÀ€|‡Fš‹/bРAÈÌÌ„©©)ßáÒäI¥RôéÓaaaÐÑÑá;Bš¼’’”••ñzÁ–BHãBcBj%11>>>hÙ²%V­Z…¯¾ú 8~ü8%I½377î6cBˆ|¨¨¨ ((èƒ_·sçNôíÛ—’ƒ„4‡âÚJBHãÜ¿ŸïPˆ£{®!Õ ƒP(ÄéÓ§akk‹•+WbÆŒ41ᕬ×`FFÚµkÇs4„4MgΜÁ‚ P^^ŽœœÄÆÆò!äÿˆÅbò!DN´µµyyy¢P4!!„#›F~ëÖ­ÈËËøqãàëë‹Î;ó!ï077Çÿû_|÷Ýw|‡B!„B!ÝbLAdd$Ö¯_àà`aΜ9ðöö¦±kHƒ&h BB!„B‘JÒDI$;v þþþˆˆˆ@çαuëVŒ7Íš5ã;ˆ‰‰ % !„B!D(AHH‘€ 6`Ïž=PUUÅŒ3ðÝwßÁÞÞžïЩ“-Z **Šï0!„B!¤Ñ£!!JŒ1†°°0…Bœ9svvvX½z5¦M›F3“FÏÄÄ|‡A!„B!ž*ßB䯸¸Û·oG§N0xð`¼~ýGEBBæÎKÉA¢,,, ‰øƒB!„B=J¢D^¾|‰eË–¡eË–ðññA÷îÝ‹/⫯¾‚ª*Uy¢<^½z…²²2¾C!Di#99™ï0!¡¼¼›6m¢ºLˆ’ùûï¿qÿþ}¾Ã JŒ²„(;wî`„ °µµÅÎ;áããƒäädìÚµ ...|‡GˆB˜™™õ"$DŽÆŽ‹ˆˆ¾Ã „|±XŒï¾û?æ;Bˆ-Y²AAA|‡A”% i¤ÊËËŒ¾}û¢{÷îxôèvìØgÏžaùòå\ò„e%ÛÇÓÓÓyŽ„Bi8Äb1ÀÐÐçH!ò¤££ƒ‚‚¾Ã JŒ&)!¤‘‹Åؾ};ššŠ#FàòåËèׯß¡R¯LMMPBBäéðáÃèÙ³'ßaB>‚••.]ºGGG¾C!„ÈѪU«```ÀwD‰©0ÆßABj 6`Ïž=PSSƒ§§'¾ûî;ØÙÙñ!¼iÖ¬vî܉‰'ò !„B!„4ZÔƒŒ1† .@(âÌ™3pppÀš5k0mÚ4èééñ!¼ÈÊÊâ; B!„BiÔh BB ¢¢"lÛ¶ ;vÄgŸ}†ââbœ8qñññðõõ¥ä !ÿÇ™™™|‡A!„B!õ $¤IMMŦMÿ½;‹ªÜÿþ†]VEdT@QDPTÑ\AM-Ò2·´ô—KVšW­¼ÙbYz+óæRîÙÍLÅ%5•-MYÙÁdW†f¾¿?¼s.#æ gž÷ë5/pfœóÃy–ó=Ïyžؽ{7*++1gÎ9r|‡Æ0jÉÂÂ<@vv6 Q\\Œââbôë×£Fâ;<†QkGEyyy«Þ;sæLôèÑCÅ1 ÓV±±±X»v-zô蘙™ÁÌÌ æææJ711á;d†aZ!33™™™¨ªª‚X,FMM jkkQ[[‹ššˆÅbTUU¡®®#FŒ@hh(ß!3›ƒaÔ@LL ¶mÛ†cÇŽ¡GX¶lÞxã n†a€‹/b×®]¨¬¬DQQòóóQRR‰DÒè½ÇÇóÏ?ÏC” £9^{í5ìÝ»:::M¾G^¾Äbq³ïc†"‘ˆ[­XWWD©T ©TªðÞåË—ã믿æ#L†aÚ(<<cÆŒáþ­¥¥¡P@@J¥¨¯¯Ç±cÇ0sæL¾Be:– džÔ××ãøñãØ¾};®]»///¬\¹¡¡¡ÐÕÕå;<†Q;iii­Z‘Q  ¤¤Ý»wFs]¼xãÇoö=:::˜?>víÚÕAQ1 ÓV¾¾¾¸~ý:Z:­KIIAÿþý;(*†až¡W¯^-ε­««‹ÒÒRuPdLgÆæ d˜öèÑ#|þùçprrœ9s`ccƒÈÈHÄÆÆbîܹ,9È0MpqqApp0´µ›ŸÃÍÍ%¦‚‚‚Z¼mX"‘àå—_î ˆ†y!!!ÍŽð …7nK2ŒXµjU‹eû¹çžcÉA¦Ý°!Ãt””,]ºöööøä“Oðâ‹/"==ÇŽƒŸŸßá1ŒFxûí·Q__ßä뺺º˜4iRFÄ0šK(â•W^iöÂT¯^½XÅ0jnâĉ‹ÅM¾.•J±|ùòŒˆa˜ö0oÞ¼FS4DDìÖb¦]±!èáÂ… ˜}úÀËË‹‡ˆ†i«É“'C&“5z^KK ‹-â!"†aÚCpp0·RyCZZZ˜5k1ËV0L víÚ…Ù³gþüóOäææ6zÏõë×1{öl899aÿþýXµjòòò°{÷n 4¨£Cf˜NM[[«W¯†P(Tx^WWÏ=÷OQ1Œæš3g$‰Âs:::˜7oO1 ÓVÏ=÷\£UŒuttðꫯ6{û1Ã0êMWW .lt!¯¾¾Ó§Oç)*¦³b B†iÆ–-[ðÆopWd…B!¾ùæWvüùçŸáëë‹#F ==?þø#²³³±aÃ6Ü›aTèµ×^k´°‚X,ÆØ±cyŠˆa4—››ÜÝÝ!¸ç$ ^zé%£b¦-,--1pà@…ç$ –-[ÆSD ô—… 6º7pà@6Ÿ=ÓîX‚a” 
"¬Y³ï½÷žÂÕX‰D‚;wâ£>Bß¾}ñòË/ÃÎÎQQQˆ‰‰ÁË/¿Üìj ô333,]ºTájªŽŽ|}}yŒŠa4×ܹs¹ùË<<<пž£b¦-BBB¸~¨P(Ĉ#àééÉsT Ã<+777 6Œ›®JGG/¼ðÏQ1K2̤R)–,Y‚/¿üRéë555زe BCC‘‘‘_~ù£Gîà(†Y¹r%7ºW `øðá000à9*†ÑL/½ôêëëÄÇñèÑ£&¯¨¨€H$R8Ñ~ôè×ahI[’o 5w2ÝðdYN^¹)#O4$ï¸(KT*{­9zzzèÖ­ÌÌÌЭ[7tëÖ FFF055…±±1ºwïssstïÞ]á÷†Ï5”ŸŸ   ddd´*9ØPRRÜÜÜÚôFÝÈ;(òúHþû“ÏÉ;=ÕÕÕÜ ùÅŠŠŠŠ·Ó°>’'è”%åžìÔàNN”ÉÎÎFZZšÂÕÔæŠ “‘ É;cò‹1Àÿ’•òÏkm=%ÿ>FFF044äê(ùïæææ uSÇü9vâÅ<«êêjüù矸té®_¿Ž;wîàáÇØÙÙÁÎÎŽ; …¸|ù2  !‰PSSƒ¿ÿþÿý7 ADÐÕÕÅÀáéé‰ÀÀ@Œ;ÖÖÖ<[†Q­ÚÚZ””” ¨¨EEE())Aqq1JJJPPP€²²2”——+´—åååM^€Ðè$]Þ&Ê/Ä?yA^YŸüÉçËË˹rü¤'Ï'ämšL&ãúö"‘ˆk'¶‡ÊÈ·knn®8433ƒ……zöì‰^½zq¿[XX W¯^M¶ç £ÊËËQXXȕ…Ëýååå\™oîÜøßgy[+ïËÊÏÓåäåÿIúúúøí·ß0{öl‚&·+O@Ê5¼!¿€¯ì"ESôõõ¹òmbbsss.‰¨¬ŒËgÌ5K>ƒG¡  ÅÅÅÈÏÏç*Ž ¨¨ÅÅÅ(--å€O …WFFF033S8©T6ŠNž333S8éî,äÉCyr´ªªJéèÉÊÊJTUUq#–***>Tz»pÃý}ûömîy--­F•¬@ à*By¥gaaSSS¼ð lFíÔ××£¸¸EEEÈÏÏGqq1Š‹‹QPPÀÕMEEE(--Å£G”&¼A£ä•¼cÐp´œüwyÝd``  “ÿûÉ„_{’Éd8qâf̘¡²m(#O(6 "¯³ª««¹«ÇOŽª¬¬¬l”€­¬¬lôù::: íCÏž=amm KKKî¤KÞ ³²²âF0][AAŽ;†_ýW¯^E}}=<==áããƒÁƒÃÃÃîîîMž gee¡OŸ>J_“H$HKKÃ;w›7oâÏ?ÿDUUÜÜÜŒÐÐP :T•_‘aÚU]]—ÏËËøѵò>}aaa£zZOO°°°€••7ڦᨛ†I³†'ÕòQ:ª’ GGÇvû<‰DÂŽj˜ü”ÿÞ0a"‰ ‰¸¾Gsû®gÏž°µµåööö°±±Q¸xÁ0íE$)”í¼¼<®ìÿý÷ß\ðÉD™±±1×÷êÙ³'ÌÌÌÊ·pNNz÷îÝ®Ÿ)×°ûäÅŽ†¿ËË|YY ¸Dê“ûÎÈȈë¯6¼ÁÆÆööö°µµ…M“w&2%•H$xðàrss‘““ÃU$999(((àN²û …èÙ³',--¹8KKKôèÑCá¯á¨6Öèu ‘H¤0RSþ(,,Äo¿ý†’’H¥Rn$TÃ$¡žžwbneeØÙÙÁÞÞ½{÷æ*9U&@F®¨¨÷ïßÇßÿÜÜ\®~ÊËËã:ãÅÅÅ ÿ§á1ܰƒ#¯Ÿ”d333ãévMò "Mä,--UšämØ„ëêê*´C¶¶¶°³³CïÞ½¹“¯Þ½{«ô¤”á‡D"Á¯¿þŠï¿ÿ‘‘‘055ÅóÏ?)S¦ 00°ÑÈùö$‹qãÆ œ?ÇŽCJJ \\\ðꫯbÉ’%°´´TÙ¶¦5Äb1rrržžŽÌÌLddd ##¹¹¹\PNGGÖÖÖ '®½zõ‚¥¥¥Â++«f§±`ÉG_ʆòß‹‹‹¹äŒüÜ«á-ÔݺuƒƒƒlmmѧO8;;ÃÉɉûÉF"2 •——så[þÈÌÌlöØ’'­lmm¹rÎFÁ==ùèȉÌP IDATˆ#¬ QTT¤p!¦  @áÎ=yÑÞÞž+ß}ûöEß¾}áèèÈγ;P—LŠÅbdee!==\077¹¹¹(((àDººº°··‡`eekkknüÓгgÏFóY1šI&“q'ß\¥&%š››ËÍÝ$OkiiÁÊÊŠKÚÙÙÁÑÑÎÎÎpvvfÓ*R©¹¹¹ÈÌÌDzz:w¬É¯pæåå)ܪdiiÉ%ììì¸ ò:J^O±‹“T*U¡!Å.¿%¿Bž““£0l÷îÝakkË]ì°³³ƒ““wÒeaaÁã·bÚ¢¬¬ ;vìÀwß}‡¢¢"̘1sçÎÅøñãy›/(>>GŽÁ?ü€ŠŠ ¼ôÒKx÷ÝwáîîÎK233ý={öìÉ%úöí‹ÀÍÍ  `#’:©ºº:ܽ{—{ÈËyzz:JJJ<ž:KžhêÓ§×ç±¶¶f£SÕ„L&Caa¡ÂÅùOyý-ð  áàà PÖÝÜÜàîîŽÞ½{³:»uÚa]]233‘––†ôôt…GÆÅÊÊ ŽŽŽ\eáèèÈ%xìííYGi¡  €KàäååqÉfù惘899±NLR[[«pâ"ÑžžŽœœ.élnnØÛÛ+ÔIòQ 
öööìŠ&Ój>äê&ù‰Wè999Üm!&&&FiÈÚÛÛ³‹aj ¦¦ß~û-¶lÙ"Â믿ŽeË–©ÕÊõµµ8rä¶oߎ¤¤$¼òÊ+Ø´iS»ÞöÈtM999HNNFbb"—LNNFUUàìì¬p2Ù·o_6⬓H$ÈÎÎVÚ—JKKC]]´µµáää„AƒÁÕÕ„»»;úõëÇ[Ðb±©©©HNNFBBîÞ½‹ÄÄDddd@*•BOO...JË9˜Ñ94úä9Snn.ˆFFF\wuuÅ Aƒàææ¾Ã×XŸ ”H$¸wï™™™Éd°³³Sè(4LÐt¶¹ûõRYYÙ(A-üý÷ß>trr‚‡‡ÜÜÜ0hÐ ®Ã7ÍUUU…””$&&ršääddggs·…ÚÚÚ6êØÈª¼%až$“É——§Ðkx‹ŽH$ðøvuWWW®&ï”õéÓB¡çoÑ5œ9sK—.Å£G°bÅ ¼ûî»j=%€L&Ãþólܸ¹¹¹xï½÷°~ýzv’δJNNbccƒ˜˜ÄÅÅqõ‘££#WÉG“¸¹¹5Z¥—é:êëë‘™™©PJNNFjj*Äb1ttt0pà@x{{ÃÛÛÆ ÃÀÙªì<“H$HLLäÊxLL !‘H ««‹4êwôíÛ—õ;º°ÊÊJ®Œß½{—+ó999/Î:lØ0xyyqe]Uó6v6• ÌÏÏGll,€¤¤$®Â—Wnnnððð€««+—d£luT]]ŒŒ ¤§§s[sÇ´¼CceeÅwèLR©)))¸}û6¸Ñ ÙÙÙÉd044„««+wââêêÊÈbu£)JJJ¸ú*))‰ë”eeeA*•B__®®®ÜIú AƒàååÅV¶mG¥¥¥X±bŽ9‚9sæ`ëÖ­µëëëñí·ßbÆ pttÄÞ½{1|øp¾ÃbÔˆH$Btt4®_¿ŽØØXÄÆÆ¢¸¸:::ðððàNöäTÙ€LkI$¤§§#11qqqˆ‹‹Cll,ÊÊÊ ¯¯!C†`ذaðöö†ŸŸ_“ 51í#33ÑÑш‰‰All,nß¾ÚÚZ.©ãíí¡C‡ÂÝÝ...,Ë´Zyy9îÞ½‹;wîp—äÉfKKK 6 Æ ƒüüüبr%Ô6AXVV¦pÅ0&&÷ïßg£­˜NïÉQ±IIIHHHàFÅÚÛÛsWBä?ÕyôHg"“ÉššŠØØX®ƒyëÖ-TUUÁÀÀ€K4ÍàèèÈnÉd:­šš¤¤¤p·ø%%%!)) YYYÉd°±±——¼¼¼¸“{v‘£íbcc1sæLvî܉)S¦ðÒSËÎÎÆo¼Ë—/ã‹/¾ÀÊ•+ù‰áÉÇ…ˆˆDDD >>0`À…~ÎàÁƒÙT,L»#"¤§§sIª˜˜ܾ}•••°··G`` ggg¾ÃÕhiiiˆˆˆ@dd$ÂÃÑ——### :Ta”—³³3›Ú‹iwµµµˆç.<ÅÄÄ %%0dÈøûû#00~~~077ç9Zþ©M‚0)) W®\Áµk׃ôôtœ!C‡e·3]Ree%nÞ¼ÉUl±±±HOO‡@ €‹‹ ¼½½1räHÂÍÍïp;…’’DGG#22±±±¸uë*++ahhÈ]m–'? Ànu`˜ÿª¬¬ÄíÛ·¹QqqqHMM…L&ƒ¼¼¼àãダ€x{{³ÛM›ñÃ?`Ù²e7nöïß=zðÒ3#"|ñÅX¿~=fÍš…}ûö±PP[[‹+W®à÷ßGxx8¡¥¥oooøûûÃßß~~~ld éTŠ›7o"22ˆŠŠBYYlllˆ±cÇ"88˜­ÎÞ‚‚‚œ>}—/_FDD>>3f ‚ƒƒ1lذ.Ù7øè£°iÓ&|ÿý÷X´hßá¨ÌÝ»w1nÜ8ôéÓ.\`õm'››‹Ã‡ãäÉ“ˆ‰‰A·nÝ0a„„„`òäɰ°°à;D†y*b±ááá ÃéÓ§‘Þ½{#$$sæÌ¯¯/ß!v"Ÿþ‰£G",, ¹¹¹èÓ§BBBvw£±Š‹‹¹‹[çÏŸGuu5|||0cÆ „††vêU’Uš ÌÊʉ'pêÔ)DEEA__Ï=÷ÆŒƒ1cÆ`àÀ]²ÓÏ0E&“!!!»êùÇ@,ÃÏÏÓ¦MÃôéӻ܊Nÿý7NŸ>°°0\ºt õõõðóóãF[úøø°ÑK ÓÂÂBnN² .àÞ½{°±±App0BBB0vìXð¦Ê>|óçÏÇ¡C‡Êw8*—••…ÀÀ@ 2Çg#´5Pee%Ž;†Ã‡ãòåËèÙ³'^xá„„„ 00% ˜NéÎ;8sæ Ž;†›7o¢oß¾˜7oæÍ›×iYYY8tèöïߌŒ xyyaæÌ™ ÁÀùaÚ]]]"""pêÔ)üòË/())APPæÏŸiÓ¦uºéïÚ=A˜ššŠÃ‡ãÔ©Sˆ‡¥¥%BBB0mÚ4Œ7®KtìF]ÕÔÔàÂ… 8uêÂÂÂP\\Œ!C†`Ú´ixå•W:í$Ì©©©ÜΛ7oÂÌÌ 'NÄÔ©S1qâD¶È è©{÷îáÔ©S8}ú4¢££¡§§‡qãÆaÚ´i˜9s&LMMù±ÝEFFâ¹çžÃöíÛ±téR¾Ãé0 ðóóÃo¼Ï>ûŒïp˜VºvívíÚ…cÇŽA"‘`êÔ©˜?>&L˜À½L—’€àÈ‘#(((€ŸŸ-Z„—^zIãÒ‹Å8zô(öîÝ‹¨¨(X[[cîܹ˜;w.ÜÝÝùa:ŒD"Á… °ÿ~œ:u :::˜5k–.] 
¾Ãkí’ ¬©©Á¯¿þŠÝ»w#** ŽŽŽ˜9s&¦OŸ___6ßè!©TŠ«W¯âäÉ“øå—_——,^¼3fÌÐød¾|4Ã?þˆ¨¨(ØÛÛcÖ¬Y†ŸŸ´µµù‘a˜6xøð!Ξ=‹°°0„……BCC±`ÁøùùuŠ;***0hÐ L™2;vìà;œwùòeL˜0çÏŸGPPßá0M "œ;w[¶lAdd$†Ž… "44´S&í¦-¤R)Ο?àøñã°²²ÂêÕ«±xñb›o³¢¢»wïÆ¶mÛPTT„Y³fáÕW_ŸqãØ¦Ë{øð!þóŸÿà‡~@LL ±víZL˜0A£û¤Ï” ¼sçvïÞC‡¡¦¦S§NÅ’%KÄ’‚ £Ad2þøãìÙ³'Ož„‘‘æÌ™ƒ×_]ãnˆŠŠÂ?ü€cÇŽA*•bÆŒ˜?>«—¦‰Døù矱oß>\»v»­kÁ‚°³³ã;¼§öÚk¯!&&111]ö–Ì7âàÁƒHHH`«Ùª™L†£GbË–-HHHÀ¤I“°víZøûûó襼¼<|õÕWؽ{7ôôôðæ›obÅŠj¿}II ¾þúkìØ±‰‹/ÆêÕ«akkËwh £–ÂÃñeËœ?ƒÆš5kª™çžô¢¢¢hÒ¤I$hÀ€ôÕW_QQQÑÓ|Ã0j¦°°¾üòK0` š}šÜÝÝIKK‹fΜIׯ_ç;¤vÃÊ´ú*--¥'NÐ'Ÿ|Âw(̉ÅbÚ¿?¹»»“@  Ù³gSFFßaµZPP-Y²„ï0ÔÂùóçÉÀÀ€|ÈwH­Öªauu5½õÖ[\&ôòåËªŽ«]ù„Ö­[G#GŽ$WWWºsçN‹ÿ¯aœòk¨ÿþª¹YOû}ZKë+™LF¿þú+õë×tuuiÆ T[[ËwXͺ|ù2éêêÒýû÷U¾­¶þM諯¾êðcÁÏÏÞ|óÍÙ£\tt4ÙØØ­­-:uŠïpQÅ1©íïÓbçÿ£îmWff&3†´µµéóÏ?'™LÆ[,2™Œ>þøcÒÖÖ¦qãÆQVVo±4…•uE¬¬ÿº—õ'Nµµ5ÙÙÙÑÕ«Wù§UZLÆÇÇ“‹‹ ™™™ÑîÝ»I*•vD\í¢¢¢‚о}û¸çÔ±Ò{Ö˜$ YYYQkîWÇï¯Z»_Úcÿ);.5T*¥]»v‘©©)õïß¿]“mõý÷ß“¡¡! 4Hc*Û†Zª›$ Y[[·ªL·eǵŸŸ½óÎ;¹æ|þùçÔ«W/’ÉdôèÑ#š4iEFF¶øÿžŒ³¾¾^-„Oû}ZKSë+¢Çåè_ÿúµ{â´½…††ÒÌ™3;d[mù›ž={–æÍ›Gõõõ~,>|˜LLL¨ªªªC¶Ç(Ú¿?éèèÐŒ3Ôväƒ*ŽÉŽnÛ[[Î šó4ýÝö:Çè*}m™LFÛ·o'š={6/·×ÕÕÑ‹/¾H:::ôõ×_óš¨l+ë±²þ˜&”õ‡ÒÔ©SIOO<Èw8-jöˆ:yò$Ѹqã:䪶*ØØØp „¼¼<òóóã9"EíSÿþý[¬ Ôñû«ƒÖî—öÜ KMsÿþ} "###:}út‡n»¾¾ž–,YBZZZôþûï«åü-­ÕRÝÔš2Ý:ª^P—ú§OŸ>í–ÔS‡a{~Ÿ¦hr}Eô¸HFFFjy…Y$‘¾¾>ýöÛo¶ÍÖüMãããÉÙÙ™ÊËËÛôÿÚKUUÓ‘#G:d{Ìÿüð䥥Eÿüç?Õ6a §Šc²£Ú_UyÖøŸ¦½n¯6¾+öµ###ÉÜÜœf̘Aõõõ¶Ýúúzš6mõèу¢££;l»O‹•õÆXYLʺL&£÷ߟ„B¡Z'3‰ˆš\Våüùóxá…°páBœ;wNcW-4h „ââbL™2EEE|‡ÄéȘÔñû«ƒÖî—öÞòãRÙÚÚâüùó˜?>f̘K—.uÈv‰‹-ÂÑ£G†M›6iô*ŸêP7uÔ¶Õ©þÉÉÉá;„vÕßG“ë+pttÄÅ‹±hÑ"„††âäÉ“|‡¤àüùó€I“&uØ6[ú›J¥R¼úê«X¸p¡ÂJÂy,b„ øí·ß:d{ÌcÑÑÑxã7°uëVlܸ€ïš¥éõ“ºyšöº½Úø®Ú×öóóÃ¥K—‰üã¶Ýµk×âêÕ«¸téFÕaÛ}Zšò÷Ô¬¬w,@€M›6aË–-X²d ®]»ÆwHMS–5|ðàuïÞÞzë­ÎW¶¿ßÿˆˆ>üðC@¦¦¦ôúë¯s¯———Ó¦M›hÑ¢EäççG¾¾¾Ü"•••ôË/¿ÐüùóiÔ¨QtðàA233£>}úÐ_ýEááá4|øpÒÖÖ&777ºuë=Î߸qƒÖ­[GNNN”””D#GŽ$mmmruuåF]=MLD‡Ò®ZµŠæÍ›GkÖ¬¡•+W’­­m³WšÚVaa!½ùæ›´jÕ*zçwÈ××—–,YÒâÄà•••ôÁÐܹsiÕªUäããC~ø!w嫹ϕÉdtúôizóÍ7ÉÁÁrrrhܸq$ ¹ {[óžÖî¯æbmj¿¨zÿÉKM·råJêÞ½;åçç«|[ò[Õ}EåÖj©n’_LJJ"ooo 
…äîîN111Ü{š:ö[[~”m»¾¾ž~þùgzõÕW®ìµTæ)$$„6nÜH‹-"///îŠt[¶ó¬uGSÂÂÂèõ×_Wˆãõ×_§ŠŠŠfco*N"Å„?ýôéééqõpyy9íÞ½›{®¾¾ž¢¢¢hÍš5äääDäááAÝ»w§¿ÿþ»Åz¬-ß§¹}ØRÊt–úŠˆhýúõdddDééé|‡ÂY½z5ùøøtè6[ú›þòË/€ÚôÿÚÛÖ­[ÉÁÁ¡C·Ù•‰Åbêׯ­X±‚ïPZíiŽÉ–Ú³Ö´¿Mµmi§äsÈ.X°€–/_N†††€{©æœ€ˆèöíÛH›7o¦uëÖ‘––•——7Ù'ikߚ؟ÔÕûÚ/^$…ãLU®_¿NÚÚÚ³®+묬7MÓÊú²eËhÀ€$‘HøE)¥GThh(;Vío+h«†'sD ëäÉ“’¡¡¡dnnN="©TJùùù€ÌÍÍéÊ•+”ŸŸO:::dkkKÛ·o§ÚÚZºwïikk“¿¿?=>¡¼xñ"™˜˜zçwèöíÛtüøq233#¡PÈUm©®®Ž|}}é7Þà^ÏÊÊ"]]Ý+ˆ'·UTTDŽŽŽ «p–••‘««+ÙÚÚ6y²XQQAžžž´hÑ"îÙµk £G¶ø¹÷ïß§’’êÞ½; ?þ˜òóó)<<œyzz’L&kñ=­Ù_-Ūl¿¨zÿu&2™Œ饗^RévòòòÈÐÐ~þùg•n‡/ÊŽAy§eÆ TXXHááြ¼¼ˆ¨ùcÿáÇ­*?Mm»¼¼\áùÖ”#{{{rqq!¢ÇÇ… õíÛ·MÛiºãiöuK±?gSŸÕ¯_¿Fõ°ü¹ºº:Š‹‹ãÚ…íÛ·Sxx8½ð T\\Ül=Ö–ïÓÒ>ÌÊÊj2ŽÒÒÒ–v_§ðüóÏSPPßapF­v‹q̘1ƒ´µµyï¼Ê뽎¸Å}ýõ×äàà@555|‡¢2­iÏZj‰šn7ZÛ%"úôÓOIWW—[Di÷îÝ€^yå"Rí9“““Bò}ñâÅTXXHDO×N¶õ|¦)]½¯ýÿ÷¨òíøùùÑòåËU¾>±²þ+ëꧦ¦†ìííéÛo¿å;¥Q999¤¥¥ÕiFé4ôäxöìY…ì}Ãǯ¿þJD Æ“ÿ¯oß¾ £““(<'?1lØÁþî»ïÍ;÷©búæ›o%''+lËÅÅ¥Í ÂÕ«W*))QxßÑ£G -[¶Léç|ðÁ€233¹çjjjèÛo¿¥¢¢¢V®²“i'''Ü¿[zOKû«¥X•í—¦´×þël¢¢¢H(Rnn®Ê¶±fÍ:t¨Ê>ŸoÍ%. Õ»wo …DÔºú«5eLÙ¶Ÿ¬÷ZS޶lÙBÿú׿ˆèqgái¶ÓžuGS”ÅÑRìÊÚeŸ¥l.˜'Ÿ“ÇÞp%ðÖü-[û}ÚºÕaEòŽvïÞ=tóæM¾C!¢ÇÇî–-[øC­­-ÙÙÙñ¥§§€QÃyxxÐúõëùC¥ZÓžµÔþµÜn´¦’¯@*?Oxøð! ‘jÏ ÌÌÌ}÷Ýw$•J)99™D"=];ù4çXÊtõ¾ö­[·¥¦¦ªl)))€âããU¶ uÀÊúc¬¬«§uëÖÑ!CøC)m<áòåË000ÀÈ‘#Ÿ|©Ó¹ví† ‚[·n5ùes¯hk7ÚmÐÑÑAMMÒÿÛðý!!!X¶lâããŸ*¦   €³³³ÂóZZMN'Ù¤ˆˆ€©©©ÂóÏC£Ì¹sçvvvÜsúúúø¿ÿû¿6}®²}«££"âþÝÒ{ZÚ_[¶li6Ögñ´û¯³=z4ôõõqùòeÌ›7O%Û8þ<¦L™¢’ÏVw ˶¾¾>¤R)€§¯¿ž,c­ù-•yX³f ÊÊʰ}ûvhii¡®®®ÍÛiϺ£-Zн=çà’–÷\kþ–­ÕÖ}Ø0Ž®ÂÅÅ}ûöÅï¿ÿOOO¾ÃAii)ºwïÎw àääÄwèÑ£àáÇGåÝ»w h¼š2£ÁÁÁÜÙšhõêÕdccC»víjò=­iÏZj‰Zn7ZÓN­[·Ž\]]éÀtîÜ9úóÏ?)11‘[È@•ç–––ôðáCîßöööÜ¢ ÊÚë¶¶ñOÛ®uõ¾öÉ“'IOO¯Ù}ô¬=zDººº¦²m¨+묬kº—_~™¦NÊwJ)ÕòóÏ?'+++nÏÎâßÿþ7)¬Ú”ššJãÇ'}}}255¥¹sçRAA=^MgË–-€Œ)**Š"##ÉÀÀ€Ð'Ÿ|B¥¥¥´ÿ~n¡ï¿ÿž+¤òá7ß|C"‘ˆòòòèÃ?TXå§­1;w޼½½IGG‡zôèAï¼ó=š^ýuºtéR“'|ʶUTTDK—.%___z÷Ýwiùòå´fÍ®âlJbb"“‘‘Ћ/¾¨°yKŸ{øðanŸíرƒD"8p€„B! 
Ï>ûŒöîÝÛâ{jjjZÜ_-Ūl¿¨zÿu< +++Ú¶m›J·S[[K®®®´xñb•n‡/ -©TJ»wï&mmm@›7o¦ŠŠ Ú½{7wìòÉ'T[[Ûì±ßš2VSSÓ踮¬¬¤mÛ¶ÒÕÕ¥ƒRyyy‹åè»ï¾####rrr¢sçÎч~HÚÚÚ4jÔ(*((hõvÚ£îP¶gjj*mÚ´‰P(¤;wÒÝ»w[Œ=##£Qœ‰‰‰´yófî¹ýû÷Ó£G(--FŽIB¡H7nÜ €€Z¼x18p€>ûì3ÒÑÑ!ôþûïSRR’B|ÍÕcmù>ÍíCù~o*ŽÎ®¤¤„éÿøß¡pBBB¸• ÕÅùóç ]½z•×8Ž?NÚÚÚTUUÅk]EDDéëëSzz:ß¡<•ÐÐP@ݺukö}Mµgmi›k7¾úê«VµS'NœàN¾>ŒiïÞ½D¤ºsüw€Ï>ûŒÞ{ï=š4iedd‘òþn[ÛøÖÄ®LWîk×××ÓÈ‘#iåÊ•*ßÖòåËiôèÑ{‘•uVÖ5YJJ éëëStt4ß¡(% jûì3ÞâX·n.\¸€¸¸8ÞbèjBCC‘ššŠ«W¯jäœOùùù˜8qb“‹ª "·ß~ "Š+¸çjjjpþüyÌ;·]æg4Ǻuë°wï^¤¦¦ªüÜ»´´ýû÷ÇÒ¥KñÑG©t[ªÂÊ:£‰ª««áëë www9r„ïp”RºôP(ĉ' ‘H0bĤ¦¦vt\ è±»wïÂ××2™ ÇWyrx<—ÈÑ£GñÅ_`Þ¼y¨­­Uù6†éîÞ½‹#F@$áÌ™3j“¯ŸžžŽ‚‚¾Cáüøã8sæ ¯hGGGcÔ¨Q¼m¿+Ú¹s'*++1eÊ[¦¶¶ï½÷vïÞÍw(-zÿý÷±bÅ ¼öÚkÜs†††ðññAïÞ½yŒŽéh[¶lÁ_|C‡uÈÀœ=zàСCøôÓOñÅ_¨|{í•uFUTT`Ò¤I¨®®ÆÎ;ù§IM®Ý³gODDDÀÒÒ>>>سg ×FòÉé[3I=Ãh"®]»0bÄXYYáÊ•+èÙ³g‡mÿùçŸÇÙ³gqæÌx{{ãÆ¶m†a4L&ÃŽ;àããƒ^½z!::666|‡¥`ôèѰ··ÇÑ£GùEöïßU«VA"‘tøösrrpõêUÌž=»Ã·Ý•uïÞgÏžEFF5jÑ’ÔÔTlÞ¼>>>|‡Ò¢ððpÀ¶mÛ¸òEDˆÇ[o½…ÇóÓQ$ V®\‰õë×ã‡~Àøñã;lÛ'NÄž={°nÝ:¬^½Z£ÎWYYg4MNN‘sçΩ÷Jâ-݃\WWGï¾û. …B7n\—š«èiUVVÒ§Ÿ~JZZZ€V¯^M±±±|‡Å0ÏäÖ­[4vìXÒÖÖ¦µk×’X,æ-–¬¬, "¡PHË–-S›Å†Q×®]£áÇ“ŽŽmذ×:«%k×®¥¡C‡ò†R©©©ôå—_vøv7oÞ¬°@Ó±²²²hðàÁÔ³gOúí·ßø§ÓÉËˣŋ“ƒƒ™ššÒàÁƒiòäÉôÙgŸ©t F}¤¥¥Ñ¨Q£ÈØØ˜×2vìØ1222¢Ñ£GssÓ1퇕uæØ±cdaaAžžž”““Ãw8-j1A(÷×_‘§§'iiiÑÂ… )33S•q1 £&222hÁ‚¤¥¥EÞÞÞtýúu¾C"""™LF?þø#Y[[“‘‘}ðÁÜêà Ãt]·nÝ¢éÓ§“@  ÀÀ@Šç;¤Ý¿ŸôõõéÒ¥K|‡¢ª««ÉÚÚš¾ÿþ{¾CéÒªªªháÂ…€æÌ™£°0Ã0O§®®Ž>ÿüs244¤!C†Prr2ß!Qbb"yxxP·nÝhëÖ­TWWÇwH £ñrssiÖ¬Y$hñâÅT]]ÍwH­Òä-ÆO>|8bccqðàA\¹rýúõüyópûömÕ od†7·nÝÂܹsÑ¿DFFâСC¸~ýºÚ ç˜?>ÒÒÒ°nÝ:|óÍ7pppÀ[o½…ŒŒ ¾Ãc¦Éd2\¼x“&M‚§§'²³³†+W®ÀÃÃïðZdkk‹¥K—bÆ l:;v쀡¡!,XÀw(]š¡¡!öîÝ‹³gÏ"::ýúõÃæÍ›Q^^Îwh £qˆÇ‡»»;>øà¼÷Þ{¸qã\]]ù îî‰Á;#7ÂÓÓ§NbíÃ<…òòrlÞ¼nnn¸}û6~ÿýwìÚµ |‡Ö:O“U”H$tðàArww'äååEßÿ=&Ë0®¬¬ŒvîÜI^^^€H‡"‰DÂwh-ª¬¬¤mÛ¶QïÞ½I ¿¿?ýøãTQQÁwh èHzz:mܸ‘H PPP;wŽï°žJQQ™››Ó®]»ø…WYYYdllLGŽá;¦êêjÚ¼y3™˜˜™™­[·Ž ø‹aÔ^]]ýøãäææFZZZ4{ölµ¾Í0++‹fΜI€ÜÝÝiÿþýj=Eè‹üü|Z»v-™˜˜‰‰ mÞ¼™jkkù«ÍDÏviàêիؽ{7Ž;˜5k–,Y‚#F@ ´C “aU""DGGã‡~ÀÏ?ÿ ¡Pˆ^x‹/†¯¯/ßáµ™L&ÃåË—±oß>n…åY³faÁ‚ðóócõÃh¸ªª*?~{öìATT¬­­±páBÌŸ?}ûöå;¼g²ÿ~,_¾ñññèÓ§ßát8©TŠqãÆÁØØ§Nâ;F‰²²2ìܹÿú׿ 
‰0þ|¬^½...|‡Æ0j¥¢¢{öìÁ¶mÛPXXˆ9sæàÝwß…››ß¡µJRR>ÿüsüôÓO°¶¶ÆêÕ«±hÑ"ñè•´´4lݺû÷¹9V­Z…¥K—ÂÄÄ„ïОÊ3'åD"~þùgìÚµ qqq°µµÅ´iÓ0}út@WW·=6Ã0L;‹Å¸rå Nž<‰“'OâÁƒðññÁ¢E‹0{ölób»(++áC‡°oß>ÄÅÅÁÚÚÁÁÁ ÁرcahhÈwˆ ôÂýû÷qúôiœ:u W®\aêÔ©˜?>&L˜¡PÈwˆífúôéÈÌÌDTT”z¯r§ï½÷öîÝ‹„„XYYñÓŒÚÚZìÛ·[·nEff&üüü°`ÁÌœ9³Óô!¦­ä©÷ïß'N/^Œ·Þz öööÜéïÂØ·o^{í5|ûí·xã7ø‡yJ"‘¿üò <ˆ¨¨(XXX 88ÁÁÁ?~<»5‘é4d2®_¿Î]ÄJLLäfóæÍÓøé.Z’––†ƒbÿþýÈÍÍ…‡‡w‡ŽO§I”0LEE.\¸€°°0œ={%%%܈ù3ftÊo*O6TYY‰+W®àÊ•+G||<„B!|||¸„áðáÃÙ­ ó***píÚ5.!™L†!C†`̘1\ctEb±˜Ûo‘‘‘øë¯¿P]] ggg.Y8jÔ¨NßÙc˜Ž$‘HpëÖ-DFF"22QQQ(++ƒµµ5àïï1cÆ`À€|‡Ê›ØØXL˜0ÇDZcÇ:í”ß|ó V­Z…O?ýkÖ¬á;¦deeq£ö###!ˆ©S§bòäÉpttä;D†iy¢àôéÓ8sæ Š‹‹áî„„À××·ËÍs-“ÉpõêUœ>}aaaHNN†¥¥%‚ƒƒ1eÊva€ÑHYYY8sæ ÂÂÂàÊzgo¿:4Aø¤G!22’K&$$@ `À€ðööæƒîôWϦ-Äb1âããqãÆ ÄÆÆâÆHII 43f üýýannÎs´šE,#&&‘‘‘ˆˆˆÀÕ«WQQQ=zÀËË Ã† ãš:Ÿ Ãt¤úúz$%%!66±±±ˆ‹‹C||<Äb1¸„`@@[èà ?~<¬­­ñ믿vª…Kêêê°zõjüûßÿÆŽ;ØÈÁNL$)ŒÀ(--…““w!Àß߿ӟp1š§¼¼QQQÜÈö›7oBKK Üh¹ÎT'·‡ŒŒ .Y "‚——???6ˆQ;ÙÙÙGDDÂÃÑž={bÒ¤I Áøñã;åHÁ¦ðš |Rii)®]»Æ%}àáá777 4îîîèׯtttø›aÚL"‘àÞ½{HLLDbb"’’’€¬¬,H¥RcèС\2ÐÇLJ]qçIMM âãã¹ÑP±±±¸{÷.¤R)ôõõáêê WWW 8ûɇLg"O&&&âîÝ»ÜϬ¬,ŒŒŒ0tèP…„`¿~ýºÜíWí¥¶¶Ë—/Çž={0wî\lÛ¶M#/Éd2|÷ÝwøÇ?þÞ½{ãøñãlÔhW__›7o*Ì?Z^^###xzzr#ö½½½áììÌê¦]+\øÅƒ ­­Í%°1zôè.5rH•ÊË˹iEÂÃÃqëÖ-Ô××ÃÆÆ†äååoooXXXð.Ó îݻǫÅÄÄàÖ­[¨ªª‚‰‰ 7÷|`` †ÊÎÓþKí„O’ÉdÈÊÊBBB’““qçÎ$''#55b±ºººèß¿?\]]áììÌ=\\\`eeÅwø ƒ‚‚¤¥¥!=={Èa‰DÂÃîîî\â[ž`b“þª¯šš¤¤¤ 99IIIÜOy‚Wž8ìß¿?úöí«ð` ¢0êH,#++ ÈÈÈ@ff&îÝ»‡””.hhhWWW¸¹¹ÁÝÝûéèèÈê+8uê–-[‰D‚Í›7cÁ‚³pËÕ«W±zõjܼyk×®Åúõ롯¯ÏwXŒš‘‹CLLŒÂ(dSSSxyyqƒÜÝÝáîîÞiV‰eÚŸX,ƽ{÷””„ÄÄD$''#..999ÐÒÒ‚‹‹ —€–ß ÂÏìUUU¸uë–B’öÞ½{ "8::ÂËË nnn8p ÜÜÜп6ˆiRYY’““¹r.¿“E$ÁÀÀC† Q˜&jÀ€¬ŸÚK6E>úJ>ê*%%…K¾TVVŒŒŒ’†ÎÎÎprr‚££#lmmÙ<‡L»‹ÅÈËËCnn®B0==™ÿÏÞÇEUïÿ ûΰ# (*й›^ܯ[¥æ®i7ÓìÞÊJ-[,KëV¦~53-ÓÌ$Íkj¥i’”¡â)®(ÈŽì ëÌ|~ø›#‹¨0g€×óñ˜‡ÃÌæ58Ÿs>ç}>çskü>êF™uîÜíÛ·ç°¹³pxéÒ¥jß+++i}TõßÖ­[Ã××—>>ÒÏžžžFÛ'ÃÐjµÈÈÈ@rr2RRRœœŒäädܼy)))¸yó&222 kVÕŠÒÑJ:HLLÄÕ«W«g²³³¥å”J%|||¤õ“näíí ´iÓ†#ª&++ )))Òº*55)))HJJBZZRRRP^^055…¯¯oµÑ­º‹ÔÆ'11o¼ñ¶oß???Ì›7³gÏ6ŠÑTZ­ûöíÚ5kpøðatíÚË—/Lj#äŽFÍHJJŠTJHH@||<¤¾½½½½ÞzÌßß_ºïëëËÓÉš˜¬¬,$&&Vë/]»v Pc©sçÎh×®¾7Q•••¸råŠTº³ø žžžh×®]µv777™?Ý µZ›7oJƒ)ªžÅríÚ5¸½]u„©®Íó¬¬×" 
„uÉÊʪµØ“””„ŒŒ h4·7:îîîhÕª<<|GÅ­[·àììŒÐÐPtéÒÁÁÁRGÜÛۻγÊÊʤÊÍ›7qùòeÄÅTâÿŠ IDATÅáܹs¸~ý:,,,лwo 4<ò øI‰–Z­Fff¦4b?55YYYÈÌÌÔ+leffJÛ‡ªáààGGG½ûÒ@XYYÁÁÁfff°··—¶…ÒöÑÖÖ¶ÑûìjµEEEÒ¶­¢¢*•Jú·¼¼%%%Òv1??(,,¬öo~~¾ôüÛ9¸ººêU«Œ¯:ºKŽí?µ<%%%¸yó¦´mÓí¿W-xéöíKKKõ^kff(•J(•J½v^µí+•JX[[ÃÊÊ 666°´´ÔkßÒ¾»n?½1?oyy¹^û®¨¨ú·EEEP«Õ(,,DYY™Ô¶óòò¤û5µý;999éZu…Uwww½Úhc¤X ”‘¢ZѰ°°………RÑ«jL·³ygLט”nçüN&&&ptt”FÙYZZJ;ÆwÒh4 ’E×iª©@jkk ©@ª{ÎÁÁÕŠ€,öÝŸüüüjECݺ¨°°°ÚºIW»ó¹ÒÒR½Ñ÷«®Î“££#´Z-ÊËËakk+íÌÔD×Ij¨<ºâ¨n=Tµ8ªT*¥u–î9ݺ¬êsHRcÑjµˆÇŸþ‰sçÎáܹszxÐÑí¤˜™™A«ÕJÅïª,,,ˆ銀½{÷æ÷—Z¤ÊÊJ½bBNNòóó«í@ßY@+,,Dii©t°é^Õ´-ÔõÕU*,--QYYYãv·¶þûÝè •w>«ÞW*•ÒcŽŽŽRaÀÕÕ•#®¨IS©TÒ[·nIíº  @*Š×T<+((¸ï>§®MßÉÊÊ æææR·¦6­+üß;;;X[[ë@ï<Øqg;¯zã)þM „ÍŒntŠnç¸jÁN÷\M;ëZ­¶Æ£¤£‡;vìLž}úÀÑÑ666ðöö†——/®EÔÀî6b¯¦ƒ]5¤{ý¢E‹0yòdôìٳƢœî@€Žn›¦ÛfÖ6¢‘ˆLm#öªŽÜ«:ò¶ê+wþž'NHý]ÝHÄ;Ýyf€n»¶w®¨åbê-""ðõ×_Ëœ„ˆ¨ºíÛ·#""¢ÆS£‰¨~ ¾þúk<ñÄrG!¢{ÄöKÔü±¿K‰'~S½=÷ÜsrG "ªÕÃ?Œ;wʃ¨IÛ¹s'zõê%w "ºl¿DÍû»Ô˜8‚ˆˆˆˆˆˆˆˆ¨ã5≈ˆˆˆˆˆˆˆZ0‰ˆˆˆˆˆˆˆˆZ0‰ˆˆˆˆˆˆˆˆZ0‰ˆˆˆˆˆˆˆˆZ0‰ˆˆˆˆˆˆˆˆZ0‰ˆˆˆˆˆˆˆˆZ0©ÞV¯^Õ«W˃ˆ¨FÑÑј8q¢Ü1ˆš´‰'"::ZîDtØ~‰š?öw©1±@Hõƒ˜˜¹cÕ(99‘‘‘rÇ jÒ"##‘œœ,w "ºl¿DÍû»Ô˜X $""""""""jÁB!w"""""""""’Gµ`,µ`,µ`,µ`,µ`,µ`,µ`,R½EDD ""BîDD5Ú¾}; …Ü1ˆš4…Bí۷˃ˆîÛ/QóÇþ.5&‰ˆˆˆˆˆˆˆˆZ03¹PÓÑ«W/¹#ÕÊ××&L;Q“6aÂøúúʃˆîÛ/QóÇþ.5&…BÈ‚ˆˆˆˆˆˆˆˆˆäÁSŒ‰ˆˆˆˆˆˆˆˆZ0‰ˆˆˆˆˆˆˆˆZ0‰ˆˆˆˆˆˆˆˆZ0‰ˆˆˆˆˆˆˆˆZ0‰ˆˆˆ¨šŽ;bΜ9rÇ """"`ˆˆèݸqCîDÎÝÝNNNrÇ ¢v?Û0n÷ˆš¶sº,R½ÅÄÄ &&FîDD5JNNFddd£¿OJJ ¦OŸÞèïC$‡ÈÈH$''~ûí7|ðÁ2'"¢úªÚ~ks?Û0n÷ˆŒG}ú»lçt¿X ¤z[½z5V¯^-w "¢EGGcâĉúÙÙÙ5j²²²õ}ˆä2qâDDGG˃ˆîÃÝÚïýløÝ#2.wëï²Óƒ`îÙ¹sç0pà@,_¾K–,©©)ŠŠŠEEEX¶læÌ™ƒ oß¾8qâ4 ¢££ñÊ+¯ ‰‰‰èÒ¥ \\\°yóf¸¹¹A¡PàwÞ‘ÞgóæÍ°°°ÀW_}…ýû÷cÁ‚hÓ¦ ’““1tèP˜™™!$$±±±ÒkΟ?Gy¯¿þ:æÌ™ƒîÝ»ã÷ߨT*|÷Ýw˜5k~øalÛ¶ NNNð÷÷GLL ¢¢¢Ð»wo˜››#88gÏžÕûܵ}6"2BÀ’%K€ . 
_¿~077GPPöïß_çº(-- YYYX°`žþy¼ôÒKèÛ·/ž~úi¤§§Ö­[‡¸¸8ddd`îܹÒ{ßíuDM‰V«ÅÎ;1cÆ 0Bœùä!Äí•ÚÏkµÚj+¿€€€¯µµµôs}>ݺ¢²²Rzlݺu€˜6mšÞ2U×E/¼ð‚ nݺ¥÷ûvìØ!ˆyóæ !ªw êû:¢¦¤¦íe}Úɯ¶¾º5îÖ¾ó5ìÉíœO1¦{¶jÕ*˜ššbÞ¼yèÙ³'òòòààà€ãÇ£k×®· Ïz·qãÆ ÀÚÚZïwš››ã¹çžÃpõêUTTTàÒ¥K “–ѽöÎ׉ÿZ!¼üò˘>}:V­Z…µk×¢¼¼\ïùš~‡™™Y¿·´´Tú¹>ŸˆŒƒ®WmÛcÆŒpûtŒªËT]EEEõ~_xx8Ô:¯Óý¾ŽÈ˜Õ´½¬OÛ""ùÕÖW¯ÍÝúÏwb¿˜H~lçÔX ¤{6cÆ œý窱_L$?¶sj357îîî"77Wú¹uëÖ",,L ___abb"^|ñE±wï^±fÍ1tèP‘ŸŸ/„¢}ûö€P«Õ5þîÄÄDabb"Þ~ûíjÏé^«Õj¥Çtóê³··–––⯿þß|ópqqÄåË—EzzºÐh4€ ¬ö{«fºó½êóÙˆÈ8tèÐAFzì›o¾~~~"''GQs»¿uë–mÚ´Ñ›|ùå—_–ÖqBáêê*¥‹Ô÷uDM‰Z­¤ Í…¨_Û""ùÕÖW¢ú6Lˆ»÷Ÿï| ûÅDòc;§Æ`úæ›o¾)Ga’š®—_~{öìJ¥Âþýû!„À—_~ Œ5 —.]Âwß}‡½{÷ÂÎÎëׯ‡••Ö­[‡ÈÈHét<¸¹¹éýn'''$%%aÑ¢E°µµ•ß¾};¶lÙFwwwtèÐ;wîÄ–-[ „€µµ5ºwïWWW9r{÷îÅøñãáííèèhœ:u ƒ –-[ðË/¿ ¢¢ýû÷GRRÖ­[µZ +++#22[·n…F£——Ú´i¥RYëgsqq1èߟˆê¶víZäääÀÍÍ ;vDvv6:„õë×ÃÆÆ¦Öu‘ ¦L™‚ÔÔT|øá‡¸té~øá˜››ã‹/¾NÅpttÄÏ?ÿ •J…#FÔûuDM…J¥ÂúõëñóÏ?£¸¸~~~ðóóÃÆkm[wnωH>µõÕœœªmÃ@©TÖÚž4i¼¼¼ô^caaÁ~1‘ÌØÎ©1(„h s0‰ˆˆŒ@ÇŽqéÒ¥›b€ˆncÛ""""j¾8!ÕÛêÕ«±zõj¹cÕ(::'N”;Q“6qâD^y›¨‰bû%jþØß¥ÆÄ!Õ[LL bbbäŽADT£äädDFFB­V€ô/Õ_dd$’““k|Žm‹È¸ÕÕ~‰¨yÐõw‰ „DDÔ,”——®_¿xå•W+g$¢fA¥RaÅŠl[DDDDÍç $""""""""jÁ8‚ˆˆˆˆˆˆˆˆ¨cˆˆˆˆˆˆˆˆ¨cˆˆˆˆˆˆˆˆ¨cˆˆˆˆˆˆˆˆ¨cˆˆˆˆˆˆˆˆ¨cˆˆˆˆˆˆˆˆ¨cˆˆˆˆˆˆˆˆ¨cê-""rÇ "ªÑöíÛ¡P(äŽAÔ¤) lß¾]îDtØ~‰š?öw©1±@HDDDDDDDDÔ‚™É€šŽ^½zɈ¨V¾¾¾˜0a‚Ü1ˆš´ &À××WîDtØ~‰š?öw©1)„BîDDDDDDDDD$žbLDDDDDDDDÔ‚±@HDDDDDDDDÔ‚qBªQII ÊËË뵬¥¥%lll9ÑßÔj5ŠŠŠêµ¬B¡€R©läDDMO~~>ê;ÓŒ½½=ÌÌØm$"""j®8!Õè³Ï>ÃÓO?]¯e7lØ€ýë_œˆˆèo·nÝ‚››[½–:t(<ØÈ‰ˆšžaÆáСCõZ6;;®®®œˆˆˆˆˆäÂ!Õ(77...õZ6''ÎÎΜˆˆH_—.]w×å6mÚ„'Ÿ|Ò‰ˆš–Í›7cöìÙw].44çÎ3@""ª¯o¾ù¦Þ#éÇŽ[ïƒjDd<²³³ñý÷ß×kY{{{L™2¥‘QsÇ!ÕjäÈ‘8xð 4MÏ›ššbذa8pà€“[·nÅÌ™3¡Õjk]ÆÜÜYYY<بùùùpwwGeee­Ë˜˜˜àË/¿Ä´iÓ ˜ŒˆîfÆŒøê«¯`nn^ë2º¶]VVKKKCE#¢R^^+++¸k[Ÿ>}:¶lÙb¨hÔLñ"%T«©S§Ö97‘S§N5`""¢¿=öØcuv–ÌÌÌ0bĉj¡T*1bĈ:ç477Çc=fÀTDTº‘B•••µÞÌÍÍñä“O²8HÔDYZZâÉ'Ÿ„¹¹ymGRƒ`jõè£Â¢Öç-,,ðè£0Ñßìíí1jÔ¨Z‹†£žˆîbÚ´iµž)`ff†Q£FÁÞÞÞÀ©ˆèn† ''§:—©¬¬DDD„Qcˆˆˆ¨s¤?899aÈ!JDÍ „T+[[ÛZGèèFØÚÚÊŒˆè¶©S§ÖZܰ²²ÂèÑ£ œˆ¨i=z´túÒ4 Ï 2Rfffˆˆˆ¨ó`¾‹‹ ÂÃà Šˆ\xxx×°°°@DDDgÕ „T§ÚŽXðˆ$ƒ‘#GÖx ÂÜÜãǯµðAD·YYYaüøñ5 ´µµÅÈ‘#eHEDõ1yòdTTTÔøœ……¦N îî5e&&&˜:uj­***0yòd§¢æŠ[ ªÓ°aÃàààPíq 
6L†DDD³´´Äĉ«7xƒ¨þj:hnnމ'rî2"#Ö·o_xyyÕø\EEžxâ '"¢ÆðÄOÔz0ÀËË }ûö5p"j®X ¤:YXX`Ò¤Iz;ßæææ˜4iR§4ÊäÉ“«78 QýÕ4—Yee%G$9…BéÓ§×8Ø××={ö”!5´ž={Â××·Úãæææ˜>}: … ©¨9bîêΑ™CDÆdРAzs³XXXà‰'žà\,Dõdff†'žxBïÀŸ‹‹  $c*"ª'žx¢ÆÀ¼HQó2mÚ´ϘáHajH,Ò]õïßÒÏèß¿¿Œ‰ˆˆþfjjª77 çb!ºwUç2ÓÍ]fjj*s*"º›ê=Æ¢QóSÓÁ€ÀÀ@„„„È”ˆš#é®LLL¤«¤é®’Ä ‰È˜T›ÅËË ýúõ“9QÓÒ¯_?i.3Î]FÔ´T=ÍX¡P 88AAA2§"¢†„àà`étbÝéÅD ‰UªÝÎ7wˆÈõèÑ>>>À¹XˆîCQQ&NœhÕªÚµkF#s*"ª)S¦@­V¸=e‹DÍÓôéÓ¥)tÔj5¦L™"s"jn8A¸}*BNNrrr››‹œœTVV¢¨¨HêpèÄÆÆ"66æææ°³³ƒ¹¹9\\\àìì ¸¸¸Ô8Y2Q]òòòPQQ•J•J…ŠŠ äå塲²ÅÅÅÒrºçª*++ƒ³³3RRRPVV†õë×W;=R·¾n°P*•°°°€­­­ôœ“““´n#jN222³gÏââÅ‹HIIAJJ RSSQPP ·lzzº4¯§¥¥%¼½½áåå…Ö­[# ¡¡¡èÒ¥ x2‘ð÷÷ÇC=„S§NA­VcÒ¤IrG"¢F0iÒ$,Z´ðÐCÁßß_æDÔÜ(„BîÔø´Z-7nàÆ¸~ý:nܸäääj;:¶¶¶Ò¼^º¯ŠndŽnG¾&ŽŽŽðõõEÛ¶máçç'ý???ž¢LÔL•––"==éééÈÊÊBNNòòòj¼åææ"//EEEÕæT©‹¥¥%lllô377‡……²²²àííüüüj¯ËÏÏǽlòlmmaoo'''8;;ÃÉɩƛ‹‹ Zµj…V­ZÁÝÝ2 7nÜÀáÇqøðaDEE!--­ÁßÃÆÆ}ûöÅ Aƒ0hÐ tïÞߢF¤R©påʤ¤¤ ##©©©ÈÏÏGII âââðçŸÂÉÉ &L€¹¹9áááoooid°»»»Üƒˆî"33×®]Czz:RSS‘™™‰‚‚TVV"22yyyèÝ»7BCCacc¥R)µsooo´oß¶¶¶r j‚X l†´Z-Ο?èèhœ9sqqq8þ<Š‹‹abb///½Â]ëÖ­áææv×Q€—.]tèÐAïñŠŠ iÔ¡nbVV’““‘””$$ÓÒÒ Õjagg‡àà`„††",, ýû÷GPP‹†DF.;;[jÏÉÉÉR‡%55YYYHMMEaa¡ÞkôŠi5Úìííaaa!è³³³ƒ ,--¡T*ann{{ûzeüá‡0f̘».§ÕjQPP€òòr”””HEÊüü|é±ââbÕZØÔÝÊÊʤßkbbwwwxzzÂËË îîîÒè«¶mÛJë^kkë{ûãÕCbb"vî܉o¿ýgÏž­sYÝ΄ T*¡P(““ƒV­Z¡¢¢EEEP©THKK“Ú»V«­õwzxx`üøñ˜4i~øan׉@qq1þøãDGGãܹsøë¯¿pãÆ:Û`}¸¸¸ 44ÁÁÁèÝ»7ÂÃÃáííÝ@©‰è^¥¤¤ ** ÇÇùó眜œú&&&ðóóCçΊþýû£oß¾,Ò]±@Ø hµZÄÆÆâСCRG¢  ®®®èÚµ+ºté‚„„„ 88–––²ä,++Ã… ‡øøxÄÅÅáìÙ³¸uë”J%úõë‡~ýúaÈ!èÞ½;ç#20µZÄÄD$$$àêÕ«ÕFëF ›ššÂËË >>>RÌÃÃCEçéé‰V­ZÁÃÃCš'¥9+))‘Š'ºÑ“™™™HKK“ ¨éééÈÎΖ^ãáá! uEÃöíÛ#((žžž2~jj´Z-öîÝ‹µk×âðáÃ5.Ó©S'ôïß]»vEhh(BBBàààpÏïUYY‰K—.!..çÎÃñãÇSí”ðóóÃüùó1{öl(•Ê{~/¢–(!!»wïÆ¾}û¤Ó… ! 
Æ øqãÞ"¶ÝDr©¬¬DTTvíÚ…C‡áÚµky_333ôèÑcÆŒÁc=†N:ä}©ia°‰*..ÆÁƒ±ÿ~ìß¿™™™h×®úõë'!h ^‹/â÷ßGtt4Ž;†ÄÄDxzzbÔ¨Q5j†ÊùÀˆPYYpñâEœ?—/_FBB._¾ŒŠŠ ˜˜˜ÀÇÇG¯x¥»ß¦MøøøpžÑû R©ô ®Uoׯ_Gnn.ÀÉÉ :uBÇŽѱcG¡S§Nðóóã’¨ÕjlÙ²ï¾û.õž³±±ÁÈ‘#ñÈ#`ðàÁÒÕ‰ƒJ¥Btt4~þùgDFF"%%Eïy[[[üë_ÿÂ’%KàêêÚh9ˆšªÌÌLlÞ¼[¶l‘ÎÖ¹“¥¥%:uê„   ¡uëÖðòò‚——”J%¬­­aaaôôt´iÓÅÅÅÐh4ÈËË“X¥§§ãâÅ‹¸páâãã‘——Wã{999áñÇÇܹsÑ­[·ÆüèD-Ê©S§°aÃìÚµ«ÖöçììŒÎ;#88:t€———tÐÝÉÉ ¦¦¦°³³CRR’4Ú¿´´yyyHOOGZZnÞ¼‰ .H·šâ·NŸ>O>ù$§ „MˆZ­ÆÁƒ±uëVüïÿƒF£Aÿþý1zôhŒ=íÚµ“;bƒ¸rå öíÛ‡}ûöáØ±c033ÃØ±cáÇs~#¢{P\\Œ¸¸8ÄÆÆâÌ™38}ú4Ο?µZ KKK¢cÇŽÒŽ‡îgžkxÙÙÙRá6!!.\À¥K—œœ !†nݺI·ÀÀ@®[ ]»váÕW_Õ+&( 6 3gÎĘ1cd9H«Õâ?þÀöíÛ±uëV½‹ ÙÙÙáÅ_ÄK/½ÄSœˆpû¢|ð¾ÿþûj;ð666Ç€0`ÀôèÑ£ÁGõ]½zQQQøí·ßpøða¤¦¦V[¦Gø÷¿ÿÉ“'s[CtÔj5vìØO>ù§Nªö¼··7Œððpôïß¿Á÷ç+++qêÔ)DEEIí½¤¤DoKKKŒ;/¿ü2ÂÂÂôý©éa° HLLĺuë°uëVܺu ááá˜6mÆŽ GGG¹ã5ª‚‚|ÿý÷زe ~ûí7¸¹¹aêÔ©X°`Ú´i#w<"£¢‘{ìØ1DGGãÔ©S¸|ù24 <<<†‡zݺuChh(üüüØáoT*.^¼ˆ3gÎàÌ™3ˆE\\JKKagg‡ÐÐPôêÕ  @¿~ýàææ&wdj$)))˜;w.öïß/=fccƒY³fáÙgŸEÇŽeL§¯  ›7oÆš5kpýúuéq???lذC‡•1‘|N:…eË–aß¾}zÎrppÀ#<‚qãÆaøðáÕ.ÆÕ˜„8yò$vïÞÈÈÈj£’;tè€W_}œ[”¨4 ¾þúk,_¾—/_Ö{®]»v?~<ÆoðiµJJJðã?bÏž=øßÿþ‡¢¢"é9…B1cÆàÍ7ßd¡°cЈýñÇXµjvïÞ <óÌ3˜2e |}}åŽ&‹äädlݺ7nDJJ Ƈ^x½{÷–;‘,Ôj5Ξ=‹èèhDEEá÷ßGvv6-± IDATœÑ·o_tïÞ]iÆ È›µZ‹/J#COœ8!ÍWÕ±cGôïß_ºñ`Jó°iÓ&¼ð ҅€,,,ðÔSOáÕW_E«V­dNW»ŠŠ lÚ´ o¿ý6ÒÓÓÜÞ ™9s&V¯^Í)D¨Å¸uë/^ŒM›6éä>¾1´!>ŒO?ý{÷îEee¥ô\÷îݱnÝ:ôèÑCÆ„DÆíĉxæ™gpúôié1sss<ú裘;w. 
dSÆã믿ƧŸ~ªwa3Ìž=ï½÷\\\dLH²dtŽ?.(ˆ^½z‰;w µZ-w,£QYY)vìØ!zöì)ˆŠ?ÿüSîXD‘šš*6nÜ(Æ/áíí-¦L™"þïÿþOÄÇÇ F#wL’J¥GŽo¾ù¦+öïß/T*•Ü1é©T*1}út@º >\\¹rEîh÷D¥R‰×^{MXXXHŸ£cÇŽ"!!AîhDnÛ¶mÂÕÕU¯‡‡‡‹_ýUîhuJNNÏ<óŒ^»511óçÏçö„è*•JÌ›7O˜˜˜HíÅÒÒRÌŸ?_ܼySîxu:|ø°0`€Þ:ÊÕÕUlß¾]îhd`,‘ .ˆ±cÇ …B!ú÷ï/Ž;&w$£wìØ1ñðà …B!Æ/.]º$w$¢¥V«Åo¿ý&/^,„B¡ŽŽŽbüøñbãÆâÚµkrG$#UYY)þüóOñÎ;ïˆ>}úSSSacc#FŽ)Ö¬YÃïN˜˜(ºté"uÖœœÄÖ­[åŽõ@âââD=¤Ïdgg'víÚ%w,¢FQTT$f̘¡·Ó&Ž=*w´{’œœ,fΜ) …ô9‚ƒƒE||¼ÜшŒB\\œ ’Ú‡B¡³fÍ2úÂà~ùåѵkW½uÖÌ™3EQQ‘ÜÑÈ@X 4åååâÍ7ß"$$DìÛ·OîHMÎÞ½{Epp°°°°o¿ý¶¨¨¨;щ‰‰ÿþ÷¿E«V­"^zé%%*++åŽGMPNNŽØ±c‡ˆˆˆnnn€èÙ³§øøãEZZšÜñè.\^^^R½[·n"11QîX ¢¬¬LÌŸ?_úl¦¦¦bÆ rÇ"jPqqq"00Púž;::ŠO>ù¤IoÃ;&BCC¥Ïdmm->ÿüs¹cÉjãÆÂÊÊJj¡¡¡"::ZîX÷­²²R¬ZµJ:SI7âŸZevêÔ)"lmmÅÇÌS€Z­~ø¡°±±]ºt§OŸ–;Ñ=IHHK—.íÚµDçÎÅòåËÅõë×åŽFÍŒF£Gÿú׿„³³³055ƒ ›6mùùùrÇkñâââôNG;v¬(--•;Vƒûì³Ï„©©©4ÚbÕªUrG"jGŽJ¥RjÃ}ûöIIIrÇjâÅ_ÔMøÚk¯ ­V+w4"ƒÒjµbñâÅz£_zé¥f3Påúõë¢OŸ>zg14µÑÏtïX ”ц „¥¥¥4hOõj@W®\áááÂÒÒRlÚ´Iî8DuÒh4â‡~C‡ …B´mÛV,^¼XÄÅÅÉZˆŠŠ ±wï^1eÊakk+ìííÅüùó9eƒLnÞ¼)|||¤ù´iÓšõ<Ä;vìæ7S(âÛo¿•;ÑÙ»w¯°´´”Úðܹs›ô¨ÁÚìÞ½[X[[KŸsΜ9,R‹¡ÕjÅìÙ³¥ï¿Ø³gܱ\ee¥xê©§ôæTüá‡äŽEˆB”——‹§Ÿ~Z˜˜˜ˆ·ß~›ÓF ÕjÅo¼! …˜?~³9’CÍGAAøøãE@@€011#FŒ?ýô×$«‚‚ñÉ'ŸˆöíÛ …B!þùÏŠð{i EEE"$$DêˆGDD´ˆ¿ý®]»¤ÑH666"&&FîHD÷åðáÃRqP¡Pˆ·ß~[îHê÷ßÎÎÎÒ:kÑ¢ErG"2ˆ_|QúÞ»¸¸ˆãÇË©Q½õÖ[zÅШ¨(¹#Q#aÐÀÊËËŘ1c„££#«ï°gÏaoo/Æ'ÊËËåŽC$ÊËËÅÿû_áìì,ìííųÏ>Ë‘Zdt4Ø·oŸ4²õ¡‡GŽ‘;V³÷Ì3ÏHðAƒµ¨íÖ{ï½'}ö€€NˆNMNBB‚Þœ]ï½÷žÜ‘ âܹszŸ{Íš5rG"jT}ô‘ÞÜ¢-嬟ªÛiGGGqñâE¹#Q#P!È ***0yòdDGGã—_~Ahh¨Ü‘Z„³gÏbÈ!ÇŽ;`ff&w$j´Z-vìØÅ‹#;;/¼ð.\¥R)w4¢:ÅÅÅá•W^ÁO?ý„þóŸX¹r%·_àСC>|8„ðññÁ™3gàêê*w,ƒš6m¶mÛxúé§ñé§ŸÊœˆ¨~ÊËËÑ»woœ={ðÊ+¯`ÅŠ2§2œ#GŽ`Ĉ¨¨¨€¥¥%Ž?ް°0¹c5¸ØØXôíÛ°±±Áþýû.w,ƒyþùç±jÕ*@·nÝpüøqXXXÈœŠ’‰ÜZ’ùóçã·ß~ÃÁƒ¹se@]»vÅÁƒqäÈÌ›7Oî8Ô]½z}úôÁ´iÓ0dÈ\¹rï¼ó‹ƒÔ$„††âÇÄ‘#G°°0<ÿüó(++“;Z³¡V«±`Á! 
P(ðù矷¸â ¬Y³>>>€Ï>û gΜ‘9Qý¼þúëRqpøðáxï½÷dNdXƒ Â'Ÿ|àv±4""ååå2§"jXºïvEEà“O>iQÅAøðÃ1xð`ÀéÓ§ñúë¯Ëœˆ „²mÛ6lݺû÷ïG×®]åŽÓâtëÖ ?þø#¶lÙ‚í۷ˇZÍ›7#,, qqqØ´i¼½½åŽEtψ“'Oâ‹/¾À_|^½záüùórÇj¶nÝŠK—.fÍš…áǘHJ¥Ÿ}ö@¥K—Êœˆèî®]»&¨qwwÇ—_~ …B!s*Û;w.{ì1@BBÖ­['s"¢†µvíZi[=~üxÌ™3GæD†gbb‚­[·ÂÍÍ °jÕ*$&&ÊœŠO16€«W¯¢k×®X¹r%æÏŸ/wœmÍš5X²d âââàçç'wjÆJKK1kÖ,DFFbáÂ…xçw`nn.w,º………ppp;†Q¹qã¦NŠÓ§Ocݺu˜9s¦Ü‘š,!qõêUX[[ãÊ•+-þ ÂÀqôèQ·§ éÒ¥‹¼ˆê0iÒ$ìܹÀíbÿÔ©SeN$Ÿììl´k×………pqqÁµk×àèè(w,¢–——‡€€äååÁÑÑW¯^m‘#ýu¶nÝŠéÓ§¦L™Â8ÍGÀâÅ‹1dÈF+æççÃÕÕßÿ}ËiµZ¼÷Þ{X²d úõ뇠  ÄÇÇ7J¦{UßÏð ,X€ððp¼òÊ+ú>Ô²`øðሊŠÂ¡C‡°råJÙ‹ƒ†jc÷ʘ×K«V­ÂàÁƒ¢hlÿmÛ¶ÅÑ£GñÒK/aöìÙ-j®­†öûï¿ãêÕ«€yóæ¬8Xõ;¥V«±dÉ$''׺Œ!-_¾\º¿e˃¾7ѽHMMÅ®]»=ô"""dN$/777©““ƒ¯¿þZæDD cÛ¶mÈËËp{ßÞú†rŠˆˆ@·nÝ‘‘‘ÈÈÈ95Ù©S§ð¿ÿý+W®l´÷033CNN ë\îÃ?Ä'Ÿ|‚åË—cÿþýhÛ¶-òóó-×½¨ïgh|ðöìÙù¨QTTT`ܸqHIIÁ±cÇ0hÐ ¹#0l»Ƽ^zöÙg‘€ÊÊÊF¯7nÔù¼1þÿ™™™á­·Þ¦M›ðÚk¯aýúõrGj’t;Ð … óäVýN™™™áÕW_Å‹/¾(+ï\Æúöí+]à`ûöíÐh4}¢úúꫯ¤ïç²eËZä©ÅwúÏþgggÀ¦M›dNCÔ06oÞ puuÅ¿ÿýo™ÓÈÏÄÄo¾ù&€Ûó(ó`^óÁa#{óÍ71yòdtèСÑÞÃÎÎ^^^ ¬s¹õë×C©TB¡P@©TâÀèß¿£åºõý ¡cÇŽ˜8q¢´R#jHo¿ý6âããqðàA´k×Nî8C¶±{aÌë%333ƒœZœ’’"¦Qcýÿ€™3gbÍš5øÏþÃ/÷HÝ»wúô都÷ß)[[[¬\¹=ö j\Æt§ifff"::ÚàïOTºÓê|||ðÏþSæ4ÆÁÆÆO<ñ€Û1HHH9у¹pá‚t¢ˆˆXYYÉœÈ8Œ1B:ë§7,6¢üü||Xº?~üx“333<ú裀¸¸8dggËœˆ·dhÿþý°´´”.Þ˜žþyØÙÙոܾ}û0wî\hµZddd`îܹxê©§ðÓO?á•W^A@@Ñ¥K¸¸¸ -- YYYX°`žþy¼ôÒKèÛ·/ž~úi¤§§T*¾ûî;Ìš5 ?ü0¶mÛ'''øûû#&&QQQèÝ»7ÌÍÍ,u¹ßÏÐІ KKK8pÀ ïG-òeËðøãÍiÅwª­i4DGG׺>(**²eË0gÎ 0}ûöʼn'¤×ãå—_FDD/^Œ'Ÿ|ï¼ó:uêTk–¦²^Ò¹pázöì 333tîܧN’ž»[.àvñxàÀX¾|9–,YSSSaݺuˆ‹‹“þ÷óÿg,Þÿ}ܸqCš‹î.&&Fºß»woƒ¿Mß©Q£Fá‹/¾ÀåË—k]ÆBBB`cczíÈX?~\ºß·o_“ŸîÝ»Ks/ÿñÇ2§!z0ºï°……zôè!sãÒ¯_?é>æ5‚Í‚ Ä€äŽ! 
:tè „¢¼¼\ÄÆÆ @¬ZµJ=zTL˜0A\¼xQ´mÛV¼ûî»ÒkóóóE§N„···HMMF¤§§ ÂÉÉIüúë¯"==]˜›› ooo±jÕ*QVV&._¾,ÌÌÌŒêï Ó¯_?ñÜsÏɃš‰ÌÌLabb"Ž9"w”{V×ú ;;[Œ9R¤§§KËOš4I899‰¼¼>^Z¾õÖ[rÇ¡Àa#7nœ˜4i’Ü1$5í„ ¢¤¤Dzì…^Ä­[·ô–ݱc‡ æÍ›'„B«ÕVûÕŠþþþÂÚÚº¡?Λ8q¢?~¼Ü1¨™øî»ï„©©©¨¨¨;Ê}«i}pàÀiÃçm×®]båÊ•€¸páB¿ënŒ}½¤+j4é±6mÚSSÓ{Ê¥T*±nÝ:¡ÑhÄ… DAAA­ƒ¦ê믿–––z/ªÝ³Ï>+µ§¬¬,¹ã!„ÈÍÍÄСCåŽ"(www¹£U³`Á©ýêøÐßÂÃÃááá!w¢âîî.ˆððp¹£ŒŒ i=È7ÍO1nDÙÙÙðôô”;FtW[³¶¶–‹ŠŠ8::ê-«›I7YxMWj333«ö˜¹¹9JKK$oCjÕªU½æý"ªŒŒ 8;;K§Ô4E5­Ž?Ž®]»BÜ> ¤w7nöîÝ Õæ\|+9ãz©êÜRVVVÒU+ë›kÕªU055żyóгgOäååä(†æåå…òòräääÈ¥IÈÍÍ•î;99ɘäoööö wм\”J%ý¿‘±¨ú½Ô]µ—þ¦Û.r{@M®­ßÙ×#ÀÅÅEºÏ¶Þ<°@؈ìììPXX(wŒ{¦›`þÆzë:?º9šº‚‚iGˆèAÙÙÙ¡¨¨Hî N£ÑàÊ•+(//¯ñ9Ý…~{]g¬ë¥úæš1cNž<‰Áƒ#66ýúõÃÇlЬ† û>4Çâgc¨ú½U©T2&ùÛƒ÷šîobÌsoRËUµý˘Ä8•””ûÚÔäé¶Aºï4ý­ê¾Os©´t,6"///¤¤¤Èãžé.ªòóÏ?ë=®û,£G6x¦Æ’’///¹cP3Ñ­[7”••áâÅ‹rGiPÁÁÁP©TX·nÞãéééX·nÚ·oàöEGªjè+Ùëz©¾¹V®\‰°°0üòË/ؽ{7 –.]*-/„0PâÆ‹XZZÊ¥I¨:êÈX޼ë ÞÞÞ2'ùûoRu„‘±puu•îߺuKÆ$ÆI×~«þˆš"Ýw˜£Ù««ºîsww—1 5Qpp°Ñ *++@:-NG·_õñE‹! |ðòóó¥Ç?ýôS„……á?ÿùÞk«îØÖôûjZNnB\¼xAAArG¡f"$$;wÆ7ß|#w”ûVSû}ôÑGáëë‹… báÂ…øá‡°víZ̘1Ó§OǼyó/¾ø"víÚ… .`ýúõÈÌ̼ëû5…õ’î5U_«V«¥ßQß\}ôòòòcÇŽ…Üîxfff"--­Î,ÆN«Õ"22“'O–;J“Ѻuké¾±üÿ§¦¦çªÊwÒÚ}||dNBT]@@€tÿúõë2&1>Z­×®]øûûËœ†èÁøùù¸ÝÎiÖ$&&J÷u'jÚX lD£GÆÍ›7qæÌYs\¾|ï½÷€Û+¶O?ý±±±Xµj•tZܲeËpá·Ô?~#GŽÄÈ‘#ñòË/ã¹çžp{¾-[[[dggã¿ÿý/€Û#‰¢££qìØ1©3ÿþûï#77_}õ’’’7n4š#¬gΜAJJ yä¹£P3òÚk¯aõêÕF³£__*•ªÖõ­­-:„!C†àÿþïÿ0mÚ4œ8q[·n…££#† ‚¯¾ú ööö˜,Vñá‡báÂ…€#GŽ`àÀ2'"ºGŽ‘¦”ùè£ðüóÏËœÈxôêÕ 'Nœ€»»;222Œjcº?AØÈ^yå|úé§ÍòâMUaa!6lØ€—^zIî(ÔÌ( |ñŸté¦OŸ.BKÔœ]¹r#GŽÄàÁƒY¼G2d€ÛóXÊÝWøë¯¿oÐÙ±cÀÒÒC‡•9 Qu …cÇŽ?~œ#‹ªØ¼y3ÀÃÃýû÷—9 уéß¿¿4¿Þ_|!sãñ×_Ig?Œ7ŽÅÁf‚ÂFWWW|ôÑGrG¡ÿï¿ÿý/<==!wj†¼½½±gÏüüóÏxì±Çšä•ÌBEEãš{”ÞÉ“'Ñ¿øûûcÛ¶mrÇi’tÛ¢’’lß¾]¶999X²d 8'''Ùr·çcüñÇÜž®ÅÑÑQÖ1‰ñøá‡¤éA¦OŸ333™=sssL:Àí3&8 s"ã°råJé>7,62¼ÿþûx÷Ýw+wœïäÉ“X¹r%V®\ ~ý©qôìÙQQQ8sæ ºtéÒ¢N;ºzõ*V¬X!MØþî»ïB¥RÉœŠšV«Åûï¿~ýú¡W¯^øé§Ÿ`cc#w¬&éÑG…½½=à½÷Þ“Šë†TYY‰7bË–-z^Ë»ï¾+ý¦M›&s¢ÚõîÝ¡¡¡€-[¶´ø©kÔj5-Z055Åœ9sdNDÔ0æÌ™#í;.Z´¨ÚöZš3gÎH5»uë†îݻ˜ˆ +$0jÔ(Ìœ9&L&ì%ÃËÏÏÇĉ1kÖ,Œ9Rî8ÔÌ…††âܹs Á?þñ¼õÖ[ÒÕo›³víÚaÑ¢EB@W_}UºP5iii6l^ýu¼ûî»Ø³g¬­­åŽÕdÙØØà_ÿú )) 
6l0xsss,Z´Hö‘ƒÀí¿ÁÆí۷ǨQ£dNDT·åË—¸}àdþüù-ºp°zõj½Ñƒ2'"j:uÒE¸víZ™ÉG£ÑàÙgŸ…V«ð÷:š^¤Ä@JKKÑ»wo(•Jüøãia`*• #FŒ@QQŽ?+++¹#Q !„Àúõë±páBàý÷ßLj#äŽEtÏJKKñÑGáƒ>€§§'¶oߎnݺÉ«YÈÎÎF@@ŠŠŠàè舸¸8øúúÊËàtz:tè`Û¶mœ„š„ÁƒãÈ‘#€eË–aéÒ¥2'2¼Ó§O£OŸ>¨¨¨€­­-.^¼¹c5˜äädtìØ¥¥¥°´´DLL ºté"w,ƒ{ã7°lÙ2·×}¿üò‹Ì‰¨!q¡X[[ãûï¿Gbb"F’’¹#µ%%%9r$’’’°{÷nÉ  æÍ›‡³gÏ¢}ûö9r$ˆ“'Oʨ^Ôj5>ÿüs`ÅŠxñÅqúôi›››tZ^AAfÏžÝ"çïüôÓO¥â`÷îÝ¥«»Ï?ÿ€·ÞzK*¶ÙÙÙ˜4i’45À‡~Èâ 5;¾¾¾øðÃååå˜4i²³³eNeXF *•J|þùç2'¢†Æ¡ùûûã×_ÅåË—1bĈ·B‘Cff&† ‚ÄÄDüúë¯ðóó“;µPؽ{7þøãTTT W¯^;v,Ž=*w4¢©T*lذ;wÆüùó1vìX\»v K—.å(øFðòË/£gÏž€_~ùo¼ñ†Ì‰ +&& .XYYaË–-œ+˜š ???¬_¿ÀíÓïÆŒƒ˜˜™SFII ÆŒƒÿ×ÞÇUU'þ_VpAÜDPqiQs©qÅ%Ó‘Ô¦œ,¿eéLfßúŽK5Úf‰i𦥕¦i¸´©[˜ë(Š[¸€ˆ ˆ²Èz~ð󎌖Xȹx_ÏÇÃGtÎ}xßáÀyßÏrüøqIÒÃ?¬1cƘœ ¸3ž~úi…‡‡K’Ž9¢~ýúÙÍÀŸØØX 2ĺŒÂ¼yóÔ°aCsC¡Ìñ›W9kÚ´©6oÞ¬äädµoß^{öì1;Ò]kçÎêСƒÒÒÒ­Æ› Ðý÷߯mÛ¶iõêÕJKKÓ< 6mÚè_ÿú—®^½jv<@§NÒ‹/¾(????^]ºtQ\\œæÌ™£:uê˜ï®åää¤Å‹[×ì|íµ×´téR“S•'N”¸Ézã7`r*àö >Ü:øZi¶k×.“SÝY—/_VŸ>}¬eè½÷Þ«Å‹›œ ¸³/^¬Ž;J*.ÍÂÃÃuåÊ“SÝY;wî,1 ròäÉ:t¨É©p'Pš Y³fÚ¹s§¢9sæXùÄWTT¤ÈÈH………)88X±±±”ƒ°9ýû÷Wtt´öìÙ£öíÛëþçäçç§ &0ýå.''G+W®Ô€Ô¤I}öÙgš8q¢Îœ9£?üPM›65;¢]ð÷÷ײeËäèè(Ã0ôøãëË/¿4;ÖuêÔ©³*ž~úi7ÎäTÀï3}út=ýôӒЧÝ>ðÀúæ›oLNug$‚¹> IDAT''«K—.֙͛7WTT#Ìq×sww׺uëÔ¬Y3IÅ£þÃÂÂtîÜ9““ÝëÖ­S·nÝtñâEIÒØ±c5eÊsCáÎ1`š‚‚cÊ”)†³³³b9rÄìHÞáÇN:...Æ´iÓŒÂÂB³#¥rþüyãŸÿü§`H2š5kfLž<Ùˆ7;îRÆ7ß|cŒ5ʨ^½ºáììlôîÝÛX¾|¹‘ŸŸov<»6wî\Ãb±’ GGGcÑ¢EfGº#âããúõë’ IFÏž=ÜÜ\³cHQQ‘1~üxë×µ³³³ñæ›oEEEfG+3[¶l1üüü¬¯±cǎƹsçÌŽ”«ääd£}ûöÖï???cëÖ­fÇ*3………ÆŒ3 '''ëk|á…îªknDAhöîÝk´k×Ψ\¹²ñòË/—/_6;R…séÒ%ãå—_6*UªdtèÐÁØ¿¿Ù‘€ßmß¾}Æ‹/¾h4hÐÀdÿøÇ?Œ;wRzã¹r励zõjcôèÑF:u ‹Åb„††sçÎ5.\¸`v<\çƒ>0 I†Åb1^y壠 ÀìXeæ»ï¾3jÕªe½éèׯŸ‘““cv, ÌÌœ9³Äõƒ>hœ8qÂìXHvv¶ñÊ+¯”x]}úô1®\¹bv4ÀW®\1zõêeý~prr2þñÙÙÙfGûCŽ;fôìÙ³Ä늌Œ4;Ê¡ÈÏÏ7Þ~ûmÃÃÃèU«–ñÞ{ïñ.z)äææï¾û®Q«V-ÃÓÓÓ˜9s&#_p×(**2¶lÙbŒ?Þð÷÷7$µk×6}ôQãóÏ?7RSSÍŽˆ àðáÃÆÌ™3îÝ»...†‹‹‹Ñ­[7ãí·ß6N:ev<ü†¥K—®®®% †¤¤$³cý!ùùùÆÔ©S GGGëë9r¤‘——gv4 ÌÅÄÄ”%[¹recêÔ©²<ˆŠŠ27n\bdäŒ3M»WXXhLŸ>½DqÞ¤IcݺufG»mÙÙÙÆäɓʕ+[_Kýúõ-[¶˜ å„‚ÐÆ¤¥¥/¼ð‚Q¹reÃÏÏÏxóÍ7ôôt³cÙœôôtã7Þ0üüüŒÊ•+'Näó„»ÞñãÇÙ³g½{÷6*W®l8::íÚµ3Æg¬X±ÂHNN6;"LVXXh8pÀ˜3gŽ1|øp£^½z†$ÃÛÛÛ=z´ñå—_—.]2;&nÃÏ?ÿ\¢`ððð0æÏŸ_!oÊþùg£M›6% †Y³f™ ¸£.^¼h 
0Àúu/Éðõõ5fÏž]!FÍþðÃFHHH‰ü76vìØav4À¦ìر£D‰.ÉèÒ¥‹±iÓ&³£ÝRNNŽ1kÖ,ÃÇǧDþ2 ÁÎX Ã0ÊrMC”3gÎhÖ¬YZ°` ÃÐc=¦±cÇÊßßßìh¦Š×œ9s´hÑ"9;;kôèÑ7nœüüüÌŽ”«ììlEGG+&&F[¶lÑ®]»”››«fÍš)$$D¡¡¡j×®Zµj%'''³ãâÉÈÈÐÞ½{µsçNmݺUÛ¶mSZZšj×®­N:©K—.zàÔ¶m[Y,³ãâwJMMÕˆ#´aÃ뱎;êŸÿü§zôèab²Ò9uê”&Ož¬O>ùĺ)›ŸŸŸ–-[¦“ÓåãË/¿Ôøñ㕘˜h=æë뫱cÇêñÇ—···‰éJÊÍÍÕªU«4gÎmÛ¶ÍzÜÅÅE&LÐ+¯¼bÝqÀdffêµ×^ӻᆱ¼¼<ëñÐÐP;V”«««‰ KJNNÖÂ… 5wî\={Öz¼~ýúš9s¦lb:˜‚ÐÆ]¹rE .ÔìÙ³uâÄ ÝsÏ=1b„†ªÚµk›¯\œ?^_|ñ…/^¬Ý»w«iÓ¦zöÙgõ—¿üEÕªU3;`rrr´k×.ÅÄÄhëÖ­Ú¾}»._¾¬J•*)((HíÚµSûöí¬Ö­[ËÅÅÅìȸM©©©Ú³gO‰?'Nœajذ¡BCC­å°¿¿?…à]Æ0 }üñÇúÛßþ¦ôôtëñîÝ»kÒ¤IêÞ½»Íý›Ÿ8qB³fÍÒüùó­7J‹Ecƌь3äîînrB |effjÆŒš5k–233­Ç®ˆˆ=ôÐCª\¹r¹g3 C»víÒ_|¡%K–Xw—Š¿oû÷ï¯3fØý` 4:¤—^zIQQQº¾n©S§ŽFŽ©GyD:t0åçvvv¶¾ûî;}úé§úú믕ŸŸo=W­Z5=ÿüóš8q¢ÜÜÜÊ=ÌGAXA†¡-[¶è“O>ÑŠ+”­nݺ©_¿~êÓ§6lhvÄ2• uëÖ)**J›7oV•*U4tèPEDD(44Ôæn‚[STT¤cÇŽÝP(]ºtIÎÎÎjÑ¢…Z¶l©-Z( @þþþò÷÷7å¦%]¸pAqqqŠ×‘#GtèÐ!>|XgΜ‘$5jÔÈZö^+~íå #H)))úÛßþ¦Ï>ûÌ:O’4nÜ8 6ÌÔâ­°°P7nÔœ9s´víÚÛ¶m«Ù³g3jv/55Uï¼óŽæÎ«ŒŒŒçªV­ªÞ½{«W¯^ UÓ¦MïXŽôôtmݺU7nÔW_}¥Ó§O—8ïàà þýûkòäÉjÛ¶íËÜ­öìÙ£iÓ¦)**ªÄÏC©x”ÞÀÕ£G…„„ÈÃÃãŽå8~ü¸bbb´~ýzmذAÙÙÙ%λ»»ë™gžÑ„ T£F;–¶‚°ÊÉÉÑúõëµfÍmذA/^T`` |ðA………©sçΪY³¦Ù1oËÅ‹µmÛ6ÅÄÄèÛo¿U\\œj×®­^½z©ÿþêÛ·¯M Ç*ª_~ùE{öìÑtøða>|XGU^^ž,‹4h 5oÞ\ 6TÆ Õ¨Q#5jÔˆÑ>eÄ0 %''+!!A¿üò‹Nž<©“'O*>>^ñññJKK“$yzzªeË–Ö·mÛ¶j×®<==M~°û÷ï×Ë/¿¬uëÖ•8îêêª^½zièСêÝ»·ªW¯~dzjÇŽúâ‹/ôå—_êܹs%Î7mÚT¯¾úªyä988Üñ<@E‘™™©Ï?ÿ\óçÏ×®]»nú___uîÜYAAA P«V­äçç§*Uª”úy tîÜ99rDqqq:xð bccuðàÁJ IòööÖã?®¿þõ¯wÝ À Z°`.\¨”””Î;88¨uëÖêØ±£¨æÍ›ËÛÛû¶– ÊÎÎÖ™3g§¸¸88p@Û¶m+1}øz;vÔ˜1c4lØ0– €$  ¯°°P±±±Z»v­6nܨ={ö¨°°P-[¶TçÎÕ®];)((¨\nJ###CÐÁƒµgÏmݺUñññrttTûöíÕ½{wõíÛW÷Þ{/7@9(,,TBB‚u¤ÚÑ£GuôèQ%$$èìÙ³Ö©žžžÖÒ°Aƒòõõ•———êÖ­«ºuëÊËËKµjÕ2ùÕ˜+??_çÏŸWrr²Î;§ääd%''+))ÉZž:uJ¹¹¹’ŠËœ ¨qãÆjÑ¢…u$g«V­ˆRÙ³gfÏž­Ï>ûÌúuu“““õçj—.]Ô¦M›2Yç,++KÔŽ;´iÓ&EGGëòåË7<.44TÏ<óŒ ÄZ¨À->|X+W®ÔªU«´wïÞ[>ÞÝÝ]¾¾¾rwwWÕªUåêêª*Uª(??_™™™ÊÏÏ×¥K—”’’¢óçÏß´¼ž···  Áƒ+,,LÎÎÎeõÒüùùùÚ¼y³V®\©Õ«Wëüùó¿ùxyyy©N:òðð³³³ÜÜÜäìì¬ììlåææ*++KJJJºaDòÍkРA>>òññ±ŽÄ¼~dfݺuY2eâÂ… Z²d‰–/_®;wþêã¼¼¼¤úõë«^½zòõõ•§§§õ†ÃÃÃCYYYÖr!''G‰‰‰JNNÖéÓ§uøða?~üWË___ 
2D#GŽTppðz¹À]-11QÑÑÑŠŽŽÖ¶mÛtäÈ–ésÔ­[W÷ß¿ºtéb}7æòSXX¨ýû÷+&&FÑÑÑŠUrrr™>‡“““š7o®ÐÐP…††*,,LõêÕ+ÓçÀÝ…‚Ð$%%éàÁƒÚ¿¿:¤'NèäÉ“:{ö¬õ|GGGÕ¨QC5kÖTÍš5U£F UªTI...ÖÒîÚ† W®\‘T\FæååéêÕ«JKKSjjªRSS•––fý%ÆÁÁA>>>jذ¡š4i¢€€µiÓFAAAòññ1á³ ¬¥¥¥éܹsJIIQRR’Ο?oýïÍ µëC¾ž›››\\\äááa½ö\+-þ{Z­‡‡G‰bÍÁÁÁ:º  Àzº&77·Äz+yyyÊÊʲ^Ç®åÊÌÌ´¾#{3U«V½¡ä¬Y³¦|||T§N£*½½½YÓ¦HHHЪU«´iÓ&ÅÄÄ”Ø ¡¬9::*88XݺuSŸ>}BÉ”±«W¯êðáÃ:xð Ž=ª3gÎèܹsÖC×F fffZ~ºººªzõêòòòR½zõäíí­¦M›ªU«V ¬pËö 55UСC‡tìØ1¥¤¤(11Q)))ÊÈȰþþš——gýÙÍÍMîîîªW¯ž¼¼¼äçç§-Z¨U«V `™.Ü B;–——§S§NéôéÓºpáB‰‚/55U¹¹¹ºzõªrrr$IG•$5oÞ\’T¹reUªTI®®®ÖRñZÁxíâT¿~}vKPBfff‰â033S999º|ù²òóó•‘‘a-ô®\¹bÉtMQQÑ S(òóóuîÜ9?^Mš4¹a¡gGGÇË,\û…ªJ•*ruu•»»»\\\T­Z5ëµÍÝÝ]nnn%F;r=CESPP ]»viïÞ½Ú¿¿öíÛ§øøøRMCúo®®®jÒ¤‰‚‚‚Ô¶m[µmÛV÷Ýwß]X僂¥!IZºt©ÉIàFË–-SDD„ø±ÜZff¦Îœ9£¤¤$¥§§[G$dddÈÍÍMŽŽŽòôôTåÊ•åëë+yyy™w+FØ777µlÙ’Ê ‰„€]ciÀŽQvŒ‚°c„€£ ì!`Ç(;FAˆR‹ˆˆPDD„Ù1খ-[&‹Åbv ¨p(;ædvT÷Þ{¯ÙàWÕ¯__C† 1;T8Ã0 ³C0SŒ;FAØ1 BÀŽQvŒ‚°c„€£ D©ÅÆÆ*66ÖìpS§OŸÖŠ+ÌŽŽ“ÙPqDFFJ’–.]jr¸ÑÖ­[!Ã0ÌŽ #;ÆB”Ú¸qãÌŽ¿*$$DË—/7;T8ƒ¹X€ÝbŠ1`Ç(;FAØ1 Bü.þþþ=z´Ù1সF@éQâw©S§Ž<==ÍŽ7Å5 J]Œ;ÆB”Zdd¤"##ÍŽ7µuëV=òÈ#fÇ€ ‡‚¥«Ÿ~úIË—/רQ£Ô¥K†¡ŸþYÿû¿ÿ«&MšèСCêܹ³œ uëÖ™€8}ú´V¬XÁ5 n!n‹ÅbQ¯^½´dÉ?^EEEÊÈÈМ9sôË/¿hÑ¢Eš;w®–/_®ääd…‡‡k÷îÝfÇ`G¸FÀía BÜ6Ã0äàà -Z(>>^’Ô¢E =zTùùùrrr’$}ðÁ;v¬FŒ¡%K–˜€á·‡„¸m‹åW]»ñ–¤~ýúI’öïß_>Á@\£àvQâŽñöö–$¹ººšœnÄ5 ŠQâŽIOO—$õèÑÃä$p#®QPŒ‚·­°°P’TTTtùëmܸQ5Òßÿþ÷rË\£àöPâ¶deeiöìÙ’¤S§NéÓO?Õ•+W¬ççΫ˗/+11QGŽÑöíÛU£F ³â°3\£àö±‹1Ê„¿¿¿Ž9"¾œØ"®QðëAØ1 B”ZDD„"""nz®   Ä ¼-[¶L‹å¦ç¸FÀ¯£ Ä’••¥3f(!!A’ôâ‹/j÷îÝ&§€b\£àÖXƒ¥)I7nœÉIàF[·nUdd¤–/_nv¨P(;ÆcÀŽQvŒ‚°c„€£ ì!`Ç(Qj±±±Š5;ÜÔéÓ§µbÅ ³c@…ãdvT‘‘‘’¤¥K—šœn´uëVEDDÈ0 ³£@…ÂBÀŽ1‚¥6nÜ8³#À¯ ÑòåËÍŽŽÅ`.`·˜b Ø1 BÀޱ!njÙ²eš>>òññQÍš5åîî.‹Å"777IRff¦ ÃPFF†RSSuöìY%%%éäɓЋ‹S\\œNž<©›}i999©[·n=z´  ggç;þ9`ÛvíÚ¥ùóçkåÊ•JOO¿écjÔ¨¡ÀÀ@µjÕJ-Z´êÖ­+///yzzÊÑÑQnnn***RAAòòò”““£ôôt%''ëìÙ³:sæŒ:dý“——wÓçjÙ²¥FŽ©Ç\uêÔ¹“/lá]êâÅ‹š5k–,X ”””ç,‹uúoXX˜BBBäééY¦ÏŸžž®-[¶(::Z?þø£öîÝ{Caèíí­Ñ£GëùçŸgT!`g ôùçŸkÖ¬YÚµk× ç}}}Õ½{wuíÚU¡¡¡jÚ´i™>~~¾víÚ¥èèhEGG+&&FÙÙÙ%ãêêªjâĉ .Óç[BAx—¹xñ¢Þzë-Í;W™™™Öã Ó Aƒ4`ÀÕ«W¯\s%&&ꫯ¾ÒªU«£¢¢"ë9777=û쳚0a‚jÕªU®¹”¯ÂÂB-]ºT¯¿þºŽ=Zâ\Ó¦M5xð` 
ûì3IÅ7Ý#FŒÐ[o½UawÝ„Lbb¢ºwïnÝý300P6l¨PSõJãôéÓêÝ»·âââ$ÒÙ¼y³¼½½MNà·†¡¿þõ¯ú׿þ%IªR¥Š–-[¦ððp““•­‚‚;V ,T¼¦â—_~©¾}ûšœ n[ÅV êÝ»·µìÚµ«¶lÙrוƒ’T¿~}mÙ²E]ºt‘T¼˃>hÖÀ6½ð Ör°fÍšÚ¸qã]WJ’“““>üðCM:URñZªC‡-±™ TŒ ¬ Õ£Gýøã’ŠËÁuëÖ©J•*æ»Ã²³³Õ³gOmß¾]’Ô½{w}ûí·rtt49€ÿöî»ïj„ ’$wwwmÙ²EAAA&§ºóf̘¡—^zIRñëŽU‹-LN¥Ç bêÔ©Ör0((H«W¯¾ëËA©xzâúõë­%ÃÆõú믛œ ÀÛ½{·&Mš$©øûvõêÕvQJÒ¤I“ôüóÏK*é=|øpåå噜 J„ÀO?ý¤ÊÃÃCûöíSƒ ÌŽU®Ô¶m[]¾|YŽŽŽÚ¾}»î¹ç³cPñôÚ6mÚèÈ‘#’¤ hôèÑ&§*_EEEzðÁµqãFIÒ‹/¾¨3f˜œ J‡„6Î0 ?^………’¤yóæÙ]9(I5Ò| ©xºõøñãE· ؆÷ßßZÉ£ üIDAT<ØîÊAIrppÐ'Ÿ|¢ÚµkK’Þ{ï=ýòË/&§€Ò¡ ´q+W®ÔO?ý$I0`€†jr"ó >\ýû÷—$mß¾]kÖ¬19€ôôtë´wwwÍ›7ÏäDæ©[·®ÞyçIÅ£*_yå“@éPÚ¸ÈÈHIÅ;f2]­x3'''IÒ¬Y³LNàÓO?Uzzº$饗^R­ZµLNd®ˆˆµk×N’´bÅ ;wÎäDpk„6ìØ±cÚºu«$iĈìŠ)©eË–ÖQ”ÑÑÑ:qâ„ɉû¶páBIR­ZµôÜsÏ™œÆ|š2eŠ$©  @‹/67”¡ [¶l™u½'žxÂä4¶c̘1’Š×g\¶l™ÉiûuèÐ!íÛ·ORñȹJ•*™œÈ6ôêÕK¾¾¾’Ä5 @…@AhÃ6mÚ$IòööVçÎMNc;BBBT§NI²î  ü]ÿý7xð`“Ø'''…‡‡K’þýïëÂ… &'€ßFAh£òòòôóÏ?K’:uêdrÛb±X¬…éÏ?ÿ¬‚‚“öiûöí’$uìØÑä4¶åú7u¶mÛfb¸5 BuüøqåääH’Ú´icrÛ$IÊÎÎÖ±cÇLNاÿûß’¤fÍš1½ø¿´nÝÚúñµÏØ* BuþüyëÇ5jÔ01‰mº~§T¦ïæ¸xñ¢$©víÚ&'±=×NRSSML·FAh£222¬SÞ¨fÍšÖÓÓÓMLد´´4I’»»»ÉIlÏõ×( B¶Ž‚ÐFU­ZÕúqvv¶‰IlÓõŸ“ë?WÊ›››$®Q7såÊëÇUªT11 Ü¡º~Ô £OntýìëGê(?צú_Iˆÿ¸6ýZ’u×u°U„6ªaÆrp(þç9~ü¸ÉilÏ/¿ü"IrppPãÆMNاFI’d†Éil˵k”ôŸÏØ* BU£F µlÙR’´cÇ“ÓØžíÛ·K’Xÿ 0I§N$ <|ø°Éil˵k”$…„„˜˜n‚І]»©<|ø°NŸ>mrÛqòäIÅÇÇK’ºtébrÀ~]ÿý÷í·ßš˜Äö|óÍ7’Ч7oÞÜä4ðÛ(mØÃ?,I***ÒâÅ‹MNc;-ZdÎxís ü…††Z××[´h‘ÉilÇÁƒµsçNIÒ Aƒd±XLN¿‚ІuëÖM 6”$Í™3G™™™æ²—/_Ö¼yó$¯ëÕµkWsvÌÙÙY>ú¨$éÀZ¿~½É‰lÃo¼aýø‰'ž01 ”¡ spp°Þ\¦¤¤èí·ß69‘ùÞzë-ëÆO>ù$#s“=Úº¡Ò¤I“TXXhr"síÝ»WË–-“$µk×N:t09ÜšÅ`ëI›vùòe5kÖLçÏŸWÕªUµgÏ»]Ï*>>^íÛ·Wvv¶¼½½uìØ1¹¹¹™ °{£FÒ’%K$Iï½÷žž{î9“™£°°P]ºt±nP²aÃýéO29Ü#m\õêÕ5eÊIRVV–†®¼¼úÈäDPz„€ƒƒƒ>ùäÕ®][’4oÞ<½öÚk&§*?ÿ÷ÿ§ùóçK’¼¼¼´dÉÖlÌÓO?­ððpIÒ‘#GÔ¯_?ëˆß»]ll¬† b]qÞ¼yÖ ¦ "  ¬ êÖ­«¨¨(U©RERqiöæ›ošœêΛ1c†µ ­R¥ŠÖ¬Y#ooo“S¸™Å‹«cÇŽ’ŠK³ððp]¹rÅäTwÖÎ;Õ·o_k:yòd :ÔäTp{ؤ¤‚‰ŠŠÒÃ?l]‡ðùçŸ×;ï¼cÝEônQXX¨ñãÇköìÙ’$}ýõ×z衇LNà·\¸pA;wÖ±cÇ$IÁÁÁZ¿~ý]Yì¯[·NC‡UVV–$iìØ±š3gŽÉ©àöÝ]­’èׯŸ¢¢¢T­Z5IÅ;††‡‡+55ÕädeçÂ… ·–ƒÕªUÓºuë(  víÚŠ‰‰Qûöí%I{÷îÕ=÷Ü£mÛ¶™œ¬ìé7ÞЀ¬åà /¼ ÷ßßädðûPV@>ø 6oÞ,___IÒÚµkÕ¦MEEE™œìûú믬uëÖI’|}}£=z˜œ @iy{{ëÇT¯^½$IgΜQ×®]5yòdåä䘜î9~ü¸þô§?iÒ¤I*((“““"##õæ›o²6*€ 
‹‚°‚jß¾½öîݫ޽{K’’’’Ô¿…‡‡ëäÉ“æ†ûNœ8¡~ýú)<<\III’¤>}úhß¾}jÛ¶­ÉéÜ.777­]»VÓ§O—“““ 4mÚ4iýúõfÇ»m999š2eŠZ·n­ï¿ÿ^RñîÍ›7oÖ³Ï>kr:øc(+°ÚµkkíÚµzçwäêê*©x^‹-4vìX9sÆä„·vêÔ)=õÔS ÐÚµk%I®®®š9s¦¢¢¢T«V-“ø½4iÒ$mÙ²E7–Tüf@Ÿ>}¦Í›7›œðÖ®^½ªÈÈH5mÚTS§NµŽ€8p öîÝ«“ÀÇ&%w‰#GŽè™gžÑ?ü`=æââ¢G}TO=õ”ugQ[±sçNÍ›7OK—.µn¸"I={öÔû￯æÍ››˜@YËÌÌÔk¯½¦wß}·Ä÷|hh¨ÆŽ«Zßè°ÉÉÉZ¸p¡æÎ«³gÏZׯ__3gÎÔàÁƒMLe‹‚ð.óÅ_hòäÉ:räH‰ãÁÁÁzüñÇ5hÐ ùøø˜’-11Q«V­Ò¢E‹´oß¾çüýýõꫯêá‡6%€òqèÐ!½ôÒKŠŠŠÒõ?~êÔ©£‘#Gê‘GQ‡LYÏ/;;[ß}÷>ýôS}ýõ×ÊÏÏ·ž«V­šžþyMœ8Qnnnåž î$ »Paa¡>ûì3½þúëŠ/qÎÁÁA÷ÜsÂÃæŽ;ÊÉÉéŽäÈÏÏ×®]»­5kÖ(66Vÿýå —_~YÆ “ƒ3Þ{±gÏM›6MQQQ****q®~ýú8p zôè¡yxxܱÇWLLŒÖ¯_¯ 6(;;»Äywww=óÌ3š0a‚jÔ¨qÇr€™(ïb†ahóæÍúðÃõÕW_•˜ÖwMÕªUuß}÷©]»v PPP4hpÛkÿ]¸pA§NÒÁƒ§={ö(66VYYY7<ÖÕÕUÔ“O>©®]»²ó'`Ç´`Á-\¸P)))7œwppPëÖ­Õ±cG*00PÍ›7—··÷m½¹‘­3gÎ(..Nqqq:pà€¶mÛVbúðõ:vì¨1cÆhذaªZµêï~}PPÚ‰ÔÔT}ýõ×Z¹r¥~øáåææþæã]]]U·n]Õ¬YSžžž²X,ÖQ<—.]’aJOOWjjª’““Kõ÷õèÑCƒVÿþýU³fÍ2{m*¾üü|mÞ¼Y+W®ÔêÕ«uþüùß|¼ƒƒƒ¼¼¼T§NyxxÈÙÙYnnnrvvVvv¶rss•••¥ŒŒ %%%)##ã–‚ƒƒ5hÐ `_ - `Ubuntu `_ - `Debian `_ Or you can build your own binaries from sources. You can fetch all sources from git (below you can find all commands you need) or you can download it as tarballs at: - `libestr `_ - `liblognorm `_ Please note if you compile it from tarballs then you have to do the same steps which are mentioned below, apart from:: $ git clone ... $ autoreconf -vfi Building from git ----------------- To build liblognorm from sources, you need to have `json-c `_ installed. Open a terminal and switch to the folder where you want to build liblognorm. Below you will find the necessary commands. First, build and install prerequisite library called **libestr**:: $ git clone git@github.com:rsyslog/libestr.git $ cd libestr $ autoreconf -vfi $ ./configure $ make $ sudo make install leave that folder and repeat this step again for liblognorm:: $ cd .. $ git clone git@github.com:rsyslog/liblognorm.git $ cd liblognorm $ autoreconf -vfi $ ./configure $ make $ sudo make install That’s all you have to do. 
Testing ------- For a first test we need two further things, a test log and the rulebase. Both can be downloaded `here `_. After downloading these examples you can use liblognorm. Go to liblognorm/src and use the command below:: $ ./lognormalize -r messages.sampdb -o json ` tool to debug. liblognorm-2.1.0/doc/introduction.rst000066400000000000000000000030441520037563000177000ustar00rootroot00000000000000Introduction ============ Briefly described, liblognorm is a tool to normalize log data. People who need to take a look at logs often have a common problem. Logs from different machines (from different vendors) usually have different formats. Even if it is the same type of log (e.g. from firewalls), the log entries are so different, that it is pretty hard to read these. This is where liblognorm comes into the game. With this tool you can normalize all your logs. All you need is liblognorm and its dependencies and a sample database that fits the logs you want to normalize. So, for example, if you have traffic logs from three different firewalls, liblognorm will be able to "normalize" the events into generic ones. Among others, it will extract source and destination ip addresses and ports and make them available via well-defined fields. As the end result, a common log analysis application will be able to work on that common set and so this backend will be independent from the actual firewalls feeding it. Even better, once we have a well-understood interim format, it is also easy to convert that into any other vendor specific format, so that you can use that vendor's analysis tool. By design, liblognorm is constructed as a library. Thus, it can be used by other tools. In short, liblognorm works by: 1. Matching a line to a rule from predefined configuration; 2. Picking out variable fields from the line; 3. Returning them as a JSON hash object. Then, a consumer of this object can construct new, normalized log line on its own. 
liblognorm-2.1.0/doc/libraryapi.rst000066400000000000000000000004431520037563000173150ustar00rootroot00000000000000Library API =========== To use the library, include liblognorm.h (which is quoted below) into your code. The API is fairly simple and hardly needs further explanations. .. literalinclude:: ../src/liblognorm.h :start-after: #define LIBLOGNORM_H_INCLUDED :end-before: #endif :language: c liblognorm-2.1.0/doc/license.rst000066400000000000000000000002201520037563000165720ustar00rootroot00000000000000Licensing ========= Liblognorm is available under the terms of the GNU LGPL v2.1 or above (full text below). .. literalinclude:: ../COPYING liblognorm-2.1.0/doc/lognormalizer.rst000066400000000000000000000200271520037563000200430ustar00rootroot00000000000000Lognormalizer ============= Lognormalizer is a sample tool which is often used to test and debug rulebases before real use. Nevertheless, it can be used in production as a simple command line interface to liblognorm. This tool reads log lines from its standard input and prints results to standard output. You need to use redirections if you want to read or write files. An example of the command:: $ lognormalizer -r messages.sampdb -e json Specifies name of the file containing the rulebase. :: -v Increase verbosity level. Can be used several times. If used three times, internal data structures are dumped (make sense to developers, only). :: -p Print only successfully parsed messages. :: -P Print only messages **not** successfully parsed. :: -L Add line number information to events not successfully parsed. This is meant as a troubleshooting aid when working with unparsable events, as the information can be used to directly go to the line in question in the source data file. The line number is contained in a field named ``lognormalizer.line_nbr``. :: -t Print only those messages which have this tag. :: -T Include 'event.tags' attribute when output is in JSON format. 
This attribute contains the list of tags of the matched rule. :: -E Encoder-specific data. For CSV, it is the list of fields to be output, separated by comma or space. It is currently unused for other formats. :: -d Generate a DOT file describing the parse tree. It is used to plot the parse graph with GraphViz. :: -H At end of run, print a summary line with number of messages processed, parsed and unparsed to stdout. :: -U At end of run, print a summary line with number of messages unparsed to stdout. Note that this message is only printed if there was at least one unparsable message. :: -o Special options. The following ones can be set: * **allowRegex** Permits the use of regular expressions inside the v1 engine. This is deprecated and should not be used for new deployments. * **addExecPath** Includes metadata in the event on how it was (tried) to be parsed. Can be useful in troubleshooting normalization problems. * **addOriginalMsg** Always add the "original-msg" data item. By default, this is only done when a message could not be parsed. * **addRule** Add a mockup of the rule that was processed. Note that it is *not* an exact copy of the rule, but a rule that correctly describes the parsed message. Most importantly, prefixes are appended and custom data types are expanded (and no longer visible as such). This option is primarily meant for postprocessing, e.g. as input to an anonymizer. * **addRuleLocation** For rules that successfully parsed, add the location of the rule inside the rulebase. Both the file name and the line number are given. If two rules evaluate to the same end node, only a single rule location is given. However, in practice this is extremely unlikely and as such for practical reasons the information can be considered reliable. * **turbo** Enable TurboVM bytecode engine for normalization. This requires liblognorm to be built with ``--enable-turbo``. When enabled, normalization uses the compiled bytecode VM with SIMD acceleration.
Output uses native JSON types (numbers as integers, not strings) and nested objects for dotted field names. Falls back to standard normalization if bytecode compilation failed. See :doc:`turbo` for details. :: -s At end of run, print internal parse DAG statistics and exit. This option is meant for developers and researchers who want to gain insight into the quality of the algorithm and/or how efficiently the rulebase can be processed. **NOT** intended for end users. This option is performance-intensive. :: -S Even stronger statistics than -s. Requires that the version is compiled with --enable-advanced-statistics, which causes a considerable performance loss. :: -x Print statistics as a DOT file. In order to keep the graph readable, information is only emitted for called nodes. :: -e Output format. By default, output is in JSON format. With this option, you can change it to a different one. Supported Output Formats ........................ The JSON, XML, and CSV formats should be self-explanatory. The cee-syslog format emits messages according to the Mitre CEE spec. Note that the cee-syslog format is primarily supported for backward compatibility. It does **not** support nested data items and as such cannot be used when the rulebase makes use of this feature (we assume this most often happens nowadays). We strongly recommend not using it for new deployments. Support may be removed in later releases. The raw format outputs an exact copy of the input message, without any normalization visible. The prime use case of "raw" is to extract either all messages that could or could not be normalized. To do so, specify the -p or -P option. Also, it works in combination with the -t option to extract a subset based on tagging. In any case, the core use is to prepare a subset of the original file for further processing. Examples -------- These examples were created using the sample rulebase from the source package.
Default (CEE) output:: $ lognormalizer -r rulebases/sample.rulebase Weight: 42kg [cee@115 event.tags="tag2" unit="kg" N="42" fat="free"] Snow White and the Seven Dwarfs [cee@115 event.tags="tale" company="the Seven Dwarfs"] 2012-10-11 src=127.0.0.1 dst=88.111.222.19 [cee@115 dst="88.111.222.19" src="127.0.0.1" date="2012-10-11"] JSON output, flat tags enabled:: $ lognormalizer -r rulebases/sample.rulebase -e json -T %% { "event.tags": [ "tag3", "percent" ], "percent": "100", "part": "wha", "whole": "whale" } Weight: 42kg { "unit": "kg", "N": "42", "event.tags": [ "tag2" ], "fat": "free" } CSV output with fixed field list:: $ lognormalizer -r rulebases/sample.rulebase -e csv -E'N unit' Weight: 42kg "42","kg" Weight: 115lbs "115","lbs" Anything not matching the rule , Creating a graph of the rulebase -------------------------------- To get a better overview of a rulebase you can create a graph that shows you the chain of normalization (parse-tree). At first you have to install an additional package called graphviz. Graphviz is a tool that creates such a graph with the help of a control file (created with the rulebase). `Here `_ you will find more information about graphviz. To install it you can use the package manager. For example, on RedHat systems it is yum command:: $ sudo yum install graphviz The next step would be creating the control file for graphviz. Therefore we use the normalizer command with the options -d "preferred filename for the control file" and -r "rulebase":: $ lognormalize -d control.dot -r messages.rb Please note that there is no need for an input or output file. If you have a look at the control file now you will see that the content is a little bit confusing, but it includes all information, like the nodes, fields and parser, that graphviz needs to create the graph. Of course you can edit that file, but please note that it is a lot of work. 
Now we can create the graph by typing:: $ dot control.dot -Tpng >graph.png dot + name of control file + option -T -> file format + output file That is just one example for using graphviz, of course you can do many other great things with it. But I think this "simple" graph could be very helpful for the normalizer. Below you see sample for such a graph, but please note that this is not such a pretty one. Such a graph can grow very fast by editing your rulebase. .. figure:: graph.png :width: 90 % :alt: graph sample liblognorm-2.1.0/doc/pdag_implementation_model.rst000066400000000000000000000135121520037563000223600ustar00rootroot00000000000000PDAG engine notes ================= The following notes summarize the implementation-relevant insights from Rainer Gerhards' master's thesis, *Efficient normalization of IT log messages under realtime conditions* (2016), which introduced liblognorm v2's PDAG-based normalizer. The full thesis is available from the author's site: `PDF download `_. These notes are meant as a quick reference for developers and AI-assisted maintenance work. Terminology ----------- - **log message** ``l``: input string/byte sequence to parse - **suffix** ``s``: the not-yet-consumed suffix of ``l`` during parsing - **motif**: token/parser that matches a prefix of ``s`` and optionally extracts a value - **rulebase**: set of rules, each rule is a sequence of motifs and literals defining a message format - **terminal node**: node that represents the end of at least one rule - **component**: disconnected subgraph used to model a named user-defined motif - **parsing**: combined operation of finding a matching rule and extracting fields during that walk Constraint: **motifs may not match the empty string**; each successful motif must consume at least one byte. 
Motivation for the PDAG design ------------------------------ The PDAG approach addresses challenges observed in the v1 proof-of-concept: - high per-node memory consumption from literal lookup tables - ambiguous matches that require controllable priority - speed and cache friendliness - richer built-in motif set and user extensibility - support for mixed message formats in one normalizer - runtime characteristics that can be analyzed Implementation deltas observed in liblognorm -------------------------------------------- The current codebase mostly follows the thesis guidance but there are some notable deviations: - **Empty-match motif still exists**: the ``rest`` parser succeeds even when invoked at the end of the input (it consumes zero bytes), violating the thesis invariant that every motif must consume at least one byte on success (see ``src/parser.c``, lines 1570-1590). - **Literals are not expanded per character during loading**: rule loading adds complete literal parsers directly, rather than first expanding them into single-character motifs and recombining later. Literal path compaction is only attempted when a literal node has a single predecessor and child, which skips compression on shared DAG segments (``refcnt`` must be ``1``), leaving many multi-character literals uncompressed when suffix sharing is present (see ``src/pdag.c``, lines 332-360). - **Literal compression depends on parser metadata**: compaction refuses to merge literals that carry a field name or terminate a rule, whereas the thesis model compresses pure literal stretches regardless of adjacent rule boundaries. This keeps more nodes intact than the thesis’s “compress all literal paths” expectation (see ``src/pdag.c``, lines 332-356). PDAG data model --------------- - The rulebase is represented as a **rooted DAG** with one designated root component. - A PDAG can have **multiple disconnected components**; each component has one root node and models a named user-defined motif.
- **Literals are handled as motifs** so parsing logic can evaluate all edges in a uniform way and in **priority order**. - Looping constructs belong **inside motif parsers**, keeping the PDAG itself acyclic. Construction workflow --------------------- **Load phase** (build first, optimize later) 1. Select the current component (root component by default; switch if a rule specifies another component). 2. Split the rule into a sequence of motifs ``M``. 3. Expand literal strings into per-character literal motifs during load. 4. For each motif, create the edge and destination node if it does not already exist and advance; mark the final node as terminal. **Preparation/optimization phase** 1. Establish the **motif priority order** for each node's outgoing edges. 2. Apply **literal path compression** where nodes have one incoming and one outgoing literal edge, collapsing runs of literals into a single edge. Parsing algorithm ----------------- Given a node ``n`` and suffix ``s``: 1. If ``s`` is empty: succeed only if ``n`` is terminal. 2. Otherwise, iterate over ``n``'s outgoing edges **in priority order**: - If an edge matches a prefix of ``s``, recurse into its destination with the remaining suffix. Extraction happens only if the recursive call succeeds. - On failure, discard any partial extraction from that attempt and continue with the next edge. 3. If no edge succeeds, parsing fails. Disconnected components act like motifs by recursively invoking parsing on their own root and succeeding once a terminal node in that component is reached, without requiring all of ``s`` to be consumed. Ambiguity control ----------------- Motif ambiguity is handled by **prioritized edge evaluation**; deterministic outcomes rely on traversing edges in that configured order. Performance considerations -------------------------- - Keep the PDAG **read-only after construction**; store mutable state in the message object or on the stack for better cache behavior and thread safety. 
- Use **small indexes instead of function pointers** for motif parser identifiers to shrink edge structures. - Favor **narrow integer types and bitfields** where safe to reduce memory footprint. Complexity expectations ----------------------- - Practical behavior is typically close to **O(|l|)** because motifs often consume multiple bytes and nodes have few outgoing edges. - Theoretical worst case with backtracking is exponential in ``|l|``. Without backtracking, an adversarial setup can reach **O(|l|^2)** if many costly motif checks occur at each node. - Expected practical worst case with limited backtracking depth ``v`` is roughly **O(|l|^(2+v))**. liblognorm-2.1.0/doc/sample_rulebase.rst000066400000000000000000000001411520037563000203150ustar00rootroot00000000000000Sample rulebase =============== .. literalinclude:: ../rulebases/sample.rulebase :linenos: liblognorm-2.1.0/doc/turbo.rst000066400000000000000000000121111520037563000163050ustar00rootroot00000000000000TurboVM Bytecode Engine ======================= TurboVM is an optional high-performance normalization engine for liblognorm. It compiles rulebases into bytecode at startup and executes them through a linear virtual machine with SIMD-accelerated parsing primitives. When enabled, it provides significant throughput improvements over the default recursive parser, especially on high-volume log streams. Overview -------- The default liblognorm normalization engine (the "recursive walker") traverses the parse DAG node-by-node for each log message, allocating json-c objects as fields are extracted. TurboVM replaces this with: - **Bytecode compilation**: rulebases are compiled into a compact instruction sequence at load time. Each rule becomes a linear program. - **Arena allocation**: all per-message memory comes from a single pre-allocated arena (~16 KB), fitting in L1 cache. Zero malloc/free per message. 
- **SIMD parsing**: character scanning, delimiter search, whitespace skipping, and IP address parsing use SSE4.2 or NEON intrinsics when available. - **Typed field output**: extracted fields carry their native type (string, integer, double, boolean) instead of converting everything to JSON strings. - **Nested JSON**: dotted field names (e.g. ``source.ip``) produce properly nested JSON objects (``{"source":{"ip":"..."}}``), enabling direct ECS (Elastic Common Schema) output. Building with TurboVM --------------------- TurboVM is an optional build feature, disabled by default:: ./configure --enable-turbo The build system automatically detects the CPU architecture and enables the appropriate SIMD instruction set: - **x86-64**: SSE4.2 (Intel Nehalem+, AMD Bulldozer+) - **ARM64**: NEON (all ARMv8-A processors, including Apple M1/M2) - **Other**: scalar fallback (functional but without SIMD acceleration) No additional dependencies are required. Using with lognormalizer ------------------------ The ``lognormalizer`` command-line tool supports turbo mode via the ``-oturbo`` option:: $ lognormalizer -r rules.rb -e json -oturbo < messages.log In turbo mode: - Normalization uses the TurboVM bytecode engine - Output is compact JSON with nested objects for dotted field names - Numeric fields are emitted as native JSON numbers (not strings) - The ``getline()`` system call is used for input (more efficient than ``fgets()`` for large-scale processing) If a rulebase cannot be compiled to bytecode (e.g. it uses unsupported parser types), lognormalizer falls back to standard normalization automatically. 
Library API ----------- To enable TurboVM in your application, set the ``LN_CTXOPT_TURBO`` option on the normalization context before loading rules:: #include <liblognorm.h> ln_ctx ctx = ln_initCtx(); ln_setCtxOpts(ctx, LN_CTXOPT_TURBO); ln_loadSamples(ctx, "/path/to/rules.rb"); After loading, verify that compilation succeeded:: if (ln_turbo_is_available(ctx)) { /* TurboVM ready — ln_normalize() will use the fast path */ } For direct string output (bypassing json-c entirely):: char *json_str = NULL; size_t json_len = 0; int r = ln_normalize_to_str(ctx, msg, msg_len, &json_str, &json_len); if (r == 0 && json_str) { /* json_str contains the normalized JSON string */ free(json_str); } The standard ``ln_normalize()`` function also benefits from TurboVM when it is enabled — the bytecode engine is used internally, with automatic fallback to the recursive walker if needed. Supported Parsers ----------------- TurboVM supports 32 of the 33 parser types defined in liblognorm v2. The following parsers are compiled to bytecode: - **Text**: ``word``, ``alpha``, ``string``, ``rest``, ``char-to``, ``char-separated``, ``string-to``, ``op-quoted-string``, ``quoted-string``, ``literal`` - **Numeric**: ``number``, ``float``, ``hexnumber`` - **Network**: ``ipv4``, ``ipv6``, ``mac48`` - **Date/Time**: ``date-rfc3164``, ``date-rfc5424``, ``date-iso``, ``time-24hr``, ``time-12hr``, ``duration``, ``kernel-timestamp`` - **Structured**: ``json``, ``cee-syslog``, ``cef``, ``v2-iptables``, ``checkpoint-lea``, ``name-value-list`` - **Special**: ``whitespace`` (as skip), ``cisco-interface-spec`` The following parser type falls back to the legacy recursive engine: - **repeat**: requires recursive sub-rule invocation, which is outside the scope of the single-pass VM instruction set. The fallback is automatic and transparent — rulebases using ``repeat`` will still work correctly via the standard engine.
Performance Notes ----------------- Throughput improvements depend on the rulebase complexity and message format. Typical observations: - Simple rulebases (5-10 rules): 2-3x throughput improvement - Complex rulebases (50+ rules with alternatives): 5-10x improvement - The ``ln_normalize_to_str()`` path avoids json-c entirely and provides the highest throughput for applications that consume JSON as strings TurboVM adds no overhead when disabled (``--disable-turbo`` or default). When enabled but compilation fails for a specific rule, only that rule falls back to the recursive walker — other rules still use bytecode. liblognorm-2.1.0/lognorm.pc.in000066400000000000000000000004401520037563000162630ustar00rootroot00000000000000prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: lognorm Description: fast samples-based log normalization library Version: @VERSION@ Requires: libfastjson Libs: -L${libdir} -llognorm Libs.private: @pkg_config_libs_private@ Cflags: -I${includedir} liblognorm-2.1.0/m4/000077500000000000000000000000001520037563000141775ustar00rootroot00000000000000liblognorm-2.1.0/m4/dummy000066400000000000000000000000001520037563000152430ustar00rootroot00000000000000liblognorm-2.1.0/rulebases/000077500000000000000000000000001520037563000156445ustar00rootroot00000000000000liblognorm-2.1.0/rulebases/cisco.rulebase000066400000000000000000000012461520037563000204730ustar00rootroot00000000000000prefix=%date:date-rfc3164% %host:word% %seqnum:number%: %othseq:char-to:\x3a%: %%%tag:char-to:\x3a%: rule=: Configured from console by %tty:word:% (%ip:ipv4%) rule=: Authentication failure for %proto:word% req from host %ip:ipv4% rule=: Interface %interface:char-to:,%, changed state to %state:word% rule=: Line protocol on Interface %interface:char-to:,%, changed state to %state:word% rule=: Attempted to connect to %servname:word% from %ip:ipv4% # too-generic syntaces (like %port:word% below) cause problems. 
# Best is to have very specific syntaxes, but as an # interim solution we may need to backtrack if there is no other way to handle it. #: %port:word% transmit error liblognorm-2.1.0/rulebases/messages.rulebase000066400000000000000000000011441520037563000211770ustar00rootroot00000000000000prefix=%date:date-rfc3164% %host:word% %tag:char-to:\x3a%: rule=: restart. rule=: Bad line received from identity server at %ip:ipv4%: %port:number% rule=: FTP session closed rule=: wu-ftpd - TLS settings: control %wuftp-control:char-to:,%, client_cert %wuftp-clcert:char-to:,%, data %wuftp-allow:word% rule=: User %user:word% timed out after %timeout:number% seconds at %otherdatesyntax:word% %otherdate:date-rfc3164% %otheryear:word% rule=: getpeername (in.ftpd): Transport endpoint is not connected # the one below is problematic (and needs some backtracking) #: %disk:char-to:\x3a%: timeout waiting for DMA liblognorm-2.1.0/rulebases/sample.rulebase000066400000000000000000000036411520037563000206550ustar00rootroot00000000000000# Some sample rules and strings matching them # Prefix sample: # myhostname: code=23 prefix=%host:char-to:\x3a%: rule=prefixed_code:code=%code:number% # myhostname: name=somename rule=prefixed_name:name=%name:word% # Reset prefix to default (empty value): prefix= # Quantity: 555 rule=tag1:Quantity: %N:number% # Weight: 42kg rule=tag2:Weight: %N:number%%unit:word% annotate=tag2:+fat="free" # %% rule=tag3,percent:\x25%% annotate=percent:+percent="100" annotate=tag3:+whole="whale" annotate=tag3:+part="wha" # literal rule=tag4,tag5,tag6,tag4:literal annotate=tag4:+this="that" # first field,second field,third field,fourth field rule=csv:%r1:char-to:,%,%r2:char-to:,%,%r3:char-to:,%,%r4:rest% # CSV: field1,,field3 rule=better-csv:CSV: %f1:char-sep:,%,%f2:char-sep:,%,%f3:char-sep:,% # Snow White and the Seven Dwarfs rule=tale:Snow White and %company:rest% # iptables: SRC=192.168.1.134 DST=46.252.161.13 LEN=48 TOS=0x00 PREC=0x00 rule=ipt:iptables: %dummy:iptables% # 
2012-10-11 src=127.0.0.1 dst=88.111.222.19 rule=:%date:date-iso% src=%src:ipv4% dst=%dst:ipv4% # Oct 29 09:47:08 server rsyslogd: rsyslogd's groupid changed to 103 rule=syslog:%date1:date-rfc3164% %host:word% %tag:char-to:\x3a%: %text:rest% # Oct 29 09:47:08 rule=rfc3164:%date1:date-rfc3164% # 1985-04-12T19:20:50.52-04:00 rule=rfc5424:%date1:date-rfc5424% # 1985-04-12T19:20:50.52-04:00 testing 123 rule=rfc5424:%date1:date-rfc5424% %test:word% %test2:number% # quoted_string="Contents of a quoted string cannot include quote marks" rule=quote:quoted_string=%quote:quoted-string% # tokenized words: aaa.org; bbb.com; ccc.net rule=tokenized_words:tokenized words: %arr:tokenized:; :char-sep:\x3b% # tokenized regex: aaa.org; bbb.com; ccc.net rule=tokenized_regex:tokenized regex: %arr:tokenized:; :regex:[^; ]+% # regex: abcdef rule=regex:regex: %token:regex:abc.ef% # host451 # generates { basename:"host", hostid:451 } rule=:%basename:alpha%%hostid:number% liblognorm-2.1.0/rulebases/syntax.txt000066400000000000000000000103621520037563000177350ustar00rootroot00000000000000WARNING ======= This file is somewhat obsolete, for current information look at doc/ directory. Basic syntax ============ Each line in rulebase file is evaluated separately. Lines starting with '#' are comments. Empty lines are just skipped, they can be inserted for readability. If the line starts with 'rule=', then it contains a rule. This line has following format: rule=[<tag1>[,<tag2>...]]:<match description> Everything before a colon is treated as comma-separated list of tags, which will be attached to a match. After the colon, match description should be given. It consists of string literals and field selectors. String literals should match exactly. Field selector has this format: %<field name>:<field type>[:<extra data>]% Percent sign is used to enclose field selector. If you need to match literal '%', it can be written as '%%' or '\x25'. Behaviour of field selector depends on its type, which is described below.
If field name is set to '-', this field is matched but not saved. Several rules can have a common prefix. You can set it once with this syntax: prefix=<prefix match description> Every following rule will be treated as an addition to this prefix. Prefix can be reset to default (empty value) by the line: prefix= Tags of the matched rule are attached to the message and can be used to annotate it. Annotation allows adding fixed fields to the message. The syntax is as follows: annotate=<tag>:+<field name>="<field value>" Field value should always be enclosed in double quote marks. There can be multiple annotations for the same tag. Field types =========== Field type: 'number' Matches: One or more decimal digits. Extra data: Not used Example: %field_name:number% Field type: 'word' Matches: One or more characters, up to the next space (\x20), or up to end of line. Extra data: Not used Example: %field_name:word% Field type: 'alpha' Matches: One or more alphabetic characters, up to the next whitespace, punctuation, decimal digit or ctrl. Extra data: Not used Example: %field_name:alpha% Field type: 'char-to' Matches: One or more characters, up to the next character given in extra data. Extra data: One character (can be escaped) Example: %field_name:char-to:,% %field_name:char-to:\x25% Field type: 'char-sep' Matches: Zero or more characters, up to the next character given in extra data, or up to end of line. Extra data: One character (can be escaped) Example: %field_name:char-sep:,% %field_name:char-sep:\x25% Field type: 'rest' Matches: Zero or more characters till end of line. Extra data: Not used Example: %field_name:rest% Notes: Should always be at the end of the rule. Field type: 'quoted-string' Matches: Zero or more characters, surrounded by double quote marks. Extra data: Not used Example: %field_name:quoted-string% Notes: Quote marks are stripped from the match. Field type: 'date-iso' Matches: Date of format 'YYYY-MM-DD'.
Extra data: Not used Example: %field-name:date-iso% Field type: 'time-24hr' Matches: Time of format 'HH:MM:SS', where HH is 00..23. Extra data: Not used Example: %field_name:time-24hr% Field type: 'time-12hr' Matches: Time of format 'HH:MM:SS', where HH is 00..12. Extra data: Not used Example: %field_name:time-12hr% Field type: 'ipv4' Matches: IPv4 address, in dot-decimal notation (AAA.BBB.CCC.DDD). Extra data: Not used Example: %field_name:ipv4% Field type: 'date-rfc3164' Matches: Valid date/time in RFC3164 format, i.e.: 'Oct 29 09:47:08' Extra data: Not used Example: %field_name:date-rfc3164% Notes: This parser implements several quirks to match malformed timestamps from some devices. Field type: 'date-rfc5424' Matches: Valid date/time in RFC5424 format, i.e.: '1985-04-12T19:20:50.52-04:00' Extra data: Not used Example: %field_name:date-rfc5424% Notes: Slightly different formats are allowed. Field type: 'iptables' Matches: Name=value pairs, separated by spaces, as in Netfilter log messages. Extra data: Not used Example: %-:iptables% Notes: Name of the selector is not used; names from the line are used instead. This selector always matches everything till end of the line. Cannot match zero characters. Examples ======== Look at sample.rulebase for example rules and matching lines. liblognorm-2.1.0/src/000077500000000000000000000000001520037563000144465ustar00rootroot00000000000000liblognorm-2.1.0/src/.gitignore000066400000000000000000000001171520037563000164350ustar00rootroot00000000000000*.o *.lo *.la Makefile Makefile.in .deps .libs lognormalizer lognorm-features.hliblognorm-2.1.0/src/Makefile.am000066400000000000000000000046611520037563000165110ustar00rootroot00000000000000# Uncomment for debugging DEBUG = -g PTHREADS_CFLAGS = -pthread #CFLAGS += $(DEBUG) # we need to clean the normalizer up once we have reached a decent # milestone (latest at initial release!) 
bin_PROGRAMS = lognormalizer lognormalizer_SOURCES = lognormalizer.c lognormalizer_CPPFLAGS = -I$(top_srcdir) $(WARN_CFLAGS) $(JSON_C_CFLAGS) $(LIBESTR_CFLAGS) lognormalizer_LDADD = $(JSON_C_LIBS) $(LIBLOGNORM_LIBS) $(LIBESTR_LIBS) ../compat/compat.la lognormalizer_DEPENDENCIES = liblognorm.la check_PROGRAMS = ln_test ln_test_SOURCES = $(lognormalizer_SOURCES) ln_test_CPPFLAGS = $(lognormalizer_CPPFLAGS) ln_test_LDADD = $(lognormalizer_LDADD) ln_test_DEPENDENCIES = $(lognormalizer_DEPENDENCIES) ln_test_LDFLAGS = -no-install lib_LTLIBRARIES = liblognorm.la liblognorm_la_SOURCES = \ liblognorm.c \ pdag.c \ pdag.h \ annot.c \ annot.h \ samp.c \ samp.h \ lognorm.c \ lognorm.h \ parser.c \ parser.h \ enc_syslog.c \ enc_csv.c \ enc_xml.c \ enc.h \ lognorm-features.h \ helpers.h \ internal.h # Users violently requested that v2 shall be able to understand v1 # rulebases. As both are very very different, we now include the # full v1 engine for this purpose. This here is what does this. # see also: https://github.com/rsyslog/liblognorm/issues/103 liblognorm_la_SOURCES += \ v1_liblognorm.c v1_liblognorm.h \ v1_parser.c v1_parser.h \ v1_ptree.c v1_ptree.h \ v1_samp.c v1_samp.h liblognorm_la_CPPFLAGS = $(JSON_C_CFLAGS) $(WARN_CFLAGS) $(LIBESTR_CFLAGS) $(PCRE_CFLAGS) liblognorm_la_LIBADD = $(rt_libs) $(JSON_C_LIBS) $(LIBESTR_LIBS) $(PCRE_LIBS) -lestr # info on version-info: # http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html # Note: v2 now starts at version 5, as v1 previously also had 4 liblognorm_la_LDFLAGS = -version-info 6:0:1 # Public API include_HEADERS = liblognorm.h lognorm-features.h # TurboVM bytecode engine (optional, SIMD-accelerated log parsing) if ENABLE_TURBO liblognorm_la_SOURCES += \ turbo.c \ turbo.h \ turbo_vm.c \ turbo_vm.h \ turbo_vm_opt.h \ turbo_opcode.h \ turbo_simd.c \ turbo_simd.h \ turbo_arena.c \ turbo_arena.h \ turbo_result.c \ turbo_result.h \ turbo_result_fast.c \ turbo_result_fast.h \ turbo_json.h \ turbo_json_fast.c 
\ turbo_json_fast.h \ turbo_json_impl.c \ turbo_snapshot.c \ turbo_snapshot.h liblognorm_la_CPPFLAGS += $(TURBO_CFLAGS) include_HEADERS += turbo.h turbo_result.h turbo_result_fast.h \ turbo_arena.h turbo_snapshot.h turbo_opcode.h endif liblognorm-2.1.0/src/annot.c000066400000000000000000000123101520037563000157260ustar00rootroot00000000000000/** * @file annot.c * @brief Implementation of the annotation set object. * @class ln_annot annot.h *//* * Copyright 2011 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution.
*/ #include "config.h" #include #include #include #include #include #include #include #include "lognorm.h" #include "samp.h" #include "annot.h" #include "internal.h" ln_annotSet* ln_newAnnotSet(ln_ctx ctx) { ln_annotSet *as; if((as = calloc(1, sizeof(struct ln_annotSet_s))) == NULL) goto done; as->ctx = ctx; done: return as; } void ln_deleteAnnotSet(ln_annotSet *as) { ln_annot *node, *nextnode; if(as == NULL) goto done; for(node = as->aroot; node != NULL; node = nextnode) { nextnode = node->next; ln_deleteAnnot(node); } free(as); done: return; } ln_annot* ln_findAnnot(ln_annotSet *as, es_str_t *tag) { ln_annot *annot; if(as == NULL) { annot = NULL; goto done; } for( annot = as->aroot ; annot != NULL && es_strcmp(annot->tag, tag) ; annot = annot->next) { ; /* do nothing, just search... */ } done: return annot; } /** * Combine two annotations. * @param[in] annot currently existing and surviving annotation * @param[in] add annotation to be added. This will be destructed * as part of the process. 
* @returns 0 if ok, something else otherwise */ static int ln_combineAnnot(ln_annot *annot, ln_annot *add) { int r = 0; ln_annot_op *op, *nextop; for(op = add->oproot; op != NULL; op = nextop) { CHKR(ln_addAnnotOp(annot, op->opc, op->name, op->value)); nextop = op->next; free(op); } es_deleteStr(add->tag); free(add); done: return r; } int ln_addAnnotToSet(ln_annotSet *as, ln_annot *annot) { int r = 0; ln_annot *aexist; assert(annot->tag != NULL); aexist = ln_findAnnot(as, annot->tag); if(aexist == NULL) { /* does not yet exist, simply store new annot */ annot->next = as->aroot; as->aroot = annot; } else { /* annotation already exists, combine */ r = ln_combineAnnot(aexist, annot); } return r; } ln_annot* ln_newAnnot(es_str_t *tag) { ln_annot *annot; if((annot = calloc(1, sizeof(struct ln_annot_s))) == NULL) goto done; annot->tag = tag; done: return annot; } void ln_deleteAnnot(ln_annot *annot) { ln_annot_op *op, *nextop; if(annot == NULL) goto done; es_deleteStr(annot->tag); for(op = annot->oproot; op != NULL; op = nextop) { es_deleteStr(op->name); if(op->value != NULL) es_deleteStr(op->value); nextop = op->next; free(op); } free(annot); done: return; } int ln_addAnnotOp(ln_annot *annot, ln_annot_opcode opc, es_str_t *name, es_str_t *value) { int r = -1; ln_annot_op *node; if((node = calloc(1, sizeof(struct ln_annot_op_s))) == NULL) goto done; node->opc = opc; node->name = name; node->value = value; if(annot->oproot != NULL) { node->next = annot->oproot; } annot->oproot = node; r = 0; done: return r; } /* annotate the event with a specific tag. helper to keep code * small and easy to follow. 
*/ static inline int ln_annotateEventWithTag(ln_ctx ctx, struct json_object *json, es_str_t *tag) { int r=0; ln_annot *annot; ln_annot_op *op; struct json_object *field; char *cstr; if (NULL == (annot = ln_findAnnot(ctx->pas, tag))) goto done; for(op = annot->oproot ; op != NULL ; op = op->next) { if(op->opc == ln_annot_ADD) { CHKN(cstr = ln_es_str2cstr(&op->value)); CHKN(field = json_object_new_string(cstr)); CHKN(cstr = ln_es_str2cstr(&op->name)); json_object_object_add(json, cstr, field); } else { // TODO: implement } } done: return r; } int ln_annotate(ln_ctx ctx, struct json_object *json, struct json_object *tagbucket) { int r = 0; es_str_t *tag; struct json_object *tagObj; const char *tagCstr; int i; ln_dbgprintf(ctx, "ln_annotate called [aroot=%p]", ctx->pas->aroot); /* shortcut: terminate immediately if nothing to do... */ if(ctx->pas->aroot == NULL) goto done; /* iterate over tagbucket */ for (i = json_object_array_length(tagbucket) - 1; i >= 0; i--) { CHKN(tagObj = json_object_array_get_idx(tagbucket, i)); CHKN(tagCstr = json_object_get_string(tagObj)); ln_dbgprintf(ctx, "ln_annotate, current tag %d, cstr %s", i, tagCstr); CHKN(tag = es_newStrFromCStr(tagCstr, strlen(tagCstr))); CHKR(ln_annotateEventWithTag(ctx, json, tag)); es_deleteStr(tag); } done: return r; } liblognorm-2.1.0/src/annot.h000066400000000000000000000115261520037563000157430ustar00rootroot00000000000000/** * @file annot.h * @brief The annotation set object * @class ln_annot annot.h *//* * Copyright 2011 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is meant to be included by applications using liblognorm. * For lognorm library files themselves, include "lognorm.h". * * This file is part of liblognorm. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #ifndef LIBLOGNORM_ANNOT_H_INCLUDED #define LIBLOGNORM_ANNOT_H_INCLUDED #include typedef struct ln_annotSet_s ln_annotSet; typedef struct ln_annot_s ln_annot; typedef struct ln_annot_op_s ln_annot_op; typedef enum {ln_annot_ADD=0, ln_annot_RM=1} ln_annot_opcode; /** * List of annotation operations. */ struct ln_annot_op_s { ln_annot_op *next; ln_annot_opcode opc; /**< opcode */ es_str_t *name; es_str_t *value; }; /** * annotation object */ struct ln_annot_s { ln_annot *next; /**< used for chaining annotations */ es_str_t *tag; /**< tag associated for this annotation */ ln_annot_op *oproot; }; /** * annotation set object * * Note: we do not (yet) use a hash table. However, performance should * be gained by pre-processing rules so that tags directly point into * the annotation. This is even faster than hash table access. */ struct ln_annotSet_s { ln_annot *aroot; ln_ctx ctx; /**< save our context for easy dbgprintf et al... */ }; /* Methods */ /** * Allocates and initializes a new annotation set. * @memberof ln_annot * * @param[in] ctx current library context. This MUST match the * context of the parent. 
* * @return pointer to new node or NULL on error */ ln_annotSet* ln_newAnnotSet(ln_ctx ctx); /** * Free annotation set and destruct all members. * @memberof ln_annot * * @param[in] as pointer to annotation set to free (NULL is accepted and ignored) */ void ln_deleteAnnotSet(ln_annotSet *as); /** * Find annotation inside set based on given tag name. * @memberof ln_annot * * @param[in] as annotation set * @param[in] tag tag name to look for * * @returns NULL if not found, ptr to object otherwise */ ln_annot* ln_findAnnot(ln_annotSet *as, es_str_t *tag); /** * Add annotation to set. * If an annotation associated with this tag already exists, these * are combined. If not, a new annotation is added. Note that the * caller must not access any of the objects passed in to this method * after it has finished (objects may become deallocated during the * method). * @memberof ln_annot * * @param[in] as annotation set * @param[in] annot annotation to add * * @returns 0 on success, something else otherwise */ int ln_addAnnotToSet(ln_annotSet *as, ln_annot *annot); /** * Allocates and initializes a new annotation. * The tag passed in identifies the new annotation. The caller * no longer owns the tag string after calling this method, so * it must not access the same copy when the method returns. * @memberof ln_annot * * @param[in] tag tag associated to annot (must not be NULL) * @return pointer to new node or NULL on error */ ln_annot* ln_newAnnot(es_str_t *tag); /** * Free annotation and destruct all members. * @memberof ln_annot * * @param[in] annot pointer to annot to free (NULL is accepted and ignored) */ void ln_deleteAnnot(ln_annot *annot); /** * Add an operation to an annotation. * The operation description will be added as entry. * @memberof ln_annot * * @param[in] annot pointer to annot to modify * @param[in] opc operation code (ln_annot_ADD or ln_annot_RM) * @param[in] name name of field, must NOT be re-used by caller * @param[in] value value of field, may be NULL (e.g.
in remove operation), * must NOT be re-used by caller * @returns 0 on success, something else otherwise */ int ln_addAnnotOp(ln_annot *annot, ln_annot_opcode opc, es_str_t *name, es_str_t *value); /** * Annotate an event. * This adds annotations based on the event's tagbucket. * @memberof ln_annot * * @param[in] ctx current context * @param[in,out] json event to annotate (updated with annotations on exit) * @param[in] tags JSON array holding the tag names attached to the event * @returns 0 on success, something else otherwise */ int ln_annotate(ln_ctx ctx, struct json_object *json, struct json_object *tags); #endif /* #ifndef LIBLOGNORM_ANNOT_H_INCLUDED */ liblognorm-2.1.0/src/enc.h000066400000000000000000000026361520037563000153730ustar00rootroot00000000000000/** * @file enc.h * @brief Encoder functions */ /* * liblognorm - a fast samples-based log normalization library * Copyright 2010 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution.
*/ #ifndef LIBLOGNORM_ENC_H_INCLUDED #define LIBLOGNORM_ENC_H_INCLUDED int ln_fmtEventToRFC5424(struct json_object *json, es_str_t **str); int ln_fmtEventToCSV(struct json_object *json, es_str_t **str, es_str_t *extraData); int ln_fmtEventToXML(struct json_object *json, es_str_t **str); #endif /* LIBLOGNORM_ENC_H_INCLUDED */ liblognorm-2.1.0/src/enc_csv.c000066400000000000000000000127011520037563000162330ustar00rootroot00000000000000/** * @file enc_csv.c * Encoder for CSV format. Note: CEE currently think about what a * CEE-compliant CSV format may look like. As such, the format of * this output will most probably change once the final decision * has been made. At this time (2010-12), I do NOT even try to * stay inline with the discussion. * * This file contains code from all related objects that is required in * order to encode this format. The core idea of putting all of this into * a single file is that this makes it very straightforward to write * encoders for different encodings, as all is in one place. * */ /* * liblognorm - a fast samples-based log normalization library * Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #include #include #include #include #include #include #include "lognorm.h" #include "internal.h" #include "enc.h" static char hexdigit[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; /* TODO: CSV encoding for Unicode characters is as of RFC4627 not fully * supported. The algorithm is that we must build the wide character from * UTF-8 (if char > 127) and build the full 4-octet Unicode character out * of it. Then, this needs to be encoded. Currently, we work on a * byte-by-byte basis, which simply is incorrect. * rgerhards, 2010-11-09 */ static int ln_addValue_CSV(const char *buf, es_str_t **str) { int r; unsigned char c; es_size_t i; char numbuf[4]; int j; assert(str != NULL); assert(*str != NULL); assert(buf != NULL); for(i = 0; i < strlen(buf); i++) { c = buf[i]; if((c >= 0x23 && c <= 0x5b) || (c >= 0x5d /* && c <= 0x10FFFF*/) || c == 0x20 || c == 0x21) { /* no need to escape */ es_addChar(str, c); } else { /* we must escape, try RFC4627-defined special sequences first */ switch(c) { case '\0': es_addBuf(str, "\\u0000", 6); break; case '\"': es_addBuf(str, "\\\"", 2); break; case '\\': es_addBuf(str, "\\\\", 2); break; case '\010': es_addBuf(str, "\\b", 2); break; case '\014': es_addBuf(str, "\\f", 2); break; case '\n': es_addBuf(str, "\\n", 2); break; case '\r': es_addBuf(str, "\\r", 2); break; case '\t': es_addBuf(str, "\\t", 2); break; default: /* TODO : proper Unicode encoding (see header comment) */ for(j = 0 ; j < 4 ; ++j) { numbuf[3-j] = hexdigit[c % 16]; c = c / 16; } es_addBuf(str, "\\u", 2); es_addBuf(str, numbuf, 4); break; } } } r = 0; return r; } static int ln_addField_CSV(struct json_object 
*field, es_str_t **str) { int r, i; struct json_object *obj; int needComma = 0; const char *value; assert(field != NULL); assert(str != NULL); assert(*str != NULL); switch(json_object_get_type(field)) { case json_type_array: CHKR(es_addChar(str, '[')); for (i = json_object_array_length(field) - 1; i >= 0; i--) { if(needComma) es_addChar(str, ','); else needComma = 1; CHKN(obj = json_object_array_get_idx(field, i)); CHKN(value = json_object_get_string(obj)); CHKR(ln_addValue_CSV(value, str)); } CHKR(es_addChar(str, ']')); break; case json_type_string: case json_type_int: CHKN(value = json_object_get_string(field)); CHKR(ln_addValue_CSV(value, str)); break; case json_type_null: case json_type_boolean: case json_type_double: case json_type_object: CHKR(es_addBuf(str, "***unsupported type***", sizeof("***unsupported type***")-1)); break; default: CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1)); } r = 0; done: return r; } int ln_fmtEventToCSV(struct json_object *json, es_str_t **str, es_str_t *extraData) { int r = -1; int needComma = 0; struct json_object *field; char *namelist = NULL, *name, *nn; assert(json != NULL); assert(json_object_is_type(json, json_type_object)); if((*str = es_newStr(256)) == NULL) goto done; if(extraData == NULL) goto done; CHKN(namelist = es_str2cstr(extraData, NULL)); for (name = namelist; name != NULL; name = nn) { for (nn = name; *nn != '\0' && *nn != ',' && *nn != ' '; nn++) { /* do nothing */ } if (*nn == '\0') { nn = NULL; } else { *nn = '\0'; nn++; } json_object_object_get_ex(json, name, &field); if (needComma) { CHKR(es_addChar(str, ',')); } else { needComma = 1; } if (field != NULL) { CHKR(es_addChar(str, '"')); ln_addField_CSV(field, str); CHKR(es_addChar(str, '"')); } } r = 0; done: if (namelist != NULL) free(namelist); return r; } liblognorm-2.1.0/src/enc_syslog.c000066400000000000000000000123711520037563000167630ustar00rootroot00000000000000/** * @file enc_syslog.c * Encoder for syslog format. 
* This file contains code from all related objects that is required in * order to encode syslog format. The core idea of putting all of this into * a single file is that this makes it very straightforward to write * encoders for different encodings, as all is in one place. */ /* * liblognorm - a fast samples-based log normalization library * Copyright 2010-2016 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #include #include #include #include #include #include #include "internal.h" #include "liblognorm.h" #include "enc.h" static int ln_addValue_Syslog(const char *value, es_str_t **str) { int r; es_size_t i; assert(str != NULL); assert(*str != NULL); assert(value != NULL); for(i = 0; i < strlen(value); i++) { switch(value[i]) { case '\0': es_addChar(str, '\\'); es_addChar(str, '0'); break; case '\n': es_addChar(str, '\\'); es_addChar(str, 'n'); break; /* TODO : add rest of control characters here... 
*/ case ',': /* comma is CEE-reserved for lists */ es_addChar(str, '\\'); es_addChar(str, ','); break; #if 0 /* alternative encoding for discussion */ case '^': /* CEE-reserved for lists */ es_addChar(str, '\\'); es_addChar(str, '^'); break; #endif /* at this layer ... do we need to think about transport * encoding at all? Or simply leave it to the transport agent? */ case '\\': /* RFC5424 reserved */ es_addChar(str, '\\'); es_addChar(str, '\\'); break; case ']': /* RFC5424 reserved */ es_addChar(str, '\\'); es_addChar(str, ']'); break; case '\"': /* RFC5424 reserved */ es_addChar(str, '\\'); es_addChar(str, '\"'); break; default: es_addChar(str, value[i]); break; } } r = 0; return r; } static int ln_addField_Syslog(char *name, struct json_object *field, es_str_t **str) { int r; const char *value; int needComma = 0; struct json_object *obj; int i; assert(field != NULL); assert(str != NULL); assert(*str != NULL); CHKR(es_addBuf(str, name, strlen(name))); CHKR(es_addBuf(str, "=\"", 2)); switch(json_object_get_type(field)) { case json_type_array: for (i = json_object_array_length(field) - 1; i >= 0; i--) { if(needComma) es_addChar(str, ','); else needComma = 1; CHKN(obj = json_object_array_get_idx(field, i)); CHKN(value = json_object_get_string(obj)); CHKR(ln_addValue_Syslog(value, str)); } break; case json_type_string: case json_type_int: CHKN(value = json_object_get_string(field)); CHKR(ln_addValue_Syslog(value, str)); break; case json_type_null: case json_type_boolean: case json_type_double: case json_type_object: CHKR(es_addBuf(str, "***unsupported type***", sizeof("***unsupported type***")-1)); break; default: CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1)); } CHKR(es_addChar(str, '\"')); r = 0; done: return r; } static inline int ln_addTags_Syslog(struct json_object *taglist, es_str_t **str) { int r = 0; struct json_object *tagObj; int needComma = 0; const char *tagCstr; int i; assert(json_object_is_type(taglist, json_type_array)); 
CHKR(es_addBuf(str, " event.tags=\"", 13)); for (i = json_object_array_length(taglist) - 1; i >= 0; i--) { if(needComma) es_addChar(str, ','); else needComma = 1; CHKN(tagObj = json_object_array_get_idx(taglist, i)); CHKN(tagCstr = json_object_get_string(tagObj)); CHKR(es_addBuf(str, (char*)tagCstr, strlen(tagCstr))); } es_addChar(str, '"'); done: return r; } int ln_fmtEventToRFC5424(struct json_object *json, es_str_t **str) { int r = -1; struct json_object *tags; assert(json != NULL); assert(json_object_is_type(json, json_type_object)); if((*str = es_newStr(256)) == NULL) goto done; es_addBuf(str, "[cee@115", 8); if(json_object_object_get_ex(json, "event.tags", &tags)) { CHKR(ln_addTags_Syslog(tags, str)); } struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { char *const name = (char*)json_object_iter_peek_name(&it); if (strcmp(name, "event.tags")) { es_addChar(str, ' '); ln_addField_Syslog(name, json_object_iter_peek_value(&it), str); } json_object_iter_next(&it); } es_addChar(str, ']'); done: return r; } liblognorm-2.1.0/src/enc_xml.c000066400000000000000000000131041520037563000162360ustar00rootroot00000000000000/** * @file enc-xml.c * Encoder for XML format. * * This file contains code from all related objects that is required in * order to encode this format. The core idea of putting all of this into * a single file is that this makes it very straightforward to write * encoders for different encodings, as all is in one place. * */ /* * liblognorm - a fast samples-based log normalization library * Copyright 2010-2016 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #include #include #include #include #include #include "lognorm.h" #include "internal.h" #include "enc.h" #if 0 static char hexdigit[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; #endif /* TODO: XML encoding for Unicode characters is as of RFC4627 not fully * supported. The algorithm is that we must build the wide character from * UTF-8 (if char > 127) and build the full 4-octet Unicode character out * of it. Then, this needs to be encoded. Currently, we work on a * byte-by-byte basis, which simply is incorrect. * rgerhards, 2010-11-09 */ static int ln_addValue_XML(const char *value, es_str_t **str) { int r; unsigned char c; es_size_t i; #if 0 char numbuf[4]; int j; #endif assert(str != NULL); assert(*str != NULL); assert(value != NULL); // TODO: support other types! 
es_addBuf(str, "", 7); for(i = 0 ; i < strlen(value) ; ++i) { c = value[i]; switch(c) { case '\0': es_addBuf(str, "�", 5); break; #if 0 case '\n': es_addBuf(str, " ", 5); break; case '\r': es_addBuf(str, " ", 5); break; case '\t': es_addBuf(str, "&x08;", 5); break; case '\"': es_addBuf(str, """, 6); break; #endif case '<': es_addBuf(str, "<", 4); break; case '&': es_addBuf(str, "&", 5); break; #if 0 case ',': es_addBuf(str, "\\,", 2); break; case '\'': es_addBuf(str, "'", 6); break; #endif default: es_addChar(str, c); #if 0 /* TODO : proper Unicode encoding (see header comment) */ for(j = 0 ; j < 4 ; ++j) { numbuf[3-j] = hexdigit[c % 16]; c = c / 16; } es_addBuf(str, "\\u", 2); es_addBuf(str, numbuf, 4); break; #endif } } es_addBuf(str, "", 8); r = 0; return r; } static int ln_addField_XML(char *name, struct json_object *field, es_str_t **str) { int r; int i; const char *value; struct json_object *obj; assert(field != NULL); assert(str != NULL); assert(*str != NULL); CHKR(es_addBuf(str, "", 2)); switch(json_object_get_type(field)) { case json_type_array: for (i = json_object_array_length(field) - 1; i >= 0; i--) { CHKN(obj = json_object_array_get_idx(field, i)); CHKN(value = json_object_get_string(obj)); CHKR(ln_addValue_XML(value, str)); } break; case json_type_string: case json_type_int: CHKN(value = json_object_get_string(field)); CHKR(ln_addValue_XML(value, str)); break; case json_type_null: case json_type_boolean: case json_type_double: case json_type_object: CHKR(es_addBuf(str, "***unsupported type***", sizeof("***unsupported type***")-1)); break; default: CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1)); } CHKR(es_addBuf(str, "", 8)); r = 0; done: return r; } static inline int ln_addTags_XML(struct json_object *taglist, es_str_t **str) { int r = 0; struct json_object *tagObj; const char *tagCstr; int i; CHKR(es_addBuf(str, "", 12)); for (i = json_object_array_length(taglist) - 1; i >= 0; i--) { CHKR(es_addBuf(str, "", 5)); CHKN(tagObj = 
json_object_array_get_idx(taglist, i)); CHKN(tagCstr = json_object_get_string(tagObj)); CHKR(es_addBuf(str, (char*)tagCstr, strlen(tagCstr))); CHKR(es_addBuf(str, "", 6)); } CHKR(es_addBuf(str, "", 13)); done: return r; } int ln_fmtEventToXML(struct json_object *json, es_str_t **str) { int r = -1; struct json_object *tags; assert(json != NULL); assert(json_object_is_type(json, json_type_object)); if((*str = es_newStr(256)) == NULL) goto done; es_addBuf(str, "", 7); if(json_object_object_get_ex(json, "event.tags", &tags)) { CHKR(ln_addTags_XML(tags, str)); } struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { char *const name = (char*) json_object_iter_peek_name(&it); if (strcmp(name, "event.tags")) { ln_addField_XML(name, json_object_iter_peek_value(&it), str); } json_object_iter_next(&it); } es_addBuf(str, "", 8); done: return r; } liblognorm-2.1.0/src/helpers.h000066400000000000000000000005111520037563000162560ustar00rootroot00000000000000/** * @file pdag.c * @brief Implementation of the parse dag object. * @class ln_pdag pdag.h *//* * Copyright 2015 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. */ #ifndef LIBLOGNORM_HELPERS_H #define LIBLOGNORM_HELPERS_H static inline int myisdigit(char c) { return (c >= '0' && c <= '9'); } #endif liblognorm-2.1.0/src/internal.h000066400000000000000000000107071520037563000164400ustar00rootroot00000000000000/** * @file internal.h * @brief Internal things just needed for building the library, but * not to be installed. *//** * @mainpage * Liblognorm is an easy to use and fast samples-based log normalization * library. * * It can be passed a stream of arbitrary log messages, one at a time, and for * each message it will output well-defined name-value pairs and a set of * tags describing the message. 
*
 * For further details, see its initial announcement available at
 * https://rainer.gerhards.net/2010/10/introducing-liblognorm.html
 *
 * The public interface of this library is described in liblognorm.h.
 *
 * Liblognorm fully supports Unicode. Like most Linux tools, it operates
 * on UTF-8 natively, called "passive mode". This was decided because it
 * lets us keep the size of data structures small while still supporting
 * all of the world's languages (actually more than when we did UCS-2).
 *
 * At the technical level, we can handle UTF-8 multibyte sequences transparently.
 * Liblognorm needs to look at a few US-ASCII characters to do the
 * sample base parsing (things to indicate fields), so this is no
 * issue. Inside the parse tree, a multibyte sequence can simply be processed
 * as if it were a sequence of different characters, each of which is a
 * symbol of its own. In fact, this even allows for somewhat greater parsing
 * speed.
 *//*
 *
 * liblognorm - a fast samples-based log normalization library
 * Copyright 2010-2026 by Rainer Gerhards and Adiscon GmbH.
 *
 * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013
 *
 * This file is part of liblognorm.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution.
*/ #ifndef INTERNAL_H_INCLUDED #define INTERNAL_H_INCLUDED /* the jump-misses-init gcc warning is overdoing when we jump to the * exit of a function to get proper finalization. So let's disable it. * rgerhards, 2018-04-25 */ #if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) #pragma GCC diagnostic ignored "-Wjump-misses-init" #endif #include "liblognorm.h" #include /* we need to turn off this warning, as it also comes up in C99 mode, which * we use. */ #ifndef _AIX #pragma GCC diagnostic ignored "-Wdeclaration-after-statement" #endif /* support for simple error checking */ #define CHKR(x) \ if((r = (x)) != 0) goto done #define CHKN(x) \ if((x) == NULL) { \ r = LN_NOMEM; \ goto done; \ } #define FAIL(e) {r = (e); goto done;} /** * @brief Convert an es_str_t to a null-terminated C string in-place. * * This function ensures that the internal buffer of the es_str_t has * enough space for a null terminator and adds it at the end of the string. * It returns a pointer to the internal buffer. * * @note The returned string is NOT a new allocation and MUST NOT be freed by the caller. * The memory is still owned by the es_str_t object and will be freed when * the es_str_t object is destroyed. * * @param[in,out] str Pointer to the es_str_t pointer to convert. * @return A pointer to the null-terminated string, or NULL on error. 
*/ static inline char* ln_es_str2cstr(es_str_t **str) { int r = -1; char *buf; if (es_strlen(*str) == (*str)->lenBuf) { CHKR(es_extendBuf(str, 1)); } CHKN(buf = (char*)es_getBufAddr(*str)); buf[es_strlen(*str)] = '\0'; return buf; done: return NULL; } const char * ln_DataForDisplayCharTo(__attribute__((unused)) ln_ctx ctx, void *const pdata); const char * ln_DataForDisplayLiteral(__attribute__((unused)) ln_ctx ctx, void *const pdata); const char * ln_JsonConfLiteral(__attribute__((unused)) ln_ctx ctx, void *const pdata); /* here we add some stuff from the compatibility layer */ #ifndef HAVE_STRNDUP char * strndup(const char *s, size_t n); #endif #endif /* #ifndef INTERNAL_H_INCLUDED */ liblognorm-2.1.0/src/liblognorm.c000066400000000000000000000120371520037563000167610ustar00rootroot00000000000000/* This file implements the liblognorm API. * See header file for descriptions. * * liblognorm - a fast samples-based log normalization library * Copyright 2013 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. 
*/ #include "config.h" #include #include #include "liblognorm.h" #include "lognorm.h" #include "annot.h" #include "samp.h" #include "v1_liblognorm.h" #include "v1_ptree.h" #ifdef ENABLE_TURBO #include "turbo.h" #endif #define CHECK_CTX \ if(ctx->objID != LN_ObjID_CTX) { \ r = -1; \ goto done; \ } const char * ln_version(void) { return VERSION; } int ln_hasAdvancedStats(void) { #ifdef ADVANCED_STATS return 1; #else return 0; #endif } ln_ctx ln_initCtx(void) { ln_ctx ctx; if((ctx = calloc(1, sizeof(struct ln_ctx_s))) == NULL) goto done; #ifdef HAVE_JSON_GLOBAL_SET_STRING_HASH json_global_set_string_hash(JSON_C_STR_HASH_PERLLIKE); #endif #ifdef HAVE_JSON_GLOBAL_SET_PRINTBUF_INITIAL_SIZE json_global_set_printbuf_initial_size(2048); #endif ctx->objID = LN_ObjID_CTX; ctx->dbgCB = NULL; ctx->opts = 0; /* we add an root for the empty word, this simplifies parse * dag handling. */ if((ctx->pdag = ln_newPDAG(ctx)) == NULL) { free(ctx); ctx = NULL; goto done; } /* same for annotation set */ if((ctx->pas = ln_newAnnotSet(ctx)) == NULL) { ln_pdagDelete(ctx->pdag); free(ctx); ctx = NULL; goto done; } #ifdef ENABLE_TURBO /* Turbo ctx is deferred until LN_CTXOPT_TURBO is set via ln_setCtxOpts() */ ctx->turbo = NULL; #endif done: return ctx; } void ln_setCtxOpts(ln_ctx ctx, const unsigned opts) { ctx->opts |= opts; #ifdef ENABLE_TURBO /* Lazy-init turbo context on first LN_CTXOPT_TURBO request */ if((opts & LN_CTXOPT_TURBO) && ctx->turbo == NULL) { ctx->turbo = ln_turbo_ctx_init(); /* Non-fatal if NULL — will fall back to recursive walker */ } #endif } unsigned ln_getCtxOpts(ln_ctx ctx) { return ctx->opts; } int ln_exitCtx(ln_ctx ctx) { int r = 0; CHECK_CTX; ln_dbgprintf(ctx, "exitCtx %p", ctx); ctx->objID = LN_ObjID_None; /* prevent double free */ #ifdef ENABLE_TURBO if(ctx->turbo != NULL) { ln_turbo_ctx_free(ctx->turbo); ctx->turbo = NULL; } #endif /* support for old cruft */ if(ctx->ptree != NULL) ln_deletePTree(ctx->ptree); /* end support for old cruft */ if(ctx->pdag != NULL) 
ln_pdagDelete(ctx->pdag); for(int i = 0 ; i < ctx->nTypes ; ++i) { free((void*)ctx->type_pdags[i].name); ln_pdagDelete(ctx->type_pdags[i].pdag); } free(ctx->type_pdags); if(ctx->rulePrefix != NULL) es_deleteStr(ctx->rulePrefix); if(ctx->pas != NULL) ln_deleteAnnotSet(ctx->pas); free(ctx); done: return r; } int ln_setDebugCB(ln_ctx ctx, void (*cb)(void*, const char*, size_t), void *cookie) { int r = 0; CHECK_CTX; ctx->dbgCB = cb; ctx->dbgCookie = cookie; done: return r; } int ln_setErrMsgCB(ln_ctx ctx, void (*cb)(void*, const char*, size_t), void *cookie) { int r = 0; CHECK_CTX; ctx->errmsgCB = cb; ctx->errmsgCookie = cookie; done: return r; } /** * Attempt TurboVM compilation after rulebase load. * No-op when turbo is disabled or not requested. */ static void ln_tryTurboCompile(ln_ctx ctx, int load_result) { #ifdef ENABLE_TURBO if(load_result == 0 && ctx->turbo != NULL && (ctx->opts & LN_CTXOPT_TURBO)) { int r = ln_turbo_compile(ctx); if(r != 0) { ln_dbgprintf(ctx, "turbo VM compilation failed, " "using recursive walker"); } else { ln_dbgprintf(ctx, "turbo VM ready"); } } #else (void)ctx; (void)load_result; #endif } int ln_loadSamples(ln_ctx ctx, const char *file) { int r = 0; const char *tofree; CHECK_CTX; ctx->conf_file = tofree = strdup(file); ctx->conf_ln_nbr = 0; ++ctx->include_level; r = ln_sampLoad(ctx, file); if (r != -1) { --ctx->include_level; ctx->conf_file = NULL; } free((void*)tofree); ln_tryTurboCompile(ctx, r); done: return r; } int ln_loadSamplesFromString(ln_ctx ctx, const char *string) { int r = 0; const char *tofree; CHECK_CTX; ctx->conf_file = tofree = strdup("--NO-FILE--"); ctx->conf_ln_nbr = 0; ++ctx->include_level; r = ln_sampLoadFromString(ctx, string); --ctx->include_level; free((void*)tofree); ctx->conf_file = NULL; ln_tryTurboCompile(ctx, r); done: return r; } liblognorm-2.1.0/src/liblognorm.h000066400000000000000000000234201520037563000167640ustar00rootroot00000000000000/** * @file liblognorm.h * @brief The public liblognorm API. 
* * Functions other than those defined here MUST not be called by * a liblognorm "user" application. * * This file is meant to be included by applications using liblognorm. * For lognorm library files themselves, include "lognorm.h". *//** * @mainpage * Liblognorm is an easy to use and fast samples-based log normalization * library. * * It can be passed a stream of arbitrary log messages, one at a time, and for * each message it will output well-defined name-value pairs and a set of * tags describing the message. * * For further details, see it's initial announcement available at * https://rainer.gerhards.net/2010/10/introducing-liblognorm.html * * The public interface of this library is describe in liblognorm.h. * * Liblognorm fully supports Unicode. Like most Linux tools, it operates * on UTF-8 natively, called "passive mode". This was decided because we * so can keep the size of data structures small while still supporting * all of the world's languages (actually more than when we did UCS-2). * * At the technical level, we can handle UTF-8 multibyte sequences transparently. * Liblognorm needs to look at a few US-ASCII characters to do the * sample base parsing (things to indicate fields), so this is no * issue. Inside the parse tree, a multibyte sequence can simple be processed * as if it were a sequence of different characters that make up a their * own symbol each. In fact, this even allows for somewhat greater parsing * speed. *//* * * liblognorm - a fast samples-based log normalization library * Copyright 2010-2017 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #ifndef LIBLOGNORM_H_INCLUDED #define LIBLOGNORM_H_INCLUDED #include /* we need size_t */ #include /* error codes */ #define LN_NOMEM -1 #define LN_INVLDFDESCR -1 #define LN_BADCONFIG -250 #define LN_BADPARSERSTATE -500 #define LN_WRONGPARSER -1000 #define LN_RB_LINE_TOO_LONG -1001 #define LN_OVER_SIZE_LIMIT -1002 /** * The library context descriptor. * This is used to permit multiple independent instances of the * library to be called within a single program. This is most * useful for plugin-based architectures. */ typedef struct ln_ctx_s* ln_ctx; /* API */ /** * Return library version string. * * Returns the version of the currently used library. * * @return Zero-Terminated library version string. */ /* Note: this MUST NOT be inline to make sure the actual library * has the right version, not just what was used to compile! */ const char *ln_version(void); /** * Return if library is build with advanced statistics * activated. * * @return 1 if advanced stats are active, 0 if not */ int ln_hasAdvancedStats(void); /** * Initialize a library context. * * To prevent memory leaks, ln_exitCtx() must be called on a library * context that is no longer needed. * * @return new library context or NULL if an error occurred */ ln_ctx ln_initCtx(void); /** * Inherit control attributes from a library context. * * This does not copy the parse-tree, but does copy * behaviour-controlling attributes such as enableRegex. 
* * Just as with ln_initCtx, ln_exitCtx() must be called on a library * context that is no longer needed. * * @return new library context or NULL if an error occurred */ ln_ctx ln_inherittedCtx(ln_ctx parent); /** * Discard a library context. * * Free's the resources associated with the given library context. It * MUST NOT be accessed after calling this function. * * @param ctx The context to be discarded. * * @return Returns zero on success, something else otherwise. */ int ln_exitCtx(ln_ctx ctx); /* binary values, so that we can "or" them together */ #define LN_CTXOPT_ALLOW_REGEX 0x01 /**< permit regex matching */ #define LN_CTXOPT_ADD_EXEC_PATH 0x02 /**< add exec_path attribute (time-consuming!) */ #define LN_CTXOPT_ADD_ORIGINALMSG 0x04 /**< always add original message to output (not just in error case) */ #define LN_CTXOPT_ADD_RULE 0x08 /**< add mockup rule */ #define LN_CTXOPT_ADD_RULE_LOCATION 0x10 /**< add rule location (file, lineno) to metadata */ #define LN_CTXOPT_TURBO 0x20 /**< enable TurboVM parsing */ /** * Set options on ctx. * * @param ctx The context to be modified. * @param opts a potentially or-ed list of options, see LN_CTXOPT_* */ void ln_setCtxOpts(ln_ctx ctx, unsigned opts); /** * Set a debug message handler (callback). * * Liblognorm can provide helpful information for debugging * - it's internal processing * - the way a log message is being normalized * * It does so by emitting "interesting" information about its processing * at various stages. A caller can obtain this information by registering * an entry point. When done so, liblognorm will call the entry point * whenever it has something to emit. Note that debugging can be rather * verbose. 
* * The callback will be called with the following three parameters in that order: * - the caller-provided cookie * - a zero-terminated string buffer * - the length of the string buffer, without the trailing NUL byte * * @note * The provided callback function must not call any liblognorm * APIs except when specifically flagged as safe for calling by a debug * callback handler. * * @param[in] ctx The library context to apply callback to. * @param[in] cb The function to be called for debugging * @param[in] cookie Opaque cookie to be passed down to debug handler. Can be * used for some state tracking by the caller. This is defined as * void* to support pointers. To play it safe, a pointer should be * passed (but adventurous folks may also use an unsigned). * * @return Returns zero on success, something else otherwise. */ int ln_setDebugCB(ln_ctx ctx, void (*cb)(void*, const char*, size_t), void *cookie); /** * Set a error message handler (callback). * * If set, this is used to emit error messages of interest to the user, e.g. * on failures during rulebase load. It is suggested that a caller uses this * feedback to aid its users in resolving issues. * Its semantics are otherwise exactly the same like ln_setDebugCB(). */ int ln_setErrMsgCB(ln_ctx ctx, void (*cb)(void*, const char*, size_t), void *cookie); /** * enable or disable debug mode. * * @param[in] ctx context * @param[in] b boolean 0 - disable debug mode, 1 - enable debug mode */ void ln_enableDebug(ln_ctx ctx, int i); /** * Load a (log) sample file. * * The file must contain log samples in syntactically correct format. Samples are added * to set already loaded in the current context. If there is a sample with duplicate * semantics, this sample will be ignored. Most importantly, this can \b not be used * to change tag assignments for a given sample. * * @param[in] ctx The library context to apply callback to. * @param[in] file Name of file to be loaded. 
/**
 * Normalize a message.
 *
 * This is the main library entry point. It is called with a message
 * to normalize and will return a normalized in-memory representation
 * of it.
 *
 * If an error occurs, the function returns -1. In that case, an
 * in-memory event representation has been generated if *json_p is
 * non-NULL. That event then contains further error details in
 * normalized form.
 *
 * @note
 * This function works on byte-counted strings and as such is able to
 * process NUL bytes if they occur inside the message. On the other hand,
 * this means that the correct message size, \b excluding the NUL byte,
 * must be provided.
 *
 * @param[in] ctx The library context to use.
 * @param[in] str The message string (see note above).
 * @param[in] strLen The length of the message in bytes.
 * @param[out] json_p A new event record or NULL if an error occurred. Must be
 *                    destructed if no longer needed.
 *
 * @return Returns zero on success, something else otherwise.
 */
int ln_normalize(ln_ctx ctx, const char *str, const size_t strLen, struct json_object **json_p);
/* Portable wrapper around strerror_r() (idea taken from rsyslog, ASL 2.0).
 *
 * strerror_r() comes in two flavours: the GNU one returns a char* and may
 * not touch the supplied buffer at all, the XSI one returns an int status.
 * This wrapper hides the difference and always leaves a NUL-terminated
 * description of errnum in buf.
 *
 * @param errnum  OS error number to describe
 * @param buf     caller-provided output buffer
 * @param buflen  size of buf in bytes; if 0 (or buf is NULL) nothing is written
 * @return buf
 */
static char *
rs_strerror_r(const int errnum, char *const buf, const size_t buflen)
{
	/* nothing can be stored, and buf[buflen - 1] would be out of bounds */
	if(buf == NULL || buflen == 0)
		return buf;

#ifndef HAVE_STRERROR_R
	snprintf(buf, buflen, "%s", strerror(errnum));
#else
#	ifdef STRERROR_R_CHAR_P
	/* GNU variant: the result may point to an immutable string instead of buf */
	const char *const p = strerror_r(errnum, buf, buflen);
	if(p == NULL)
		snprintf(buf, buflen, "unknown error %d", errnum);
	else if(p != buf)
		snprintf(buf, buflen, "%s", p);
#	else
	/* XSI variant: on failure the buffer content is not guaranteed */
	if(strerror_r(errnum, buf, buflen) != 0)
		snprintf(buf, buflen, "unknown error %d", errnum);
#	endif
#endif
	return buf;
}
/* Convert the return value of (v)snprintf() into the number of bytes that
 * are really stored in buf (not counting the NUL byte).
 *
 * (v)snprintf() returns the length the output WOULD have had, or a negative
 * value on error. Neither may be handed to a callback as the buffer length.
 *
 * @param buf            buffer that was formatted into (at least 5 bytes)
 * @param bufsize        size of buf
 * @param n              return value of (v)snprintf()
 * @param markTruncated  if non-zero, a truncated buffer is made to end in "...\n"
 * @return number of valid bytes in buf
 */
static size_t
clampFormattedLen(char *const buf, const size_t bufsize, const int n, const int markTruncated)
{
	if(n < 0) {
		/* formatting failed, buffer content is indeterminate */
		buf[0] = '\0';
		return 0;
	}
	if((size_t) n < bufsize)
		return (size_t) n;
	if(markTruncated)
		memcpy(buf + bufsize - 5, "...\n", 5); /* includes the NUL byte */
	return bufsize - 1;
}

/**
 * Generate some debug message and call the caller provided callback.
 *
 * Will first check if a user callback is registered. If not, returns
 * immediately. Messages that do not fit the internal 8KiB buffer are
 * truncated and end in "...\n".
 */
void
ln_dbgprintf(ln_ctx ctx, const char *fmt, ...)
{
	va_list ap;
	char buf[8*1024];
	size_t lenBuf;
	int n;

	if(ctx->dbgCB == NULL)
		return;

	va_start(ap, fmt);
	n = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	lenBuf = clampFormattedLen(buf, sizeof(buf), n, 1);

	ctx->dbgCB(ctx->dbgCookie, buf, lenBuf);
}

/**
 * Generate error message and call the caller provided callback.
 * eno is the OS errno. If non-zero, the OS error description
 * will be added after the user-provided string. If a rulebase file is
 * currently being processed (ctx->conf_file), its name and line number
 * are put in front of the message.
 *
 * Will first check if a user callback is registered. If not, returns
 * immediately. The message is also forwarded to the debug callback.
 */
void
ln_errprintf(const ln_ctx ctx, const int eno, const char *fmt, ...)
{
	va_list ap;
	char buf[8*1024];
	char errbuf[1024];
	char finalbuf[9*1024];
	const char *msg;
	size_t lenBuf;
	int n;

	if(ctx->errmsgCB == NULL)
		return;

	va_start(ap, fmt);
	n = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	lenBuf = clampFormattedLen(buf, sizeof(buf), n, 1);
	msg = buf;

	if(eno != 0) {
		/* make sure errbuf is a valid string whatever strerror_r() does */
		memset(errbuf, 0, sizeof(errbuf));
		rs_strerror_r(eno, errbuf, sizeof(errbuf)-1);
	}

	/* The final text is composed in a single step: no temporary copy is
	 * needed and the length is clamped to what finalbuf really holds.
	 */
	if(ctx->conf_file != NULL) {
		/* error during config processing, add line info */
		if(eno != 0) {
			n = snprintf(finalbuf, sizeof(finalbuf), "rulebase file %s[%u]: %s: %s",
				ctx->conf_file, ctx->conf_ln_nbr, buf, errbuf);
		} else {
			n = snprintf(finalbuf, sizeof(finalbuf), "rulebase file %s[%u]: %s",
				ctx->conf_file, ctx->conf_ln_nbr, buf);
		}
		lenBuf = clampFormattedLen(finalbuf, sizeof(finalbuf), n, 0);
		msg = finalbuf;
	} else if(eno != 0) {
		n = snprintf(finalbuf, sizeof(finalbuf), "%s: %s", buf, errbuf);
		lenBuf = clampFormattedLen(finalbuf, sizeof(finalbuf), n, 0);
		msg = finalbuf;
	}

	ctx->errmsgCB(ctx->errmsgCookie, msg, lenBuf);
	ln_dbgprintf(ctx, "%s", msg);
}

/* Switch debug mode on or off; only the lowest bit of i is stored. */
void
ln_enableDebug(ln_ctx ctx, int i)
{
	ctx->debug = i & 0x01;
}
liblognorm-2.1.0/src/lognorm.h000066400000000000000000000070301520037563000162740ustar00rootroot00000000000000/** * @file lognorm.h * @brief Private data structures used by the liblognorm API. *//* * * liblognorm - a fast samples-based log normalization library * Copyright 2010 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. 
/**
 * The library context. It holds the user callbacks, the parse DAG with its
 * annotations and user-defined types, the option bitmask and the state
 * needed while a rulebase is being loaded.
 */
struct ln_ctx_s {
	unsigned objID;	/**< a magic number to prevent some memory addressing errors */
	void (*dbgCB)(void *cookie, const char *msg, size_t lenMsg);
		/**< user-provided debug output callback */
	void *dbgCookie; /**< cookie to be passed to debug callback */
	void (*errmsgCB)(void *cookie, const char *msg, size_t lenMsg);
		/**< user-provided error message callback */
	void *errmsgCookie; /**< cookie to be passed to error message callback */
	ln_pdag *pdag; /**< parse dag being used by this context */
	ln_annotSet *pas; /**< associated set of annotations */
	unsigned nNodes; /**< number of nodes in our parse tree */
	unsigned char debug; /**< boolean: are we in debug mode? */
	es_str_t *rulePrefix; /**< work variable for loading rule bases
				* this is the prefix string that will be prepended
				* to all rules before they are submitted to tree
				* building.
				*/
	unsigned opts; /**< specific options, or-ed LN_CTXOPT_* values (see liblognorm.h) */
#ifdef ENABLE_TURBO
	void *turbo; /**< TurboVM context (ln_turbo_ctx_t), NULL if disabled */
#endif
	struct ln_type_pdag *type_pdags; /**< array of our type pdags */
	int nTypes; /**< number of type pdags */
	int version; /**< 1 or 2, depending on rulebase/algo version */

	/* here follows stuff for the v1 subsystem -- do NOT make any changes
	 * down here. This is strictly read-only. May also be removed some time in
	 * the future.
	 */
	struct ln_ptree *ptree;
	/* end old cruft */
	/* things for config processing / error message during it */
	int include_level; /**< 1 for main rulebase file, higher for include levels */
	const char *conf_file; /**< currently open config file or NULL, if none */
	unsigned int conf_ln_nbr; /**< current config file line number */
};
/* Write "<prefix><text>\n" to stderr with one formatted call. */
static void
emitStderrLine(const char *const prefix, const char *const text)
{
	fprintf(stderr, "%s%s\n", prefix, text);
}

/* liblognorm error message callback: forwards the message to stderr.
 * Neither the cookie nor the message length is needed here.
 */
static void
errCallBack(void *cookie, const char *msg, size_t lenMsg)
{
	(void) cookie;
	(void) lenMsg;
	emitStderrLine("liblognorm error: ", msg);
}

/* liblognorm debug message callback: forwards the message to stderr.
 * Neither the cookie nor the message length is needed here.
 */
static void
dbgCallBack(void *cookie, const char *msg, size_t lenMsg)
{
	(void) cookie;
	(void) lenMsg;
	emitStderrLine("liblognorm: ", msg);
}

/* Report a problem detected by the tool itself on stderr. */
static void
complain(const char *errmsg)
{
	emitStderrLine("", errmsg);
}
#define DEFAULT_LINE_SIZE (10 * 1024)

/* Read one line of arbitrary length from fp.
 *
 * The line terminator ("\n" or "\r\n") is not part of the result. A final
 * line that is not terminated by a newline is returned complete.
 *
 * @param fp stream to read from
 * @return malloc()ed, NUL-terminated line which the caller must free(), or
 *         NULL at end of input or if memory could not be allocated (in the
 *         latter case a message is written to stderr)
 */
static char *
read_line(FILE *fp)
{
	size_t line_capacity = DEFAULT_LINE_SIZE;
	size_t line_len = 0;
	char *line = NULL;
	int ch;

	while((ch = fgetc(fp)) != EOF) {
		if(line == NULL) {
			line = malloc(line_capacity);
		} else if(line_len + 1 == line_capacity) {
			/* one byte is always kept in reserve for the NUL terminator */
			char *const grown = realloc(line, line_capacity * 2);
			if(grown == NULL)
				free(line); /* realloc() left the old block untouched */
			else
				line_capacity *= 2;
			line = grown;
		}
		if(line == NULL) {
			fprintf(stderr, "Couldn't allocate working-buffer for log-line\n");
			return NULL;
		}
		if(ch == '\n')
			break;
		line[line_len++] = (char) ch;
	}

	if(line == NULL)
		return NULL; /* end of input before any byte was read */

	if(line_len > 0 && line[line_len - 1] == '\r')
		--line_len;
	line[line_len] = '\0';
	return line;
}
strlen(line), &json); if(json != NULL) { if(eventHasTag(json, mandatoryTagCstr)) { struct json_object *dummy; const int parsed = !json_object_object_get_ex( json, "unparsed-data", &dummy); if(parsed) { numParsed++; if(recOutput & OUTPUT_PARSED_RECS) { outputEvent(json, line); } } else { numUnparsed++; amendLineNbr(json, line_nbr); if(recOutput & OUTPUT_UNPARSED_RECS) { outputEvent(json, line); } } } else { numWrongTag++; } json_object_put(json); json = NULL; } free(line); } } if(outputNbrUnparsed && numUnparsed > 0) fprintf(stderr, "%llu unparsable entries\n", numUnparsed); if(numWrongTag > 0) fprintf(stderr, "%llu entries with wrong tag dropped\n", numWrongTag); if(outputSummaryLine) { fprintf(stderr, "%llu records processed, %llu parsed, %llu unparsed\n", numParsed+numUnparsed, numParsed, numUnparsed); } free(mandatoryTagCstr); } /** * Generate a command file for the GNU DOT tools. */ static void genDOT(void) { es_str_t *str; str = es_newStr(1024); ln_genDotPDAGGraph(ctx->pdag, &str); fwrite(es_getBufAddr(str), 1, es_strlen(str), fpDOT); } static void printVersion(void) { fprintf(stderr, "lognormalizer version: " VERSION "\n"); fprintf(stderr, "liblognorm version: %s\n", ln_version()); fprintf(stderr, "\tadvanced stats: %s\n", ln_hasAdvancedStats() ? 
"available" : "not available"); } static void handle_generic_option(const char* opt) { if (strcmp("allowRegex", opt) == 0) { ln_setCtxOpts(ctx, LN_CTXOPT_ALLOW_REGEX); } else if (strcmp("addExecPath", opt) == 0) { ln_setCtxOpts(ctx, LN_CTXOPT_ADD_EXEC_PATH); } else if (strcmp("addOriginalMsg", opt) == 0) { ln_setCtxOpts(ctx, LN_CTXOPT_ADD_ORIGINALMSG); } else if (strcmp("addRule", opt) == 0) { ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE); } else if (strcmp("addRuleLocation", opt) == 0) { ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE_LOCATION); #ifdef ENABLE_TURBO } else if (strcmp("turbo", opt) == 0) { ln_setCtxOpts(ctx, LN_CTXOPT_TURBO); #endif } else { fprintf(stderr, "invalid -o option '%s'\n", opt); exit(1); } } static void usage(void) { fprintf(stderr, "Options:\n" " -r Rulebase to use. This is required option\n" " -H print summary line (nbr of msgs Handled)\n" " -U print number of unparsed messages (only if non-zero)\n" " -e\n" " Change output format. By default, json is used\n" " Raw is exactly like the input. It is useful in combination\n" " with -p/-P options to extract known good/bad messages\n" " -E Encoder-specific format (used for CSV, read docs)\n" " -T Include 'event.tags' in JSON format\n" " -oallowRegex Allow regexp matching (read docs about performance penalty)\n" " -oaddRule Add a mockup of the matching rule.\n" " -oaddRuleLocation Add location of matching rule to metadata\n" " -oaddExecPath Add exec_path attribute to output\n" " -oaddOriginalMsg Always add original message to output, not just in error case\n" #ifdef ENABLE_TURBO " -oturbo Enable TurboVM fast path for JSON output\n" #endif " -p Print back only if the message has been parsed successfully\n" " -P Print back only if the message has NOT been parsed successfully\n" " -L Add source file line number information to unparsed line output\n" " -t Print back only messages matching the tag\n" " -v Print debug. 
When used 3 times, prints parse DAG\n" " -V Print version information\n" " -d Print DOT file to stdout and exit\n" " -d Save DOT file to the filename\n" " -s Print parse dag statistics and exit\n" " -S Print extended parse dag statistics and exit (includes -s)\n" " -x Print statistics as dot file (called only)\n" "\n" ); } int main(int argc, char *argv[]) { int opt; char *repository = NULL; int usedRB = 0; /* 0=no rule; 1=rule from rulebase; 2=rule from string */ int ret = 0; FILE *fpStats = NULL; FILE *fpStatsDOT = NULL; int extendedStats = 0; if((ctx = ln_initCtx()) == NULL) { complain("Could not initialize liblognorm context"); ret = 1; goto exit; } while((opt = getopt(argc, argv, "d:s:S:e:r:R:E:vVpPt:To:hHULx:")) != -1) { switch (opt) { case 'V': printVersion(); exit(0); break; case 'd': /* generate DOT file */ if(!strcmp(optarg, "")) { fpDOT = stdout; } else { if((fpDOT = fopen(optarg, "w")) == NULL) { perror(optarg); complain("Cannot open DOT file"); ret = 1; goto exit; } } break; case 'x': /* generate statistics DOT file */ if(!strcmp(optarg, "")) { fpStatsDOT = stdout; } else { if((fpStatsDOT = fopen(optarg, "w")) == NULL) { perror(optarg); complain("Cannot open statistics DOT file"); ret = 1; goto exit; } } break; case 'S': /* generate pdag statistic file */ extendedStats = 1; /* INTENTIONALLY NO BREAK! - KEEP order! 
*/ /*FALLTHROUGH*/ case 's': /* generate pdag statistic file */ if(!strcmp(optarg, "-")) { fpStats = stdout; } else { if((fpStats = fopen(optarg, "w")) == NULL) { perror(optarg); complain("Cannot open parser statistics file"); ret = 1; goto exit; } } break; case 'v': verbose++; break; case 'E': /* encoder-specific format string (will be validated by encoder) */ encFmt = es_newStrFromCStr(optarg, strlen(optarg)); break; case 'p': recOutput = OUTPUT_PARSED_RECS; break; case 'P': recOutput = OUTPUT_UNPARSED_RECS; break; case 'H': outputSummaryLine = 1; break; case 'U': outputNbrUnparsed = 1; break; case 'L': addErrLineNbr = 1; break; case 'T': flatTags = 1; break; case 'e': /* encoder to use */ if(!strcmp(optarg, "json")) { outfmt = f_json; } else if(!strcmp(optarg, "xml")) { outfmt = f_xml; } else if(!strcmp(optarg, "cee-syslog")) { outfmt = f_syslog; } else if(!strcmp(optarg, "csv")) { outfmt = f_csv; } else if(!strcmp(optarg, "raw")) { outfmt = f_raw; } break; case 'r': /* rule base to use */ if(usedRB != 2) { repository = optarg; usedRB = 1; } else { usedRB = -1; } break; case 'R': if(usedRB != 1) { repository = optarg; usedRB = 2; } else { usedRB = -1; } break; case 't': /* if given, only messages tagged with the argument are output */ mandatoryTag = es_newStrFromCStr(optarg, strlen(optarg)); break; case 'o': handle_generic_option(optarg); break; case 'h': default: usage(); ret = 1; goto exit; break; } } if(repository == NULL) { complain("Samples repository or String must be given (-r or -R)"); ret = 1; goto exit; } if(usedRB == -1) { complain("Only use one rulebase (-r or -R)"); ret = 1; goto exit; } ln_setErrMsgCB(ctx, errCallBack, NULL); if(verbose) { ln_setDebugCB(ctx, dbgCallBack, NULL); ln_enableDebug(ctx, 1); } if(usedRB == 1) { if(ln_loadSamples(ctx, repository)) { fprintf(stderr, "fatal error: cannot load rulebase\n"); exit(1); } } else if(usedRB == 2) { if(ln_loadSamplesFromString(ctx, repository)) { fprintf(stderr, "fatal error: cannot load rule from 
String\n"); exit(1); } } if(verbose > 0) fprintf(stderr, "number of tree nodes: %d\n", ctx->nNodes); if(fpDOT != NULL) { genDOT(); ret=1; goto exit; } if(verbose > 2) ln_displayPDAG(ctx); normalize(); if(fpStats != NULL) { ln_fullPdagStats(ctx, fpStats, extendedStats); } if(fpStatsDOT != NULL) { ln_fullPDagStatsDOT(ctx, fpStatsDOT); } exit: if (ctx) ln_exitCtx(ctx); if (encFmt != NULL) free(encFmt); return ret; } liblognorm-2.1.0/src/parser.c000066400000000000000000003247721520037563000161250ustar00rootroot00000000000000/* * liblognorm - a fast samples-based log normalization library * Copyright 2010-2021 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "liblognorm.h" #include "lognorm.h" #include "internal.h" #include "parser.h" #include "samp.h" #include "helpers.h" /* how should output values be formatted? 
/* Parse an unsigned decimal number at the start of a byte-counted buffer.
 *
 * All leading digits are consumed. If the number does not fit into an int,
 * the result saturates at the largest value instead of overflowing (signed
 * overflow is undefined behavior). No digit at all yields 0.
 *
 * @param[in,out] buf     current parse position; advanced past the digits
 * @param[in,out] lenBuf  bytes left in the buffer; reduced accordingly
 * @return the parsed (possibly saturated) value
 */
static inline int
hParseInt(const unsigned char **buf, size_t *lenBuf)
{
	/* Upper bound for the result: UINT_MAX / 2, which never exceeds
	 * INT_MAX. Computed this way so that no additional header is needed.
	 */
	const int maxVal = (int) (~0u >> 1);
	const unsigned char *p = *buf;
	size_t len = *lenBuf;
	int i = 0;

	while(len > 0 && myisdigit(*p)) {
		const int digit = *p - '0';
		if(i > (maxVal - digit) / 10)
			i = maxVal; /* would overflow: saturate, but keep consuming digits */
		else
			i = i * 10 + digit;
		++p;
		--len;
	}

	*buf = p;
	*lenBuf = len;
	return i;
}
This is primarily for comparison * of parser equalness. * @param[data] data parser data block * @return pointer to c string, NOT to be freed */ #define PARSER_JsonConf(ParserName) \ const char * ln_JsonConf##ParserName(__attribute__((unused)) ln_ctx ctx, void *const pdata) /* parser constructor * @param[in] json config json items * @param[out] data parser data block (to be allocated) * At minimum, *data must be set to NULL * @return error status (0 == OK) */ #define PARSER_Construct(ParserName) \ int ln_construct##ParserName( \ __attribute__((unused)) ln_ctx ctx, \ __attribute__((unused)) json_object *const json, \ void **pdata) /* parser destructor * @param[data] data parser data block (to be de-allocated) */ #define PARSER_Destruct(ParserName) \ void ln_destruct##ParserName(__attribute__((unused)) ln_ctx ctx, void *const pdata) /* the following table saves us from computing an additional date to get * the ordinal day of the year - at least from 1967-2099 * Note: non-2038+ compliant systems (Solaris) will generate compiler * warnings on the post 2038-rollover years. 
/* Lookup table with the unix timestamp of the last second (Dec 31,
 * 23:59:59 UTC) of each year from 1967 to 2099. It saves us from computing
 * the seconds up to the start of a year.
 * Note: systems whose time_t cannot hold post-2038 values will generate
 * compiler warnings for the later entries.
 */
static const int yearInSec_startYear = 1967;
/* for x in $(seq 1967 2099) ; do
 *   printf %s', ' $(date --date="Dec 31 ${x} UTC 23:59:59" +%s)
 * done |fold -w 70 -s */
static const time_t yearInSecs[] = {
	-63158401, -31536001, -1, 31535999, 63071999, 94694399, 126230399,
	157766399, 189302399, 220924799, 252460799, 283996799, 315532799,
	347155199, 378691199, 410227199, 441763199, 473385599, 504921599,
	536457599, 567993599, 599615999, 631151999, 662687999, 694223999,
	725846399, 757382399, 788918399, 820454399, 852076799, 883612799,
	915148799, 946684799, 978307199, 1009843199, 1041379199, 1072915199,
	1104537599, 1136073599, 1167609599, 1199145599, 1230767999,
	1262303999, 1293839999, 1325375999, 1356998399, 1388534399,
	1420070399, 1451606399, 1483228799, 1514764799, 1546300799,
	1577836799, 1609459199, 1640995199, 1672531199, 1704067199,
	1735689599, 1767225599, 1798761599, 1830297599, 1861919999,
	1893455999, 1924991999, 1956527999, 1988150399, 2019686399,
	2051222399, 2082758399, 2114380799, 2145916799, 2177452799,
	2208988799, 2240611199, 2272147199, 2303683199, 2335219199,
	2366841599, 2398377599, 2429913599, 2461449599, 2493071999,
	2524607999, 2556143999, 2587679999, 2619302399, 2650838399,
	2682374399, 2713910399, 2745532799, 2777068799, 2808604799,
	2840140799, 2871763199, 2903299199, 2934835199, 2966371199,
	2997993599, 3029529599, 3061065599, 3092601599, 3124223999,
	3155759999, 3187295999, 3218831999, 3250454399, 3281990399,
	3313526399, 3345062399, 3376684799, 3408220799, 3439756799,
	3471292799, 3502915199, 3534451199, 3565987199, 3597523199,
	3629145599, 3660681599, 3692217599, 3723753599, 3755375999,
	3786911999, 3818447999, 3849983999, 3881606399, 3913142399,
	3944678399, 3976214399, 4007836799, 4039372799, 4070908799,
	4102444799};

/**
 * Convert a broken-down syslog timestamp to a unix timestamp (UTC).
 *
 * mktime() is deliberately not used: it works in the local time zone of
 * the machine, while syslog timestamps carry their own UTC offset.
 * (Approach taken from the rsyslog sources.)
 *
 * @param year,month,day      calendar date; years outside 1970..2100 yield 0,
 *                            a month outside 1..12 is treated like January
 *                            for the day count
 * @param hour,minute,second  time of day
 * @param OffsetHour,OffsetMinute  UTC offset of the timestamp
 * @param OffsetMode          '+' if the timestamp is ahead of UTC; any other
 *                            value means it is behind
 * @return seconds since the epoch, or 0 if year is out of range
 */
static time_t
syslogTime2time_t(const int year, const int month, const int day,
	const int hour, const int minute, const int second,
	const int OffsetHour, const int OffsetMinute, const char OffsetMode)
{
	/* days elapsed in a non-leap year before the 1st of each month */
	static const int daysBeforeMonth[12] = {
		0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
	};

	if(year < 1970 || year > 2100)
		return 0;

	long daysIntoYear = 0;
	if(month >= 1 && month <= 12)
		daysIntoYear = daysBeforeMonth[month - 1];

	/* in leap years, everything after February is one day later */
	const int isLeapYear = (year % 4 == 0 && year % 100 != 0) || (year == 2000);
	if(isLeapYear && month > 2)
		++daysIntoYear;

	/* the current day has not passed yet, so it does not count */
	daysIntoYear += day - 1;

	/* the table holds the LAST second of a year, so take the entry of the
	 * previous year and add one second to get to January 1st, 00:00:00.
	 */
	const long prevYearIdx = year - yearInSec_startYear - 1;
	time_t result = (yearInSecs[prevYearIdx] + 1) + daysIntoYear * 86400;

	result += hour*60*60;
	result += minute*60;
	result += second;

	/* a timestamp that is ahead of UTC must be moved back to reach UTC */
	int utcOffset = OffsetHour*3600 + OffsetMinute*60;
	if(OffsetMode == '+')
		utcOffset = -utcOffset;
	result += utcOffset;

	return result;
}
*/ PARSER_Parse(RFC5424Date) const unsigned char *pszTS; struct data_RFC5424Date *const data = (struct data_RFC5424Date*) pdata; /* variables to temporarily hold time information while we parse */ int year; int month; int day; int hour; /* 24 hour clock */ int minute; int second; int secfrac; /* fractional seconds (must be 32 bit!) */ int secfracPrecision; int OffsetHour; /* UTC offset in hours */ int OffsetMinute; /* UTC offset in minutes */ char OffsetMode; size_t len; size_t orglen; /* end variables to temporarily hold time information while we parse */ pszTS = (unsigned char*) npb->str + *offs; len = orglen = npb->strLen - *offs; year = hParseInt(&pszTS, &len); /* We take the liberty to accept slightly malformed timestamps e.g. in * the format of 2003-9-1T1:0:0. */ if(len == 0 || *pszTS++ != '-') goto done; --len; month = hParseInt(&pszTS, &len); if(month < 1 || month > 12) goto done; if(len == 0 || *pszTS++ != '-') goto done; --len; day = hParseInt(&pszTS, &len); if(day < 1 || day > 31) goto done; if(len == 0 || *pszTS++ != 'T') goto done; --len; hour = hParseInt(&pszTS, &len); if(hour < 0 || hour > 23) goto done; if(len == 0 || *pszTS++ != ':') goto done; --len; minute = hParseInt(&pszTS, &len); if(minute < 0 || minute > 59) goto done; if(len == 0 || *pszTS++ != ':') goto done; --len; second = hParseInt(&pszTS, &len); if(second < 0 || second > 60) goto done; /* Now let's see if we have secfrac */ if(len > 0 && *pszTS == '.') { --len; const unsigned char *pszStart = ++pszTS; secfrac = hParseInt(&pszTS, &len); secfracPrecision = (int) (pszTS - pszStart); } else { secfracPrecision = 0; secfrac = 0; } /* check the timezone */ if(len == 0) goto done; if(*pszTS == 'Z') { OffsetHour = 0; OffsetMinute = 0; OffsetMode = '+'; --len; pszTS++; /* eat Z */ } else if((*pszTS == '+') || (*pszTS == '-')) { OffsetMode = *pszTS; --len; pszTS++; OffsetHour = hParseInt(&pszTS, &len); if(OffsetHour < 0 || OffsetHour > 23) goto done; if(len == 0 || *pszTS++ != ':') goto done; 
--len; OffsetMinute = hParseInt(&pszTS, &len); if(OffsetMinute < 0 || OffsetMinute > 59) goto done; } else { /* there MUST be TZ information */ goto done; } if(len > 0) { if(*pszTS != ' ') /* if it is not a space, it can not be a "good" time */ goto done; } /* we had success, so update parse pointer */ *parsed = orglen - len; if(value != NULL) { if(data->fmt_mode == FMT_AS_STRING) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } else { int64_t timestamp = syslogTime2time_t(year, month, day, hour, minute, second, OffsetHour, OffsetMinute, OffsetMode); if(data->fmt_mode == FMT_AS_TIMESTAMP_UX_MS) { timestamp *= 1000; /* simulate pow(), do not use math lib! */ int div = 1; if(secfracPrecision == 1) { secfrac *= 100; } else if(secfracPrecision == 2) { secfrac *= 10; } else if(secfracPrecision > 3) { for(int i = 0 ; i < (secfracPrecision - 3) ; ++i) div *= 10; } timestamp += secfrac / div; } *value = json_object_new_int64(timestamp); } } r = 0; /* success */ done: return r; } PARSER_Construct(RFC5424Date) { int r = 0; struct data_RFC5424Date *data = (struct data_RFC5424Date*) calloc(1, sizeof(struct data_RFC5424Date)); data->fmt_mode = FMT_AS_STRING; if(json == NULL) goto done; struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(!strcmp(key, "format")) { const char *fmtmode = json_object_get_string(val); if(!strcmp(fmtmode, "timestamp-unix")) { data->fmt_mode = FMT_AS_TIMESTAMP_UX; } else if(!strcmp(fmtmode, "timestamp-unix-ms")) { data->fmt_mode = FMT_AS_TIMESTAMP_UX_MS; } else if(!strcmp(fmtmode, "string")) { data->fmt_mode = FMT_AS_STRING; } else { ln_errprintf(ctx, 0, "invalid value for date-rfc5424:format %s", fmtmode); } } else { if(!(strcmp(key, "name") == 0 && strcmp(json_object_get_string(val), "-") == 
0)) { ln_errprintf(ctx, 0, "invalid param for date-rfc5424 %s", key); } } json_object_iter_next(&it); } done: *pdata = data; return r; } PARSER_Destruct(RFC5424Date) { free(pdata); } struct data_RFC3164Date { enum FMT_MODE fmt_mode; }; /** * Parse a RFC3164 Date. */ PARSER_Parse(RFC3164Date) const unsigned char *p; size_t len, orglen; struct data_RFC3164Date *const data = (struct data_RFC3164Date*) pdata; /* variables to temporarily hold time information while we parse */ int year; int month; int day; int hour; /* 24 hour clock */ int minute; int second; p = (unsigned char*) npb->str + *offs; orglen = len = npb->strLen - *offs; /* If we look at the month (Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec), * we may see the following character sequences occur: * * J(an/u(n/l)), Feb, Ma(r/y), A(pr/ug), Sep, Oct, Nov, Dec * * We will use this for parsing, as it probably is the * fastest way to parse it. */ if(len < 3) goto done; switch(*p++) { case 'j': case 'J': if(*p == 'a' || *p == 'A') { ++p; if(*p == 'n' || *p == 'N') { ++p; month = 1; } else goto done; } else if(*p == 'u' || *p == 'U') { ++p; if(*p == 'n' || *p == 'N') { ++p; month = 6; } else if(*p == 'l' || *p == 'L') { ++p; month = 7; } else goto done; } else goto done; break; case 'f': case 'F': if(*p == 'e' || *p == 'E') { ++p; if(*p == 'b' || *p == 'B') { ++p; month = 2; } else goto done; } else goto done; break; case 'm': case 'M': if(*p == 'a' || *p == 'A') { ++p; if(*p == 'r' || *p == 'R') { ++p; month = 3; } else if(*p == 'y' || *p == 'Y') { ++p; month = 5; } else goto done; } else goto done; break; case 'a': case 'A': if(*p == 'p' || *p == 'P') { ++p; if(*p == 'r' || *p == 'R') { ++p; month = 4; } else goto done; } else if(*p == 'u' || *p == 'U') { ++p; if(*p == 'g' || *p == 'G') { ++p; month = 8; } else goto done; } else goto done; break; case 's': case 'S': if(*p == 'e' || *p == 'E') { ++p; if(*p == 'p' || *p == 'P') { ++p; month = 9; } else goto done; } else goto done; break; case 'o': 
case 'O': if(*p == 'c' || *p == 'C') { ++p; if(*p == 't' || *p == 'T') { ++p; month = 10; } else goto done; } else goto done; break; case 'n': case 'N': if(*p == 'o' || *p == 'O') { ++p; if(*p == 'v' || *p == 'V') { ++p; month = 11; } else goto done; } else goto done; break; case 'd': case 'D': if(*p == 'e' || *p == 'E') { ++p; if(*p == 'c' || *p == 'C') { ++p; month = 12; } else goto done; } else goto done; break; default: goto done; } len -= 3; /* done month */ if(len == 0 || *p++ != ' ') goto done; --len; /* we accept a slightly malformed timestamp with one-digit days. */ if(*p == ' ') { --len; ++p; } day = hParseInt(&p, &len); if(day < 1 || day > 31) goto done; if(len == 0 || *p++ != ' ') goto done; --len; /* time part */ hour = hParseInt(&p, &len); if(hour > 1970 && hour < 2100) { /* if so, we assume this actually is a year. This is a format found * e.g. in Cisco devices. * year = hour; */ /* re-query the hour, this time it must be valid */ if(len == 0 || *p++ != ' ') goto done; --len; hour = hParseInt(&p, &len); } if(hour < 0 || hour > 23) goto done; if(len == 0 || *p++ != ':') goto done; --len; minute = hParseInt(&p, &len); if(minute < 0 || minute > 59) goto done; if(len == 0 || *p++ != ':') goto done; --len; second = hParseInt(&p, &len); if(second < 0 || second > 60) goto done; /* we provide support for an extra ":" after the date. While this is an * invalid format, it occurs frequently enough (e.g. with Cisco devices) * to permit it as a valid case. 
-- rgerhards, 2008-09-12 */ if(len > 0 && *p == ':') { ++p; /* just skip past it */ --len; } /* we had success, so update parse pointer */ *parsed = orglen - len; if(value != NULL) { if(data->fmt_mode == FMT_AS_STRING) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } else { /* we assume year == current year, so let's obtain current year */ struct tm tm; const time_t curr = time(NULL); gmtime_r(&curr, &tm); year = tm.tm_year + 1900; int64_t timestamp = syslogTime2time_t(year, month, day, hour, minute, second, 0, 0, '+'); if(data->fmt_mode == FMT_AS_TIMESTAMP_UX_MS) { /* we do not have more precise info, just bring * into common format! */ timestamp *= 1000; } *value = json_object_new_int64(timestamp); } } r = 0; /* success */ done: return r; } PARSER_Construct(RFC3164Date) { int r = 0; struct data_RFC3164Date *data = (struct data_RFC3164Date*) calloc(1, sizeof(struct data_RFC3164Date)); data->fmt_mode = FMT_AS_STRING; if(json == NULL) goto done; struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(!strcmp(key, "format")) { const char *fmtmode = json_object_get_string(val); if(!strcmp(fmtmode, "timestamp-unix")) { data->fmt_mode = FMT_AS_TIMESTAMP_UX; } else if(!strcmp(fmtmode, "timestamp-unix-ms")) { data->fmt_mode = FMT_AS_TIMESTAMP_UX_MS; } else if(!strcmp(fmtmode, "string")) { data->fmt_mode = FMT_AS_STRING; } else { ln_errprintf(ctx, 0, "invalid value for date-rfc3164:format %s", fmtmode); } } else { if(!(strcmp(key, "name") == 0 && strcmp(json_object_get_string(val), "-") == 0)) { ln_errprintf(ctx, 0, "invalid param for date-rfc3164 %s", key); } } json_object_iter_next(&it); } done: *pdata = data; return r; } PARSER_Destruct(RFC3164Date) { free(pdata); } struct data_Number { int64_t maxval; enum FMT_MODE 
fmt_mode; }; /** * Parse a Number. * Note that a number is an abstracted concept. We always represent it * as 64 bits (but may later change our mind if performance dictates so). */ PARSER_Parse(Number) const char *c; size_t i; int64_t val = 0; struct data_Number *const data = (struct data_Number*) pdata; enum FMT_MODE fmt_mode = FMT_AS_STRING; int64_t maxval = 0; if(data != NULL) { fmt_mode = data->fmt_mode; maxval = data->maxval; } assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; for (i = *offs; i < npb->strLen && myisdigit(c[i]); i++) val = val * 10 + c[i] - '0'; if(maxval > 0 && val > maxval) { LN_DBGPRINTF(npb->ctx, "number parser: val too large (max %" PRIu64 ", actual %" PRIu64 ")", maxval, val); goto done; } if (i == *offs) goto done; /* success, persist */ *parsed = i - *offs; if(value != NULL) { if(fmt_mode == FMT_AS_STRING) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } else { *value = json_object_new_int64(val); } } r = 0; /* success */ done: return r; } PARSER_Construct(Number) { int r = 0; struct data_Number *data = (struct data_Number*) calloc(1, sizeof(struct data_Number)); data->fmt_mode = FMT_AS_STRING; if(json == NULL) goto done; struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(!strcmp(key, "maxval")) { errno = 0; data->maxval = json_object_get_int64(val); if(errno != 0) { ln_errprintf(ctx, errno, "param 'maxval' must be integer but is: %s", json_object_to_json_string(val)); } } else if(!strcmp(key, "format")) { const char *fmtmode = json_object_get_string(val); if(!strcmp(fmtmode, "number")) { data->fmt_mode = FMT_AS_NUMBER; } else if(!strcmp(fmtmode, "string")) { data->fmt_mode = FMT_AS_STRING; } else { ln_errprintf(ctx, 0, "invalid value for 
number:format %s", fmtmode); } } else { if(!(strcmp(key, "name") == 0 && strcmp(json_object_get_string(val), "-") == 0)) { ln_errprintf(ctx, 0, "invalid param for number: %s", key); } } json_object_iter_next(&it); } done: *pdata = data; return r; } PARSER_Destruct(Number) { free(pdata); } struct data_Float { enum FMT_MODE fmt_mode; }; /** * Parse a Real-number in floating-pt form. */ PARSER_Parse(Float) const char *c; size_t i; const struct data_Float *const data = (struct data_Float*) pdata; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; int isNeg = 0; double val = 0; int seen_point = 0; double frac = 10; i = *offs; if (c[i] == '-') { isNeg = 1; i++; } for (; i < npb->strLen; i++) { if (c[i] == '.') { if (seen_point != 0) break; seen_point = 1; } else if (myisdigit(c[i])) { if(seen_point) { val += (c[i] - '0') / frac; frac *= 10; } else { val = val * 10 + c[i] - '0'; } } else { break; } } if (i == *offs) goto done; if(isNeg) val *= -1; /* success, persist */ *parsed = i - *offs; if(value != NULL) { if(data->fmt_mode == FMT_AS_STRING) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } else { char *serialized = strndup(npb->str+(*offs), *parsed); *value = json_object_new_double_s(val, serialized); free(serialized); } } r = 0; /* success */ done: return r; } PARSER_Construct(Float) { int r = 0; struct data_Float *data = (struct data_Float*) calloc(1, sizeof(struct data_Float)); data->fmt_mode = FMT_AS_STRING; if(json == NULL) goto done; struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(!strcmp(key, "format")) { const char *fmtmode = json_object_get_string(val); if(!strcmp(fmtmode, "number")) { data->fmt_mode = FMT_AS_NUMBER; } else if(!strcmp(fmtmode, "string")) { 
data->fmt_mode = FMT_AS_STRING; } else { ln_errprintf(ctx, 0, "invalid value for float:format %s", fmtmode); } } else { if(!(strcmp(key, "name") == 0 && strcmp(json_object_get_string(val), "-") == 0)) { ln_errprintf(ctx, 0, "invalid param for float: %s", key); } } json_object_iter_next(&it); } done: *pdata = data; return r; } PARSER_Destruct(Float) { free(pdata); } struct data_HexNumber { uint64_t maxval; enum FMT_MODE fmt_mode; }; /** * Parse a hex Number. * A hex number begins with 0x and contains only hex digits until the terminating * whitespace. Note that if a non-hex character is detected inside the number string, * this is NOT considered to be a number. */ PARSER_Parse(HexNumber) const char *c; size_t i = *offs; struct data_HexNumber *const data = (struct data_HexNumber*) pdata; uint64_t maxval = data->maxval; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; if(c[i] != '0' || c[i+1] != 'x') goto done; uint64_t val = 0; for (i += 2 ; i < npb->strLen && isxdigit(c[i]); i++) { const char digit = tolower(c[i]); val *= 16; if(digit >= 'a' && digit <= 'f') val += digit - 'a' + 10; else val += digit - '0'; } if (i == *offs || !isspace(c[i])) goto done; if(maxval > 0 && val > maxval) { LN_DBGPRINTF(npb->ctx, "hexnumber parser: val too large (max %" PRIu64 ", actual %" PRIu64 ")", maxval, val); goto done; } /* success, persist */ *parsed = i - *offs; if(value != NULL) { if(data->fmt_mode == FMT_AS_STRING) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } else { *value = json_object_new_int64((int64_t) val); } } r = 0; /* success */ done: return r; } PARSER_Construct(HexNumber) { int r = 0; struct data_HexNumber *data = (struct data_HexNumber*) calloc(1, sizeof(struct data_HexNumber)); data->fmt_mode = FMT_AS_STRING; if(json == NULL) goto done; struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) 
{ const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(!strcmp(key, "maxval")) { errno = 0; data->maxval = json_object_get_int64(val); if(errno != 0) { ln_errprintf(ctx, errno, "param 'maxval' must be integer but is: %s", json_object_to_json_string(val)); } } else if(!strcmp(key, "format")) { const char *fmtmode = json_object_get_string(val); if(!strcmp(fmtmode, "number")) { data->fmt_mode = FMT_AS_NUMBER; } else if(!strcmp(fmtmode, "string")) { data->fmt_mode = FMT_AS_STRING; } else { ln_errprintf(ctx, 0, "invalid value for hexnumber:format %s", fmtmode); } } else { if(!(strcmp(key, "name") == 0 && strcmp(json_object_get_string(val), "-") == 0)) { ln_errprintf(ctx, 0, "invalid param for hexnumber: %s", key); } } json_object_iter_next(&it); } done: *pdata = data; return r; } PARSER_Destruct(HexNumber) { free(pdata); } /** * Parse a kernel timestamp. * This is a fixed format, see * https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/kernel/printk/printk.c?id=refs/tags/v4.0#n1011 * This is the code that generates it: * sprintf(buf, "[%5lu.%06lu] ", (unsigned long)ts, rem_nsec / 1000); * We accept up to 12 digits for ts, everything above that for sure is * no timestamp. */ #define LEN_KERNEL_TIMESTAMP 14 PARSER_Parse(KernelTimestamp) const char *c; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; i = *offs; if(c[i] != '[' || i+LEN_KERNEL_TIMESTAMP > npb->strLen || !myisdigit(c[i+1]) || !myisdigit(c[i+2]) || !myisdigit(c[i+3]) || !myisdigit(c[i+4]) || !myisdigit(c[i+5]) ) goto done; i += 6; for(int j = 0 ; j < 7 && i < npb->strLen && myisdigit(c[i]) ; ) ++i, ++j; /* just scan */ if(i >= npb->strLen || c[i] != '.') goto done; ++i; /* skip over '.' 
*/ if( i+7 > npb->strLen || !myisdigit(c[i+0]) || !myisdigit(c[i+1]) || !myisdigit(c[i+2]) || !myisdigit(c[i+3]) || !myisdigit(c[i+4]) || !myisdigit(c[i+5]) || c[i+6] != ']' ) goto done; i += 7; /* success, persist */ *parsed = i - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } /** * Parse whitespace. * This parses all whitespace until the first non-whitespace character * is found. This is primarily a tool to skip to the next "word" if * the exact number of whitespace characters (and type of whitespace) * is not known. The current parsing position MUST be on a whitespace, * else the parser does not match. * This parser is also a forward-compatibility tool for the upcoming * slsa (simple log structure analyser) tool. */ PARSER_Parse(Whitespace) const char *c; size_t i = *offs; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; if(!isspace(c[i])) goto done; for (i++ ; i < npb->strLen && isspace(c[i]); i++); /* success, persist */ *parsed = i - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } /** * Parse a word. * A word is a SP-delimited entity. The parser always works, except if * the offset is position on a space upon entry. */ PARSER_Parse(Word) const char *c; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; i = *offs; /* search end of word */ while(i < npb->strLen && c[i] != ' ') i++; if(i == *offs) goto done; /* success, persist */ *parsed = i - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } struct data_StringTo { const char *toFind; size_t len; }; /** * Parse everything up to a specific string. 
* swisskid, 2015-01-21 */ PARSER_Parse(StringTo) const char *c; size_t i, j, m; int chkstr; struct data_StringTo *const data = (struct data_StringTo*) pdata; const char *const toFind = data->toFind; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; i = *offs; chkstr = 0; /* Total hunt for letter */ while(chkstr == 0 && i < npb->strLen ) { i++; if(c[i] == toFind[0]) { /* Found the first letter, now find the rest of the string */ j = 1; m = i+1; while(m < npb->strLen && j < data->len ) { if(c[m] != toFind[j]) break; if(j == data->len - 1) { /* full match? */ chkstr = 1; break; } j++; m++; } } } if(i == *offs || i == npb->strLen || chkstr != 1) goto done; /* success, persist */ *parsed = i - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } PARSER_Construct(StringTo) { int r = 0; struct data_StringTo *data = (struct data_StringTo*) calloc(1, sizeof(struct data_StringTo)); struct json_object *ed; if(json_object_object_get_ex(json, "extradata", &ed) == 0) { ln_errprintf(ctx, 0, "string-to type needs 'extradata' parameter"); r = LN_BADCONFIG ; goto done; } data->toFind = strdup(json_object_get_string(ed)); data->len = strlen(data->toFind); *pdata = data; done: if(r != 0) free(data); return r; } PARSER_Destruct(StringTo) { struct data_StringTo *data = (struct data_StringTo*) pdata; free((void*)data->toFind); free(pdata); } /** * Parse a alphabetic word. * A alpha word is composed of characters for which isalpha returns true. * The parser dones if there is no alpha character at all. 
*/ PARSER_Parse(Alpha) const char *c; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; i = *offs; /* search end of word */ while(i < npb->strLen && isalpha(c[i])) i++; if(i == *offs) { goto done; } /* success, persist */ *parsed = i - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } struct data_CharTo { char *term_chars; size_t n_term_chars; char *data_for_display; }; /** * Parse everything up to a specific character. * The character must be the only char inside extra data passed to the parser. * It is considered a format error if * a) the to-be-parsed buffer is already positioned on the terminator character * b) there is no terminator until the end of the buffer * In those cases, the parsers declares itself as not being successful, in all * other cases a string is extracted. */ PARSER_Parse(CharTo) size_t i; struct data_CharTo *const data = (struct data_CharTo*) pdata; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); i = *offs; /* search end of word */ int found = 0; while(i < npb->strLen && !found) { for(size_t j = 0 ; j < data->n_term_chars ; ++j) { if(npb->str[i] == data->term_chars[j]) { found = 1; break; } } if(!found) ++i; } if(i == *offs || i == npb->strLen || !found) goto done; /* success, persist */ *parsed = i - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; done: return r; } PARSER_Construct(CharTo) { int r = 0; LN_DBGPRINTF(ctx, "in parser_construct charTo"); struct data_CharTo *data = (struct data_CharTo*) calloc(1, sizeof(struct data_CharTo)); struct json_object *ed; if(json_object_object_get_ex(json, "extradata", &ed) == 0) { ln_errprintf(ctx, 0, "char-to type needs 'extradata' parameter"); r = LN_BADCONFIG ; goto done; } data->term_chars = strdup(json_object_get_string(ed)); data->n_term_chars = strlen(data->term_chars); *pdata = data; done: if(r 
!= 0) free(data); return r; } PARSER_DataForDisplay(CharTo) { struct data_CharTo *data = (struct data_CharTo*) pdata; if(data->data_for_display == NULL) { data->data_for_display = malloc(8+data->n_term_chars+2); if(data->data_for_display != NULL) { memcpy(data->data_for_display, "char-to{", 8); size_t i, j; for(j = 0, i = 8 ; j < data->n_term_chars ; ++j, ++i) { data->data_for_display[i] = data->term_chars[j]; } data->data_for_display[i++] = '}'; data->data_for_display[i] = '\0'; } } return (data->data_for_display == NULL ) ? "malloc error" : data->data_for_display; } PARSER_Destruct(CharTo) { struct data_CharTo *const data = (struct data_CharTo*) pdata; free(data->data_for_display); free(data->term_chars); free(pdata); } struct data_Literal { const char *lit; const char *json_conf; }; /** * Parse a specific literal. */ PARSER_Parse(Literal) struct data_Literal *const data = (struct data_Literal*) pdata; const char *const lit = data->lit; size_t i = *offs; size_t j; for(j = 0 ; i < npb->strLen ; ++j) { if(lit[j] != npb->str[i]) break; ++i; } *parsed = j; /* we must always return how far we parsed! 
*/ if(lit[j] == '\0') { if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; } return r; } PARSER_DataForDisplay(Literal) { struct data_Literal *data = (struct data_Literal*) pdata; return data->lit; } PARSER_JsonConf(Literal) { struct data_Literal *data = (struct data_Literal*) pdata; return data->json_conf; } PARSER_Construct(Literal) { int r = 0; struct data_Literal *data = (struct data_Literal*) calloc(1, sizeof(struct data_Literal)); struct json_object *text; if(json_object_object_get_ex(json, "text", &text) == 0) { ln_errprintf(ctx, 0, "literal type needs 'text' parameter"); r = LN_BADCONFIG ; goto done; } data->lit = strdup(json_object_get_string(text)); data->json_conf = strdup(json_object_to_json_string(json)); *pdata = data; done: if(r != 0) free(data); return r; } PARSER_Destruct(Literal) { struct data_Literal *data = (struct data_Literal*) pdata; free((void*)data->lit); free((void*)data->json_conf); free(pdata); } /* for path compaction, we need a special handler to combine two * literal data elements. */ int ln_combineData_Literal(void *const porg, void *const padd) { struct data_Literal *const __restrict__ org = porg; struct data_Literal *const __restrict__ add = padd; int r = 0; const size_t len = strlen(org->lit); const size_t add_len = strlen(add->lit); char *const newlit = (char*)realloc((void*)org->lit, len+add_len+1); CHKN(newlit); org->lit = newlit; memcpy((char*)org->lit+len, add->lit, add_len+1); done: return r; } struct data_CharSeparated { char *term_chars; size_t n_term_chars; }; /** * Parse everything up to a specific character, or up to the end of string. * The character must be the only char inside extra data passed to the parser. * This parser always returns success. * By nature of the parser, it is required that end of string or the separator * follows this field in rule. 
*/ PARSER_Parse(CharSeparated) struct data_CharSeparated *const data = (struct data_CharSeparated*) pdata; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); i = *offs; /* search end of word */ int found = 0; while(i < npb->strLen && !found) { for(size_t j = 0 ; j < data->n_term_chars ; ++j) { if(npb->str[i] == data->term_chars[j]) { found = 1; break; } } if(!found) ++i; } /* success, persist */ *parsed = i - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ return r; } PARSER_Construct(CharSeparated) { int r = 0; struct data_CharSeparated *data = (struct data_CharSeparated*) calloc(1, sizeof(struct data_CharSeparated)); struct json_object *ed; if(json_object_object_get_ex(json, "extradata", &ed) == 0) { ln_errprintf(ctx, 0, "char-separated type needs 'extradata' parameter"); r = LN_BADCONFIG ; goto done; } data->term_chars = strdup(json_object_get_string(ed)); data->n_term_chars = strlen(data->term_chars); *pdata = data; done: if(r != 0) free(data); return r; } PARSER_Destruct(CharSeparated) { struct data_CharSeparated *const data = (struct data_CharSeparated*) pdata; free(data->term_chars); free(pdata); } /** * Just get everything till the end of string. */ PARSER_Parse(Rest) assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); /* silence the warning about unused variable */ (void)npb->str; /* success, persist */ *parsed = npb->strLen - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; return r; } struct data_OpQuotedString { bool escape; }; /** * Parse a possibly quoted string. In this initial implementation, escaping of the quote * char is not supported. A quoted string is one start starts with a double quote, * has some text (not containing double quotes) and ends with the first double * quote character seen. The extracted string does NOT include the quote characters. 
* swisskid, 2015-01-21
 */
PARSER_Parse(OpQuotedString)
	/* Two input forms are handled:
	 *  - not starting with '"': everything up to the next space (or end
	 *    of input) is taken as the value
	 *  - starting with '"': everything up to the closing quote is taken,
	 *    the quotes themselves are consumed but not part of the value
	 * If the "escape" option is set, \" and \\ inside a quoted string are
	 * unescaped and an escaped quote does not terminate the string.
	 */
	const char *c;
	size_t i;
	char *cstr = NULL;	/* extracted value, freed at "done" */
	struct data_OpQuotedString *const data = (struct data_OpQuotedString*) pdata;
	bool escape = false;
	if(data != NULL) {
		escape = data->escape;
	}

	assert(npb->str != NULL);
	assert(offs != NULL);
	assert(parsed != NULL);
	c = npb->str;
	i = *offs;

	if(i == npb->strLen) goto done;

	if(c[i] != '"') {
		/* unquoted: scan to the next space */
		while(i < npb->strLen && c[i] != ' ')
			i++;

		if(i == *offs)
			goto done;

		/* success, persist */
		*parsed = i - *offs;
		/* create JSON value to save quoted string contents */
		CHKN(cstr = strndup((char*)c + *offs, *parsed));
	} else {
		++i;
		/* search end of string */
		if (escape) {
			/*
			 * Fix by HSoszynski & KGuillemot to handle escaped quote & backslash infinitely
			 * Continue while we don't encounter the ending quote, and while it's not escaped
			 *   a"    => end
			 *   a\"   => continue
			 *   a\\"  => end
			 *   a\\\" => continue
			 *   ...
			 */
			/* length of the backslash run directly before the current
			 * character; an odd length means that character is escaped
			 */
			int continuous_backslash = 0;
			while(i < npb->strLen && (npb->str[i] != '"' || (continuous_backslash & 1) == 1 )) {
				if ( npb->str[i] == '\\' ) {
					continuous_backslash++;
				} else {
					continuous_backslash = 0;
				}
				++i;
			}

			if(i == npb->strLen || c[i] != '"')
				goto done;

			size_t end = i;	/* index of the closing quote */
			i = *offs + 1; // Eat starting quote
			/* Once we have identified the correct end, unescape escaped characters */
			/* the unescaped text can only be shorter than the raw text */
			CHKN(cstr = malloc(end - i + 1));
			int cpt_dst = 0;
			while(i < end) {
				/* drop the backslash of \\ and \", keep what follows */
				if( (npb->str[i] == '\\') && ((npb->str[i+1] == '\\') || (npb->str[i+1] == '"')) ) {
					i++;
				}
				*(cstr+(cpt_dst++)) = *(npb->str+(i++));
			}
			cstr[cpt_dst] = '\0';
			/* success, persist */
			*parsed = i + 1 - *offs; /* "eat" terminal double quote */
		} else {
			// old behaviour
			/* search end of string */
			while(i < npb->strLen && c[i] != '"')
				i++;

			if(i == npb->strLen || c[i] != '"')
				goto done;
			/* success, persist */
			*parsed = i + 1 - *offs; /* "eat" terminal double quote */
			/* create JSON value to save quoted string contents */
			CHKN(cstr = strndup((char*)c + *offs + 1, *parsed - 2));
		}
	}
	if (value != NULL) {
		CHKN(*value = json_object_new_string(cstr));
	}

	r = 0; /* success */
done:
	free(cstr);
	return r;
}

/* Build the parser config for op-quoted-string; the optional boolean
 * "escape" enables escape handling inside quoted strings.
 */
PARSER_Construct(OpQuotedString)
{
	int r = 0;
	LN_DBGPRINTF(ctx, "in parser_construct OpQuotedString");
	struct data_OpQuotedString *data = (struct data_OpQuotedString*) calloc(1, sizeof(struct data_OpQuotedString));
	struct json_object *obj;
	json_bool bool_obj;

	CHKN(data);

	if(json_object_object_get_ex(json, "escape", &obj) != 0) {
		LN_DBGPRINTF(ctx, "found 'escape' in fields");
		if(json_object_is_type(obj, json_type_boolean) == 1) {
			bool_obj = json_object_get_boolean(obj);
			data->escape = (bool)bool_obj;
		} else {
			ln_errprintf(ctx, 0, "op-quoted-string's 'escape' field should be boolean");
			r = LN_BADCONFIG;
			goto done;
		}
	}
	*pdata = data;
done:
	if(r != 0)
		free(data);
	return r;
}

PARSER_Destruct(OpQuotedString)
{
	free(pdata);
}


/**
 * Parse a quoted string. In this initial implementation, escaping of the quote
 * char is not supported. A quoted string is one start starts with a double quote,
 * has some text (not containing double quotes) and ends with the first double
 * quote character seen. The extracted string does NOT include the quote characters.
 * rgerhards, 2011-01-14
 */
PARSER_Parse(QuotedString)
	const char *c;
	size_t i;

	assert(npb->str != NULL);
	assert(offs != NULL);
	assert(parsed != NULL);
	c = npb->str;
	i = *offs;
	if(i + 2 > npb->strLen)
		goto done;	/* needs at least 2 characters */

	if(c[i] != '"')
		goto done;
	++i;

	/* search end of string */
	while(i < npb->strLen && c[i] != '"')
		i++;

	if(i == npb->strLen || c[i] != '"')
		goto done;

	/* success, persist */
	*parsed = i + 1 - *offs; /* "eat" terminal double quote */
	/* create JSON value to save quoted string contents */
	if(value != NULL) {
		/* +1 / -2: leave out the opening and the closing quote */
		*value = json_object_new_string_len(npb->str+(*offs) + 1, *parsed - 2);
	}
	r = 0; /* success */
done:
	return r;
}


/**
 * Parse an ISO date, that is YYYY-MM-DD (exactly this format).
 * Note: we do manual loop unrolling -- this is fast AND efficient.
* rgerhards, 2011-01-14 */ PARSER_Parse(ISODate) const char *c; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; i = *offs; if(*offs+10 > npb->strLen) goto done; /* if it is not 10 chars, it can't be an ISO date */ /* year */ if(!myisdigit(c[i])) goto done; if(!myisdigit(c[i+1])) goto done; if(!myisdigit(c[i+2])) goto done; if(!myisdigit(c[i+3])) goto done; if(c[i+4] != '-') goto done; /* month */ if(c[i+5] == '0') { if(c[i+6] < '1' || c[i+6] > '9') goto done; } else if(c[i+5] == '1') { if(c[i+6] < '0' || c[i+6] > '2') goto done; } else { goto done; } if(c[i+7] != '-') goto done; /* day */ if(c[i+8] == '0') { if(c[i+9] < '1' || c[i+9] > '9') goto done; } else if(c[i+8] == '1' || c[i+8] == '2') { if(!myisdigit(c[i+9])) goto done; } else if(c[i+8] == '3') { if(c[i+9] != '0' && c[i+9] != '1') goto done; } else { goto done; } /* success, persist */ *parsed = 10; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } /** * Parse a Cisco interface spec. Sample for such a spec are: * outside:192.168.52.102/50349 * inside:192.168.1.15/56543 (192.168.1.112/54543) * outside:192.168.1.13/50179 (192.168.1.13/50179)(LOCAL\some.user) * outside:192.168.1.25/41850(LOCAL\RG-867G8-DEL88D879BBFFC8) * inside:192.168.1.25/53 (192.168.1.25/53) (some.user) * 192.168.1.15/0(LOCAL\RG-867G8-DEL88D879BBFFC8) * From this, we conclude the format is: * [interface:]ip/port [SP (ip2/port2)] [[SP](username)] * In order to match, this syntax must start on a non-whitespace char * other than colon. */ PARSER_Parse(CiscoInterfaceSpec) const char *c; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; i = *offs; if(c[i] == ':' || isspace(c[i])) goto done; /* first, check if we have an interface. We do this by trying * to detect if we have an IP. If we have, obviously no interface * is present. Otherwise, we check if we have a valid interface. 
*/ int bHaveInterface = 0; size_t idxInterface = 0; size_t lenInterface = 0; int bHaveIP = 0; size_t lenIP; size_t idxIP = i; if(ln_v2_parseIPv4(npb, &i, NULL, parser_name, &lenIP, NULL) == 0) { bHaveIP = 1; i += lenIP - 1; /* position on delimiter */ } else { idxInterface = i; while(i < npb->strLen) { if(isspace(c[i])) goto done; if(c[i] == ':') break; ++i; } lenInterface = i - idxInterface; bHaveInterface = 1; } if(i == npb->strLen) goto done; ++i; /* skip over colon */ /* we now utilize our other parser helpers */ if(!bHaveIP) { idxIP = i; if(ln_v2_parseIPv4(npb, &i, NULL, parser_name, &lenIP, NULL) != 0) goto done; i += lenIP; } if(i == npb->strLen || c[i] != '/') goto done; ++i; /* skip slash */ const size_t idxPort = i; size_t lenPort; if(ln_v2_parseNumber(npb, &i, NULL, parser_name, &lenPort, NULL) != 0) goto done; i += lenPort; /* check if optional second ip/port is present * We assume we must at least have 5 chars [" (::1)"] */ int bHaveIP2 = 0; size_t idxIP2 = 0, lenIP2 = 0; size_t idxPort2 = 0, lenPort2 = 0; if(i+5 < npb->strLen && c[i] == ' ' && c[i+1] == '(') { size_t iTmp = i+2; /* skip over " (" */ idxIP2 = iTmp; if(ln_v2_parseIPv4(npb, &iTmp, NULL, parser_name, &lenIP2, NULL) == 0) { iTmp += lenIP2; if(i < npb->strLen || c[iTmp] == '/') { ++iTmp; /* skip slash */ idxPort2 = iTmp; if(ln_v2_parseNumber(npb, &iTmp, NULL, parser_name, &lenPort2, NULL) == 0) { iTmp += lenPort2; if(iTmp < npb->strLen && c[iTmp] == ')') { i = iTmp + 1; /* match, so use new index */ bHaveIP2 = 1; } } } } } /* check if optional username is present * We assume we must at least have 3 chars ["(n)"] */ int bHaveUser = 0; size_t idxUser = 0; size_t lenUser = 0; if( (i+2 < npb->strLen && c[i] == '(' && !isspace(c[i+1]) ) || (i+3 < npb->strLen && c[i] == ' ' && c[i+1] == '(' && !isspace(c[i+2])) ) { idxUser = i + ((c[i] == ' ') ? 
2 : 1); /* skip [SP]'(' */ size_t iTmp = idxUser; while(iTmp < npb->strLen && !isspace(c[iTmp]) && c[iTmp] != ')') ++iTmp; /* just scan */ if(iTmp < npb->strLen && c[iTmp] == ')') { i = iTmp + 1; /* we have a match, so use new index */ bHaveUser = 1; lenUser = iTmp - idxUser; } } /* all done, save data */ if(value == NULL) goto success; CHKN(*value = json_object_new_object()); json_object *json; if(bHaveInterface) { CHKN(json = json_object_new_string_len(c+idxInterface, lenInterface)); json_object_object_add_ex(*value, "interface", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); } CHKN(json = json_object_new_string_len(c+idxIP, lenIP)); json_object_object_add_ex(*value, "ip", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); CHKN(json = json_object_new_string_len(c+idxPort, lenPort)); json_object_object_add_ex(*value, "port", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); if(bHaveIP2) { CHKN(json = json_object_new_string_len(c+idxIP2, lenIP2)); json_object_object_add_ex(*value, "ip2", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); CHKN(json = json_object_new_string_len(c+idxPort2, lenPort2)); json_object_object_add_ex(*value, "port2", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); } if(bHaveUser) { CHKN(json = json_object_new_string_len(c+idxUser, lenUser)); json_object_object_add_ex(*value, "user", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); } success: /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: if(r != 0 && value != NULL && *value != NULL) { json_object_put(*value); *value = NULL; /* to be on the save side */ } return r; } /** * Parse a duration. A duration is similar to a timestamp, except that * it tells about time elapsed. As such, hours can be larger than 23 * and hours may also be specified by a single digit (this, for example, * is commonly done in Cisco software). 
* Note: we do manual loop unrolling -- this is fast AND efficient. */ PARSER_Parse(Duration) const char *c; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; i = *offs; /* hour is a bit tricky */ if(!myisdigit(c[i])) goto done; ++i; if(myisdigit(c[i])) ++i; if(c[i] == ':') ++i; else goto done; if(i+5 > npb->strLen) goto done;/* if it is not 5 chars from here, it can't be us */ if(c[i] < '0' || c[i] > '5') goto done; if(!myisdigit(c[i+1])) goto done; if(c[i+2] != ':') goto done; if(c[i+3] < '0' || c[i+3] > '5') goto done; if(!myisdigit(c[i+4])) goto done; /* success, persist */ *parsed = (i + 5) - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } /** * Parse a timestamp in 24hr format (exactly HH:MM:SS). * Note: we do manual loop unrolling -- this is fast AND efficient. * rgerhards, 2011-01-14 */ PARSER_Parse(Time24hr) const char *c; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; i = *offs; if(*offs+8 > npb->strLen) goto done; /* if it is not 8 chars, it can't be us */ /* hour */ if(c[i] == '0' || c[i] == '1') { if(!myisdigit(c[i+1])) goto done; } else if(c[i] == '2') { if(c[i+1] < '0' || c[i+1] > '3') goto done; } else { goto done; } /* TODO: the code below is a duplicate of 24hr parser - create common function */ if(c[i+2] != ':') goto done; if(c[i+3] < '0' || c[i+3] > '5') goto done; if(!myisdigit(c[i+4])) goto done; if(c[i+5] != ':') goto done; if(c[i+6] < '0' || c[i+6] > '5') goto done; if(!myisdigit(c[i+7])) goto done; /* success, persist */ *parsed = 8; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } /** * Parse a timestamp in 12hr format (exactly HH:MM:SS). * Note: we do manual loop unrolling -- this is fast AND efficient. * TODO: the code below is a duplicate of 24hr parser - create common function? 
* rgerhards, 2011-01-14 */ PARSER_Parse(Time12hr) const char *c; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); c = npb->str; i = *offs; if(*offs+8 > npb->strLen) goto done; /* if it is not 8 chars, it can't be us */ /* hour */ if(c[i] == '0') { if(!myisdigit(c[i+1])) goto done; } else if(c[i] == '1') { if(c[i+1] < '0' || c[i+1] > '2') goto done; } else { goto done; } if(c[i+2] != ':') goto done; if(c[i+3] < '0' || c[i+3] > '5') goto done; if(!myisdigit(c[i+4])) goto done; if(c[i+5] != ':') goto done; if(c[i+6] < '0' || c[i+6] > '5') goto done; if(!myisdigit(c[i+7])) goto done; /* success, persist */ *parsed = 8; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } /* helper to IPv4 address parser, checks the next set of numbers. * Syntax 1 to 3 digits, value together not larger than 255. * @param[in] npb->str parse buffer * @param[in/out] offs offset into buffer, updated if successful * @return 0 if OK, 1 otherwise */ static int chkIPv4AddrByte(npb_t *const npb, size_t *offs) { int val = 0; int r = 1; /* default: done -- simplifies things */ const char *c; size_t i = *offs; c = npb->str; if(i == npb->strLen || !myisdigit(c[i])) goto done; val = c[i++] - '0'; if(i < npb->strLen && myisdigit(c[i])) { val = val * 10 + c[i++] - '0'; if(i < npb->strLen && myisdigit(c[i])) val = val * 10 + c[i++] - '0'; } if(val > 255) /* cannot be a valid IP address byte! */ goto done; *offs = i; r = 0; done: return r; } /** * Parser for IPv4 addresses. 
*/ PARSER_Parse(IPv4) const char *c; size_t i; assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); i = *offs; if(i + 7 > npb->strLen) { /* IPv4 addr requires at least 7 characters */ goto done; } c = npb->str; /* byte 1*/ if(chkIPv4AddrByte(npb, &i) != 0) goto done; if(i == npb->strLen || c[i++] != '.') goto done; /* byte 2*/ if(chkIPv4AddrByte(npb, &i) != 0) goto done; if(i == npb->strLen || c[i++] != '.') goto done; /* byte 3*/ if(chkIPv4AddrByte(npb, &i) != 0) goto done; if(i == npb->strLen || c[i++] != '.') goto done; /* byte 4 - we do NOT need any char behind it! */ if(chkIPv4AddrByte(npb, &i) != 0) goto done; /* if we reach this point, we found a valid IP address */ *parsed = i - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } /* skip past the IPv6 address block, parse pointer is set to * first char after the block. Returns an error if already at end * of string. * @param[in] npb->str parse buffer * @param[in/out] offs offset into buffer, updated if successful * @return 0 if OK, 1 otherwise */ static int skipIPv6AddrBlock(npb_t *const npb, size_t *const __restrict__ offs) { int j; if(*offs == npb->strLen) return 1; for(j = 0 ; j < 4 && *offs+j < npb->strLen && isxdigit(npb->str[*offs+j]) ; ++j) /*just skip*/ ; *offs += j; return 0; } /** * Parser for IPv6 addresses. * Bases on RFC4291 Section 2.2. The address must be followed * by whitespace or end-of-string, else it is not considered * a valid address. This prevents false positives. */ PARSER_Parse(IPv6) const char *c; size_t i; size_t beginBlock; /* last block begin in case we need IPv4 parsing */ int hasIPv4 = 0; int nBlocks = 0; /* how many blocks did we already have? */ int bHad0Abbrev = 0; /* :: already used? 
*/ assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); i = *offs; if(i + 2 > npb->strLen) { /* IPv6 addr requires at least 2 characters ("::") */ goto done; } c = npb->str; /* check that first block is non-empty */ if(! ( isxdigit(c[i]) || (c[i] == ':' && c[i+1] == ':') ) ) goto done; /* try for all potential blocks plus one more (so we see errors!) */ for(int j = 0 ; j < 9 ; ++j) { beginBlock = i; if(skipIPv6AddrBlock(npb, &i) != 0) goto done; nBlocks++; if(i == npb->strLen) goto chk_ok; /* no more valid chars, check address */ if(c[i] != ':' && c[i] != '.') goto chk_ok; if(c[i] == '.'){ /* IPv4 processing! */ hasIPv4 = 1; break; } /* maximum blocks consumed and not ipv4, check if valid */ if (nBlocks == 8) goto chk_ok; if(c[i] != ':') goto done; i++; /* "eat" ':' */ if(i == npb->strLen) goto chk_ok; /* check for :: */ if(bHad0Abbrev) { if(c[i] == ':') goto done; } else { if(c[i] == ':') { bHad0Abbrev = 1; ++i; if(i == npb->strLen) goto chk_ok; } } } if(hasIPv4) { size_t ipv4_parsed; --nBlocks; /* prevent pure IPv4 address to be recognized */ if(beginBlock == *offs) goto done; i = beginBlock; if(ln_v2_parseIPv4(npb, &i, NULL, parser_name, &ipv4_parsed, NULL) != 0) goto done; i += ipv4_parsed; } chk_ok: /* we are finished parsing, check if things are ok */ if(nBlocks > 8) goto done; if(bHad0Abbrev && nBlocks >= 8) goto done; /* now check if trailing block is missing. Note that i is already * on next character, so we need to go two back. Two are always * present, else we would not reach this code here. */ if(c[i-1] == ':' && c[i-2] != ':') goto done; /* if we reach this point, we found a valid IP address */ *parsed = i - *offs; if(value != NULL) { *value = json_object_new_string_len(npb->str+(*offs), *parsed); } r = 0; /* success */ done: return r; } /* check if a char is valid inside a name of the iptables motif. 
* We try to keep the set as slim as possible, because the iptables
 * parser may otherwise create a very broad match (especially the
 * inclusion of simple words like "DF" cause grief here).
 * Note: we have taken the permitted set from iptables log samples.
 * Report bugs if we missed some additional rules.
 */
static inline int
isValidIPTablesNameChar(const char c)
{
	/* right now, upper case only is valid */
	return ('A' <= c && c <= 'Z') ? 1 : 0;
}

/* helper to iptables parser, parses out a a single name=value pair
 *
 * A field is either "NAME=value" (value runs up to the next whitespace
 * and may be empty) or a bare flag "NAME" followed by a space or the end
 * of the buffer. Flags are stored with a NULL json value.
 *
 * @param[in] npb parse buffer
 * @param[in/out] offs offset into buffer, updated if the field is valid
 * @param[in] valroot object receiving the field, NULL for validation only
 * @return 0 if OK, a liblognorm error code otherwise
 */
static int
parseIPTablesNameValue(npb_t *const npb,
	size_t *const __restrict__ offs,
	struct json_object *const __restrict__ valroot)
{
	int r = LN_WRONGPARSER;
	size_t i = *offs;
	char *name = NULL;

	const size_t iName = i;
	while(i < npb->strLen && isValidIPTablesNameChar(npb->str[i]))
		++i;
	if(i == iName || (i < npb->strLen && npb->str[i] != '=' && npb->str[i] != ' '))
		goto done; /* no name at all! */

	const ssize_t lenName = i - iName;

	ssize_t iVal = -1; /* -1 means: flag without value */
	size_t lenVal = 0;

	if(i < npb->strLen && npb->str[i] != ' ') {
		/* we have a real value (not just a flag name like "DF") */
		++i; /* skip '=' */
		iVal = i;
		/* cast: <ctype.h> functions are undefined for negative char values */
		while(i < npb->strLen && !isspace((unsigned char) npb->str[i]))
			++i;
		lenVal = i - iVal;
	}

	/* parsing OK */
	*offs = i;
	r = 0;

	if(valroot == NULL)
		goto done;

	CHKN(name = malloc(lenName+1));
	memcpy(name, npb->str+iName, lenName);
	name[lenName] = '\0';
	json_object *json;
	if(iVal == -1) {
		json = NULL;
	} else {
		CHKN(json = json_object_new_string_len(npb->str+iVal, lenVal));
	}
	json_object_object_add(valroot, name, json);
done:
	free(name);
	return r;
}

/**
 * Parser for iptables logs (the structured part).
 * This parser is named "v2-iptables" because of a traditional
 * parser named "iptables", which we do not want to replace, at
 * least right now (we may re-think this before the first release).
 * For performance reasons, this works in two stages. In the first
 * stage, we only detect if the motif is correct. The second stage is
 * only called when we know it is.
In it, we go once again over the * message again and actually extract the data. This is done because * data extraction is relatively expensive and in most cases we will * have much more frequent mismatches than matches. * Note that this motif must have at least one field, otherwise it * could detect things that are not iptables to be it. Further limits * may be imposed in the future as we see additional need. * added 2015-04-30 rgerhards */ PARSER_Parse(v2IPTables) size_t i = *offs; int nfields = 0; /* stage one */ while(i < npb->strLen) { CHKR(parseIPTablesNameValue(npb, &i, NULL)); ++nfields; /* exactly one SP is permitted between fields */ if(i < npb->strLen && npb->str[i] == ' ') ++i; } if(nfields < 2) { FAIL(LN_WRONGPARSER); } /* success, persist */ *parsed = i - *offs; r = 0; /* stage two */ if(value == NULL) goto done; i = *offs; CHKN(*value = json_object_new_object()); while(i < npb->strLen) { CHKR(parseIPTablesNameValue(npb, &i, *value)); while(i < npb->strLen && isspace(npb->str[i])) ++i; } done: if(r != 0 && value != NULL && *value != NULL) { json_object_put(*value); *value = NULL; } return r; } /* * Build a pruned copy of a json object with empty strings, arrays, and * objects removed recursively. This avoids relying on newer libfastjson * array deletion APIs that are not available in all supported CI images. * * return 0 if a pruned value is returned in *result * return 1 if the value becomes empty and should be skipped * return < 0 on error */ struct prune_repl_s { char *key; struct json_object *val; }; static int jsonPruneEmpty(struct json_object *__restrict__ json, struct json_object **result); /** * Free temporary state collected while pruning a JSON object in place. 
* * @param[in] delkeys list of keys scheduled for deletion * @param[in] ndel number of entries in delkeys * @param[in] repl list of replacement values scheduled for insertion * @param[in] nrepl number of entries in repl */ static void jsonPruneObjectCleanup(char **delkeys, const size_t ndel, struct prune_repl_s *repl, const size_t nrepl) { for(size_t i = 0 ; i < ndel ; ++i) free(delkeys[i]); free(delkeys); for(size_t i = 0 ; i < nrepl ; ++i) { free(repl[i].key); json_object_put(repl[i].val); } free(repl); } /** * Prune a scalar JSON value. * * Empty strings are treated as removable. All other scalar values are kept * by returning an additional reference to the original value. * * @param[in] json scalar JSON value to inspect * @param[out] result retained value when the scalar is kept * @returns 0 if result contains a kept value, 1 if the scalar is empty, * or a negative liblognorm error code on failure */ static int jsonPruneScalar(struct json_object *json, struct json_object **result) { if(json_object_get_type(json) == json_type_string && json_object_get_string_len(json) == 0) { return 1; } json_object_get(json); *result = json; return 0; } /** * Build a pruned copy of a JSON array. * * Array entries that become empty are dropped. Non-empty entries are appended * to a new array, which becomes the result. 
*
 * @param[in] json source array to prune
 * @param[out] result pruned array when non-empty
 * @returns 0 if result contains a kept array, 1 if the array becomes empty,
 *          or a negative liblognorm error code on failure
 */
static int
jsonPruneArray(struct json_object *json, struct json_object **result)
{
	struct json_object *out = json_object_new_array();
	if(out == NULL)
		return LN_NOMEM;

	/* the source array itself is never resized here, so read its length once */
	const int nElems = json_object_array_length(json);
	for(int i = 0 ; i < nElems ; ++i) {
		struct json_object *elem = NULL;
		const int rc = jsonPruneEmpty(json_object_array_get_idx(json, i), &elem);
		if(rc < 0) {
			json_object_put(out);
			return rc;
		}
		/* on rc == 0 we own a reference on elem; the array takes it
		 * over only if the add succeeds, so release it otherwise.
		 */
		if(rc == 0 && json_object_array_add(out, elem) != 0) {
			json_object_put(elem);
			json_object_put(out);
			return LN_NOMEM;
		}
	}

	if(json_object_array_length(out) == 0) {
		json_object_put(out);
		return 1;
	}

	*result = out;
	return 0;
}

/**
 * Prune a JSON object in place while preserving compatibility with older
 * libfastjson APIs.
 *
 * The helper first records keys to delete and values to replace, then applies
 * those updates after iteration has completed so the object iterator remains
 * valid.
*
 * @param[in] json source object to prune
 * @param[out] result retained object when non-empty
 * @returns 0 if result contains a kept object, 1 if the object becomes empty,
 *          or a negative liblognorm error code on failure
 */
static int
jsonPruneObject(struct json_object *json, struct json_object **result)
{
	int rc = 0;
	struct json_object *val = NULL;
	char **delkeys = NULL;	/* keys whose value pruned to nothing */
	size_t ndel = 0;
	struct prune_repl_s *repl = NULL; /* keys whose value got replaced by a new object */
	size_t nrepl = 0;
	struct json_object_iterator it = json_object_iter_begin(json);
	struct json_object_iterator itEnd = json_object_iter_end(json);

	/* pass 1: inspect every member, but do not touch the object yet */
	while (!json_object_iter_equal(&it, &itEnd)) {
		struct json_object *child = NULL;
		const char *name = json_object_iter_peek_name(&it);
		val = json_object_iter_peek_value(&it);

		rc = jsonPruneEmpty(val, &child);
		if(rc < 0)
			goto done;
		if(rc > 0) {
			/* member became empty: remember its key for deletion */
			char **newdel = realloc(delkeys, sizeof(char*) * (ndel + 1));
			if(newdel == NULL) {
				rc = LN_NOMEM;
				goto done;
			}
			delkeys = newdel;
			delkeys[ndel] = strdup(name);
			if(delkeys[ndel] == NULL) {
				rc = LN_NOMEM;
				goto done;
			}
			/* count the entry only once it is fully set up, so that
			 * the cleanup helper never sees a half-built slot */
			++ndel;
		} else if(child != val) {
			/* pruning produced a different object (a rebuilt array):
			 * schedule the swap. We own the reference on child until
			 * it is stored in repl[], hence the puts on the error paths.
			 */
			struct prune_repl_s *newrepl =
				realloc(repl, sizeof(struct prune_repl_s) * (nrepl + 1));
			if(newrepl == NULL) {
				json_object_put(child);
				rc = LN_NOMEM;
				goto done;
			}
			repl = newrepl;
			repl[nrepl].key = strdup(name);
			if(repl[nrepl].key == NULL) {
				json_object_put(child);
				rc = LN_NOMEM;
				goto done;
			}
			repl[nrepl].val = child;
			++nrepl;
		} else {
			/* member kept as is: drop the extra reference that the
			 * prune helper handed out for it */
			json_object_put(child);
		}
		json_object_iter_next(&it);
	}

	/* pass 2a: apply deletions, releasing the key copies as we go */
	for(size_t i = 0 ; i < ndel ; ++i) {
		json_object_object_del(json, delkeys[i]);
		free(delkeys[i]);
	}
	free(delkeys);
	/* reset so that the cleanup at "done" does not free anything twice */
	delkeys = NULL;
	ndel = 0;

	/* pass 2b: apply replacements; the object takes over our reference */
	for(size_t i = 0 ; i < nrepl ; ++i) {
		json_object_object_del(json, repl[i].key);
		json_object_object_add(json, repl[i].key, repl[i].val);
		free(repl[i].key);
	}
	free(repl);
	/* reset so that the cleanup at "done" does not free anything twice */
	repl = NULL;
	nrepl = 0;

	if(json_object_object_length(json) == 0) {
		rc = 1;
		goto done;
	}

	/* the object was pruned in place; return it with an extra reference */
	json_object_get(json);
	*result = json;
	rc = 0;

done:
	/* releases whatever is still pending (non-empty only on error paths) */
	jsonPruneObjectCleanup(delkeys, ndel, repl, nrepl);
	return rc;
}

static int
jsonPruneEmpty(struct json_object *__restrict__ json, struct json_object **result) { *result = NULL; if(json == NULL) return 1; switch (json_object_get_type(json)) { case json_type_string: return jsonPruneScalar(json, result); case json_type_array: return jsonPruneArray(json, result); case json_type_object: return jsonPruneObject(json, result); case json_type_null: case json_type_boolean: case json_type_double: case json_type_int: default: return jsonPruneScalar(json, result); } } /* * Parameters for field type json * skipempty - skips empty json objects. * - %field_name:json:skipempty% */ struct data_JSON { int skipempty; }; /** * Parse JSON. This parser tries to find JSON data inside a message. * If it finds valid JSON, it will extract it. Extra data after the * JSON is permitted. * Note: the json-c JSON parser treats whitespace after the actual * json to be part of the json. So in essence, any whitespace is * processed by this parser. We use the same semantics to keep things * neatly in sync. If json-c changes for some reason or we switch to * an alternate json lib, we probably need to be sure to keep that * behaviour, and probably emulate it. * added 2015-04-28 by rgerhards, v1.1.2 */ PARSER_Parse(JSON) const size_t i = *offs; struct json_tokener *tokener = NULL; struct data_JSON *const data = (struct data_JSON*) pdata; if(i == npb->strLen) goto done; if(npb->str[i] != '{' && npb->str[i] != '[') { /* this can't be json, see RFC4627, Sect. 2 * see this bug in json-c: * https://github.com/json-c/json-c/issues/181 * In any case, it's better to do this quick check, * even if json-c did not have the bug because this * check here is much faster than calling the parser. 
*/ goto done; } if((tokener = json_tokener_new()) == NULL) goto done; struct json_object *json = json_tokener_parse_ex(tokener, npb->str+i, (int) (npb->strLen - i)); if(json == NULL) goto done; /* success, persist */ *parsed = (i + tokener->char_offset) - *offs; r = 0; /* success */ if(value == NULL) { json_object_put(json); } else { if (data && data->skipempty) { struct json_object *pruned = NULL; int rc = jsonPruneEmpty(json, &pruned); json_object_put(json); if (rc < 0) { FAIL(LN_WRONGPARSER); } else if (rc > 0) { /* * json value is empty. * E.g., {"message":""}, {"message":[]}, {"message":{}} */ FAIL(0); } json = pruned; } *value = json; } done: if(tokener != NULL) json_tokener_free(tokener); return r; } PARSER_Construct(JSON) { int r = 0; struct json_object *ed; struct data_JSON *data = NULL; const char *flag; if(json == NULL) goto done; if(json_object_object_get_ex(json, "extradata", &ed) == 0) { /* No JSON parameter */ goto done; } data = (struct data_JSON*) calloc(1, sizeof(struct data_JSON)); flag = json_object_get_string(ed); if (strcasecmp(flag, "skipempty") == 0) { data->skipempty = 1; } else { ln_errprintf(ctx, 0, "invalid flag for JSON parser: %s", flag); r = LN_BADCONFIG; goto done; } *pdata = data; done: if(r != 0) { free(data); } return r; } PARSER_Destruct(JSON) { free(pdata); } /* check if a char is valid inside a name of a NameValue list * The set of valid characters may be extended if there is good * need to do so. We have selected the current set carefully, but * may have overlooked some cases. */ static inline int isValidNameChar(const char c) { return (isalnum(c) || c == '.' || c == '_' || c == '-' ) ? 1 : 0; } /* helper to NameValue parser, parses out a a single name=value pair * * name must be alphanumeric characters, value must be non-whitespace * characters, if quoted than with symmetric quotes. Supported formats * - name=value * - name="value" * - name='value' * Note "name=" is valid and means a field with empty value. 
* TODO: so far, quote characters are not permitted WITHIN quoted values. */ static int parseNameValue(npb_t *const npb, size_t *const __restrict__ offs, struct json_object *const __restrict__ valroot, const char sep, const char ass, const bool ignore_ws) { int r = LN_WRONGPARSER; size_t i = *offs; char *name = NULL; if(ignore_ws) { while(i < npb->strLen && isspace((unsigned char) npb->str[i])) i++; } const size_t iName = i; /* If the assignator character is specified, search for it If it's not, check key name validity */ while(i < npb->strLen && ((ass != 0) ? (npb->str[i] != ass) : isValidNameChar(npb->str[i]))) ++i; if(i == iName || ((ass != 0) ? (npb->str[i] != ass) : (npb->str[i] != '='))) goto done; /* no name at all! */ size_t lenName = i - iName; if(ignore_ws) { while(lenName > 0 && isspace((unsigned char) npb->str[iName + lenName - 1])) { lenName--; } } ++i; /* skip assignator */ if(ignore_ws) { while(i < npb->strLen && isspace((unsigned char) npb->str[i])) i++; } char quoting = (i < npb->strLen) ? npb->str[i] : 0; if(i < npb->strLen && (quoting == '"' || quoting == '\'')) i++; else quoting = 0; //no quoting detected const size_t iVal = i; if(quoting) { // wait on an unescaped matching quoting /* * Fix by HSoszynski & KGuillemot to handle escaped quote & backslash infinitly * Continue while we don't encounter the ending quote, and while it's not escaped * a" => end * a\" => continue * a\\" => end * a\\\" => continue * ... 
*/ int continuous_backslash = 0; while(i < npb->strLen && (npb->str[i] != quoting || continuous_backslash%2 == 1 )) { if ( npb->str[i] == '\\' ) { continuous_backslash++; } else { continuous_backslash = 0; } ++i; } } else { /* We seek characters as long as: - we have characters remaining - the character is NOT a whitespace (default separator) - the character is NOT the separator set explicitely by the user (sep) - the character IS the separator (sep), BUT is escaped */ /* * Fix by HSoszynski & KGuillemot to handle escaped separator & backslash infinitly * Continue while we don't encounter the ending separator, and while it's not escaped * a, => end * a\, => continue * a\\, => end * a\\\, => continue * ... */ int continuous_backslash = 0; while(i < npb->strLen && ((sep == 0 ? (!isspace(npb->str[i])) : (npb->str[i] != sep)) || continuous_backslash%2 == 1)) { if ( npb->str[i] == '\\' ) { continuous_backslash++; } else { continuous_backslash = 0; } ++i; } } const size_t iValEnd = i; // in case of quoting, ensure we skip it if(i < npb->strLen && npb->str[i] == quoting) ++i; else if(quoting) goto done; size_t lenVal = quoting ? (iValEnd - iVal) : (i - iVal); if(ignore_ws && !quoting) { while(lenVal > 0 && isspace((unsigned char) npb->str[iVal + lenVal - 1])) { lenVal--; } } /* parsing OK */ *offs = i; r = 0; if(valroot == NULL) goto done; CHKN(name = malloc(lenName+1)); memcpy(name, npb->str+iName, lenName); name[lenName] = '\0'; json_object *json; CHKN(json = json_object_new_string_len(npb->str+iVal, lenVal)); json_object_object_add(valroot, name, json); done: free(name); return r; } /** * Parse CEE syslog. * This essentially is a JSON parser, with additional restrictions: * The message must start with "@cee:" and json must immediately follow (whitespace permitted). * after the JSON, there must be no other non-whitespace characters. * In other words: the message must consist of a single JSON object, * only. 
* added 2015-04-28 by rgerhards, v1.1.2 */ PARSER_Parse(CEESyslog) size_t i = *offs; struct json_tokener *tokener = NULL; struct json_object *json = NULL; if(npb->strLen < i + 7 || /* "@cee:{}" is minimum text */ npb->str[i] != '@' || npb->str[i+1] != 'c' || npb->str[i+2] != 'e' || npb->str[i+3] != 'e' || npb->str[i+4] != ':') goto done; /* skip whitespace */ for(i += 5 ; i < npb->strLen && isspace(npb->str[i]) ; ++i) /* just skip */; if(i == npb->strLen || npb->str[i] != '{') goto done; /* note: we do not permit arrays in CEE mode */ if((tokener = json_tokener_new()) == NULL) goto done; json = json_tokener_parse_ex(tokener, npb->str+i, (int) (npb->strLen - i)); if(json == NULL) goto done; if(i + tokener->char_offset != npb->strLen) goto done; /* success, persist */ *parsed = npb->strLen; r = 0; /* success */ if(value != NULL) { *value = json; json = NULL; /* do NOT free below! */ } done: if(tokener != NULL) json_tokener_free(tokener); if(json != NULL) json_object_put(json); return r; } struct data_NameValue { char sep; /* separator (between key/value couples) */ char ass; /* assignator (between key and value) */ bool ignore_whitespaces; /* trim surrounding whitespace for key/value */ }; /** * Parser for name/value pairs. * On entry must point to alnum char. All following chars must be * name/value pairs delimited by whitespace up until the end of string. * For performance reasons, this works in two stages. In the first * stage, we only detect if the motif is correct. The second stage is * only called when we know it is. In it, we go once again over the * message again and actually extract the data. This is done because * data extraction is relatively expensive and in most cases we will * have much more frequent mismatches than matches. * added 2015-04-25 rgerhards */ PARSER_Parse(NameValue) size_t i = *offs; struct data_NameValue *const data = (struct data_NameValue*) pdata; const char sep = (data != NULL) ? data->sep : 0; const char ass = (data != NULL) ? 
data->ass : 0; const bool ignore_ws = (data != NULL) ? data->ignore_whitespaces : false; LN_DBGPRINTF(npb->ctx, "in parse_NameValue, separator is '%c'(0x%02x) assignator is '%c'(0x%02x) " "ignore_whitespaces is '%s'(%d)", sep, sep, ass, ass, ignore_ws ? "true" : "false", ignore_ws); /* stage one */ while(i < npb->strLen) { if (parseNameValue(npb, &i, NULL, sep, ass, ignore_ws) == 0 ) { if(ignore_ws && sep != 0) { while(i < npb->strLen && isspace((unsigned char) npb->str[i])) ++i; } // Check if there is at least one time the separator after value if( i < npb->strLen && !(sep == 0 ? (isspace(npb->str[i])) : (npb->str[i] == sep)) ) break; while(i < npb->strLen && (sep == 0 ? (isspace(npb->str[i])) : (npb->str[i] == sep))) ++i; } else { break; } } /* success, persist */ *parsed = i - *offs; r = 0; /* success */ /* stage two */ if(value == NULL) goto done; i = *offs; CHKN(*value = json_object_new_object()); while(i < npb->strLen) { if (parseNameValue(npb, &i, *value, sep, ass, ignore_ws) == 0 ) { if(ignore_ws && sep != 0) { while(i < npb->strLen && isspace((unsigned char) npb->str[i])) ++i; } // Check if there is at least one time the separator after value if( i < npb->strLen && !(sep == 0 ? (isspace(npb->str[i])) : (npb->str[i] == sep)) ) break; while(i < npb->strLen && ((sep == 0) ? 
(isspace(npb->str[i])) : (npb->str[i] == sep))) ++i; } else { break; } } /* TODO: fix mem leak if alloc json fails */ done: return r; } PARSER_Construct(NameValue) { int r = 0; LN_DBGPRINTF(ctx, "in parser_construct NameValue"); struct data_NameValue *data = (struct data_NameValue*) calloc(1, sizeof(struct data_NameValue)); struct json_object *obj; const char *str; if(json_object_object_get_ex(json, "extradata", &obj) != 0) { LN_DBGPRINTF(ctx, "found 'extradata' in fields, assigning to 'separator'"); if(json_object_get_string_len(obj) == 1) { str = json_object_get_string(obj); data->sep = str[0]; } else { ln_errprintf(ctx, 0, "name-value-list's extradata should only be 1 character"); r = LN_BADCONFIG; goto done; } } if(json_object_object_get_ex(json, "separator", &obj) != 0) { LN_DBGPRINTF(ctx, "found 'separator' in fields"); if(json_object_get_string_len(obj) == 1) { str = json_object_get_string(obj); data->sep = str[0]; } else { ln_errprintf(ctx, 0, "name-value-list's 'separator' field should only be 1 character"); r = LN_BADCONFIG; goto done; } } if(json_object_object_get_ex(json, "assignator", &obj) != 0) { LN_DBGPRINTF(ctx, "found 'assignator' in fields"); if(json_object_get_string_len(obj) == 1) { str = json_object_get_string(obj); data->ass = str[0]; } else { ln_errprintf(ctx, 0, "name-value-list's 'assignator' field should only be 1 character"); r = LN_BADCONFIG; goto done; } } if(json_object_object_get_ex(json, "ignore_whitespaces", &obj) != 0) { LN_DBGPRINTF(ctx, "found 'ignore_whitespaces' in fields"); if(json_object_is_type(obj, json_type_boolean) == 1) { data->ignore_whitespaces = json_object_get_boolean(obj); } else { ln_errprintf(ctx, 0, "name-value-list's 'ignore_whitespaces' field should be boolean"); r = LN_BADCONFIG; goto done; } } *pdata = data; done: if(r != 0) free(data); return r; } PARSER_Destruct(NameValue) { free(pdata); } /** * Parse a MAC layer address. 
* The standard (IEEE 802) format for printing MAC-48 addresses in * human-friendly form is six groups of two hexadecimal digits, * separated by hyphens (-) or colons (:), in transmission order * (e.g. 01-23-45-67-89-ab or 01:23:45:67:89:ab ). * This form is also commonly used for EUI-64. * from: http://en.wikipedia.org/wiki/MAC_address * * This parser must start on a hex digit. * added 2015-05-04 by rgerhards, v1.1.2 */ PARSER_Parse(MAC48) size_t i = *offs; char delim; if(npb->strLen < i + 17 || /* this motif has exactly 17 characters */ !isxdigit(npb->str[i]) || !isxdigit(npb->str[i+1]) ) FAIL(LN_WRONGPARSER); if(npb->str[i+2] == ':') delim = ':'; else if(npb->str[i+2] == '-') delim = '-'; else FAIL(LN_WRONGPARSER); /* first byte ok */ if(!isxdigit(npb->str[i+3]) || !isxdigit(npb->str[i+4]) || npb->str[i+5] != delim || /* 2nd byte ok */ !isxdigit(npb->str[i+6]) || !isxdigit(npb->str[i+7]) || npb->str[i+8] != delim || /* 3rd byte ok */ !isxdigit(npb->str[i+9]) || !isxdigit(npb->str[i+10]) || npb->str[i+11] != delim || /* 4th byte ok */ !isxdigit(npb->str[i+12]) || !isxdigit(npb->str[i+13]) || npb->str[i+14] != delim || /* 5th byte ok */ !isxdigit(npb->str[i+15]) || !isxdigit(npb->str[i+16]) /* 6th byte ok */ ) FAIL(LN_WRONGPARSER); /* success, persist */ *parsed = 17; r = 0; /* success */ if(value != NULL) { CHKN(*value = json_object_new_string_len(npb->str+i, 17)); } done: return r; } /* This parses the extension value and updates the index * to point to the end of it. */ static int cefParseExtensionValue(npb_t *const npb, size_t *__restrict__ iEndVal) { int r = 0; size_t i = *iEndVal; size_t iLastWordBegin; /* first find next unquoted equal sign and record begin of * last word in front of it - this is the actual end of the * current name/value pair and the begin of the next one. 
*/ int hadSP = 0; int inEscape = 0; for(iLastWordBegin = 0 ; i < npb->strLen ; ++i) { if(inEscape) { if(npb->str[i] != '=' && npb->str[i] != '\\' && npb->str[i] != 'r' && npb->str[i] != 'n' && npb->str[i] != '/') FAIL(LN_WRONGPARSER); inEscape = 0; } else { if(npb->str[i] == '=') { break; } else if(npb->str[i] == '\\') { inEscape = 1; } else if(npb->str[i] == ' ') { hadSP = 1; } else { if(hadSP) { iLastWordBegin = i; hadSP = 0; } } } } /* Note: iLastWordBegin can never be at offset zero, because * the CEF header starts there! */ if(i < npb->strLen) { *iEndVal = (iLastWordBegin == 0) ? i : iLastWordBegin - 1; } else { *iEndVal = i; } done: return r; } /* must be positioned on first char of name, returns index * of end of name. * Note: ArcSight violates the CEF spec ifself: they generate * leading underscores in their extension names, which are * definitely not alphanumeric. We still accept them... * They also seem to use dots. */ static int cefParseName(npb_t *const npb, size_t *const __restrict__ i) { int r = 0; while(*i < npb->strLen && npb->str[*i] != '=') { if(!(isalnum(npb->str[*i]) || npb->str[*i] == '_' || npb->str[*i] == '.')) FAIL(LN_WRONGPARSER); ++(*i); } done: return r; } /* parse CEF extensions. They are basically name=value * pairs with the ugly exception that values may contain * spaces but need NOT to be quoted. Thankfully, at least * names are specified as being alphanumeric without spaces * in them. So we must add a lookahead parser to check if * a word is a name (and thus the begin of a new pair) or * not. This is done by subroutines. 
*/
/* Parameters:
 *   npb    parse buffer (string and its length)
 *   offs   in: index where the extensions start; out (success only):
 *          set to the string length, as this parser consumes everything
 *   jroot  if non-NULL, every name/value pair is added to this object
 *          as a string field (value with escapes resolved); if NULL,
 *          the input is only validated
 * Returns 0 on success, LN_WRONGPARSER on malformed input, or the error
 * code set by CHKN if an allocation fails.
 */
static int
cefParseExtensions(npb_t *const npb,
	size_t *const __restrict__ offs,
	json_object *const __restrict__ jroot)
{
	int r = 0;
	size_t i = *offs;
	size_t iName, lenName;
	size_t iValue, lenValue;
	char *name = NULL;
	char *value = NULL;

	while(i < npb->strLen) {
		while(i < npb->strLen && npb->str[i] == ' ')
			++i;
		iName = i;
		CHKR(cefParseName(npb, &i));
		/* cefParseName() also stops at the end of the string, so the
		 * index must be validated before the character is inspected.
		 */
		if(i >= npb->strLen || npb->str[i] != '=')
			FAIL(LN_WRONGPARSER);
		lenName = i - iName;
		++i; /* skip '=' */

		/* the value may be empty, in which case lenValue becomes 0 */
		iValue = i;
		CHKR(cefParseExtensionValue(npb, &i));
		lenValue = i - iValue;
		++i; /* skip past value */

		if(jroot != NULL) {
			CHKN(name = malloc(sizeof(char) * (lenName + 1)));
			memcpy(name, npb->str+iName, lenName);
			name[lenName] = '\0';
			CHKN(value = malloc(sizeof(char) * (lenValue + 1)));
			/* copy value but resolve escape sequences */
			size_t iDst = 0;
			for(size_t iSrc = 0 ; iSrc < lenValue ; ++iSrc) {
				char c = npb->str[iValue+iSrc];
				if(c == '\\') {
					/* never look past the value: a dangling
					 * backslash is an incomplete escape
					 */
					if(++iSrc >= lenValue)
						FAIL(LN_WRONGPARSER);
					c = npb->str[iValue+iSrc];
					switch(c) {
					case 'n':
						c = '\n';
						break;
					case 'r':
						c = '\r';
						break;
					default:
						/* '=', '\\' and '/' stand for
						 * themselves; anything else is
						 * copied literally so that no
						 * byte is left uninitialized
						 */
						break;
					}
				}
				value[iDst++] = c;
			}
			value[iDst] = '\0';
			json_object *json;
			CHKN(json = json_object_new_string(value));
			json_object_object_add(jroot, name, json);
			free(name); name = NULL;
			free(value); value = NULL;
		}
	}

	*offs = npb->strLen; /* this parser consume everything or fails */

done:
	free(name);
	free(value);
	return r;
}

/* gets a CEF header field. Must be positioned on the
 * first char after the '|' in front of field.
 * Note that '|' may be escaped as "\|", which also means
 * we need to support "\\" (see CEF spec for details).
 * We return the string in *val, if val is non-null. In
 * that case we allocate memory that the caller must free.
 * This is necessary because there are potentially escape
 * sequences inside the string.
*/ static int cefGetHdrField(npb_t *const npb, size_t *const __restrict__ offs, char **val) { int r = 0; size_t i = *offs; assert(npb->str[i] != '|'); while(i < npb->strLen && npb->str[i] != '|') { if(npb->str[i] == '\\') { ++i; /* skip esc char */ if(npb->str[i] != '\\' && npb->str[i] != '|') FAIL(LN_WRONGPARSER); } ++i; /* scan to next delimiter */ } if(npb->str[i] != '|') FAIL(LN_WRONGPARSER); const size_t iBegin = *offs; /* success, persist */ *offs = i + 1; if(val == NULL) { r = 0; goto done; } const size_t len = i - iBegin; CHKN(*val = malloc(len + 1)); size_t iDst = 0; for(size_t iSrc = 0 ; iSrc < len ; ++iSrc) { if(npb->str[iBegin+iSrc] == '\\') ++iSrc; /* we already checked above that this is OK! */ (*val)[iDst++] = npb->str[iBegin+iSrc]; } (*val)[iDst] = 0; r = 0; done: return r; } /** * Parser for ArcSight Common Event Format (CEF) version 0. * added 2015-05-05 by rgerhards, v1.1.2 */ PARSER_Parse(CEF) size_t i = *offs; char *vendor = NULL; char *product = NULL; char *version = NULL; char *sigID = NULL; char *name = NULL; char *severity = NULL; /* minimum header: "CEF:0|x|x|x|x|x|x|" --> 17 chars */ if(npb->strLen < i + 17 || npb->str[i] != 'C' || npb->str[i+1] != 'E' || npb->str[i+2] != 'F' || npb->str[i+3] != ':' || npb->str[i+4] != '0' || npb->str[i+5] != '|' ) FAIL(LN_WRONGPARSER); i += 6; /* position on '|' */ CHKR(cefGetHdrField(npb, &i, (value == NULL) ? NULL : &vendor)); CHKR(cefGetHdrField(npb, &i, (value == NULL) ? NULL : &product)); CHKR(cefGetHdrField(npb, &i, (value == NULL) ? NULL : &version)); CHKR(cefGetHdrField(npb, &i, (value == NULL) ? NULL : &sigID)); CHKR(cefGetHdrField(npb, &i, (value == NULL) ? NULL : &name)); CHKR(cefGetHdrField(npb, &i, (value == NULL) ? NULL : &severity)); while(i < npb->strLen && npb->str[i] == ' ') /* skip leading SP */ ++i; /* OK, we now know we have a good header. Now, we need * to process extensions. * This time, we do NOT pre-process the extension, but rather * persist them directly to JSON. 
This is contrary to other * parsers, but as the CEF header is pretty unique, this time * it is extremely unlikely we will get a no-match during * extension processing. Even if so, nothing bad happens, as * the extracted data is discarded. But the regular case saves * us processing time and complexity. The only time when we * cannot directly process it is when the caller asks us not * to persist the data. So this must be handled differently. */ size_t iBeginExtensions = i; CHKR(cefParseExtensions(npb, &i, NULL)); /* success, persist */ *parsed = i - *offs; r = 0; /* success */ if(value != NULL) { CHKN(*value = json_object_new_object()); json_object *json; CHKN(json = json_object_new_string(vendor)); json_object_object_add(*value, "DeviceVendor", json); CHKN(json = json_object_new_string(product)); json_object_object_add(*value, "DeviceProduct", json); CHKN(json = json_object_new_string(version)); json_object_object_add(*value, "DeviceVersion", json); CHKN(json = json_object_new_string(sigID)); json_object_object_add(*value, "SignatureID", json); CHKN(json = json_object_new_string(name)); json_object_object_add(*value, "Name", json); CHKN(json = json_object_new_string(severity)); json_object_object_add(*value, "Severity", json); json_object *jext; CHKN(jext = json_object_new_object()); json_object_object_add(*value, "Extensions", jext); i = iBeginExtensions; cefParseExtensions(npb, &i, jext); } done: if(r != 0 && value != NULL && *value != NULL) { json_object_put(*value); value = NULL; } free(vendor); free(product); free(version); free(sigID); free(name); free(severity); return r; } struct data_CheckpointLEA { char terminator; /* '\0' - do not use */ }; /** * Parser for Checkpoint LEA on-disk format. 
* added 2015-06-18 by rgerhards, v1.1.2 */ PARSER_Parse(CheckpointLEA) size_t i = *offs; size_t iName, lenName; size_t iValue, lenValue; int foundFields = 0; char *name = NULL; char *val = NULL; struct data_CheckpointLEA *const data = (struct data_CheckpointLEA*) pdata; while(i < npb->strLen) { while(i < npb->strLen && npb->str[i] == ' ') /* skip leading SP */ ++i; if(i == npb->strLen) { /* OK if just trailing space */ if(foundFields == 0) FAIL(LN_WRONGPARSER); break; /* we are done with the loop, all processed */ } else { ++foundFields; } iName = i; /* TODO: do a stricter check? ... but we don't have a spec */ if(i < npb->strLen && npb->str[i] == data->terminator) { break; } while(i < npb->strLen && npb->str[i] != ':') { ++i; } if(i+1 >= npb->strLen || npb->str[i] != ':') { FAIL(LN_WRONGPARSER); } /* Sometimes there is multiple colons */ while( i + 1 < npb->strLen && npb->str[i+1] == ':' ) { i++; } lenName = i - iName; ++i; /* skip ':' */ while(i < npb->strLen && npb->str[i] == ' ') { /* skip leading SP */ ++i; } if(i == npb->strLen) FAIL(LN_WRONGPARSER); /* Improvement by KGuillemot & M4jr0 to support quoted values */ if( npb->str[i] == '"' ) { int continuous_backslash = 0; iValue = i+1; i++; while( i < npb->strLen && ( npb->str[i] != '"' || (continuous_backslash & 1) == 1 ) ) { if(npb->str[i] == '\\') { ++continuous_backslash; } else { continuous_backslash = 0; } ++i; } // Do not take the " in value lenValue = i - iValue; if(i == npb->strLen) FAIL(LN_WRONGPARSER); // Skip " ++i; } else { iValue = i; while (i < npb->strLen && npb->str[i] != ';' && npb->str[i] != data->terminator) { ++i; } lenValue = i - iValue; while(lenValue > 0 && npb->str[iValue + lenValue - 1] == ' ') { --lenValue; } } while(i < npb->strLen && npb->str[i] == ' ') { ++i; } if(i >= npb->strLen || (npb->str[i] != ';' && npb->str[i] != data->terminator)) FAIL(LN_WRONGPARSER); if(npb->str[i] == ';') ++i; /* skip ';' */ if(value != NULL) { CHKN(name = malloc(sizeof(char) * (lenName + 1))); 
memcpy(name, npb->str+iName, lenName); name[lenName] = '\0'; CHKN(val = malloc(sizeof(char) * (lenValue + 1))); memcpy(val, npb->str+iValue, lenValue); val[lenValue] = '\0'; if(*value == NULL) CHKN(*value = json_object_new_object()); json_object *json; CHKN(json = json_object_new_string(val)); json_object_object_add(*value, name, json); free(name); name = NULL; free(val); val = NULL; } } /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: free(name); free(val); if(r != 0 && value != NULL && *value != NULL) { json_object_put(*value); *value = NULL; } return r; } PARSER_Construct(CheckpointLEA) { int r = 0; struct data_CheckpointLEA *data = (struct data_CheckpointLEA*) calloc(1, sizeof(struct data_CheckpointLEA)); if(json == NULL) goto done; struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(!strcmp(key, "terminator")) { const char *const optval = json_object_get_string(val); if(strlen(optval) != 1) { ln_errprintf(ctx, 0, "terminator must be exactly one character " "but is: '%s'", optval); r = LN_BADCONFIG; goto done; } data->terminator = *optval; } json_object_iter_next(&it); } done: *pdata = data; return r; } PARSER_Destruct(CheckpointLEA) { free(pdata); } /* helper to repeat parser constructor: checks that dot field name * is only present if there is one field inside the "parser" list. * returns 1 if ok, 0 otherwise. 
*/ static int chkNoDupeDotInParserDefs(ln_ctx ctx, struct json_object *parsers) { int r = 1; int nParsers = 0; int nDots = 0; if(json_object_get_type(parsers) == json_type_array) { const int maxparsers = json_object_array_length(parsers); for(int i = 0 ; i < maxparsers ; ++i) { struct json_object *const parser = json_object_array_get_idx(parsers, i); struct json_object *fname; json_object_object_get_ex(parser, "name", &fname); if(fname != NULL) { ++nParsers; if(!strcmp(json_object_get_string(fname), ".")) ++nDots; } } } if(nParsers > 1 && nDots > 0) { ln_errprintf(ctx, 0, "'repeat' parser supports dot name only " "if single parser is used in 'parser' part, invalid " "construct: %s", json_object_get_string(parsers)); r = 0; } return r; } /** * "repeat" special parser. */ PARSER_Parse(Repeat) struct data_Repeat *const data = (struct data_Repeat*) pdata; struct ln_pdag *endNode = NULL; size_t strtoffs = *offs; size_t lastMatch = strtoffs; size_t lastKnownGood = strtoffs; struct json_object *json_arr = NULL; struct json_object *parsed_value = NULL; const size_t parsedTo_save = npb->parsedTo; const size_t longestParsedTo_save = npb->longestParsedTo; int mergeResults = parser_name != NULL && parser_name[0] == '.' && parser_name[1] == '\0'; do { if(parsed_value == NULL) { parsed_value = json_object_new_object(); } r = ln_normalizeRec(npb, data->parser, strtoffs, 1, parsed_value, &endNode, data->failOnDuplicate, parsed_value, parser_name); strtoffs = npb->parsedTo; LN_DBGPRINTF(npb->ctx, "repeat parser returns %d, parsed %zu, json: %s", r, npb->parsedTo, json_object_to_json_string(parsed_value)); if(r != 0) { json_object_put(parsed_value); parsed_value = NULL; if(data->permitMismatchInParser) { strtoffs = lastKnownGood; /* go back to final match */ LN_DBGPRINTF(npb->ctx, "mismatch in repeat, " "parse ptr back to %zd", strtoffs); goto success; } else { // Reset longest match npb->longestParsedTo = lastMatch > longestParsedTo_save ? 
lastMatch : longestParsedTo_save; goto done; } } if (!mergeResults) { if(json_arr == NULL) { json_arr = json_object_new_array(); } /* check for name=".", which means we need to place the * value only into to array. As we do not have direct * access to the key, we loop over our result as a work- * around. */ struct json_object *toAdd = parsed_value; struct json_object_iterator it = json_object_iter_begin(parsed_value); struct json_object_iterator itEnd = json_object_iter_end(parsed_value); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(key[0] == '.' && key[1] == '\0') { json_object_get(val); /* inc refcount! */ toAdd = val; } json_object_iter_next(&it); } json_object_array_add(json_arr, toAdd); if(toAdd != parsed_value) json_object_put(parsed_value); LN_DBGPRINTF(npb->ctx, "arr: %s", json_object_to_json_string(json_arr)); // If we are an array, we need to get a new value and we don't want to free at end parsed_value = NULL; } /* now check if we shall continue */ npb->parsedTo = 0; lastMatch = lastKnownGood; lastKnownGood = strtoffs; /* record pos in case of fail in while */ r = ln_normalizeRec(npb, data->while_cond, strtoffs, 1, NULL, &endNode, 0, NULL, parser_name); LN_DBGPRINTF(npb->ctx, "repeat while returns %d, parsed %zu", r, npb->parsedTo); if(r == 0) strtoffs = npb->parsedTo; } while(r == 0); success: /* success, persist */ *parsed = strtoffs - *offs; if(value == NULL) { if (json_arr != NULL) { json_object_put(json_arr); } if (parsed_value != NULL) { json_object_put(parsed_value); } } else { *value = !mergeResults ? 
json_arr : parsed_value; } npb->parsedTo = parsedTo_save; r = 0; /* success */ done: if(r != 0) { if (json_arr != NULL) { json_object_put(json_arr); } if (parsed_value != NULL) { json_object_put(parsed_value); } } return r; } PARSER_Construct(Repeat) { int r = 0; struct data_Repeat *data = (struct data_Repeat*) calloc(1, sizeof(struct data_Repeat)); struct ln_pdag *endnode; /* we need this fo ln_pdagAddParser, which updates its param! */ if(json == NULL) goto done; struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(!strcmp(key, "parser")) { if(chkNoDupeDotInParserDefs(ctx, val) != 1) { r = LN_BADCONFIG; goto done; } endnode = data->parser = ln_newPDAG(ctx); json_object_get(val); /* prevent free in pdagAddParser */ CHKR(ln_pdagAddParser(ctx, &endnode, val)); endnode->flags.isTerminal = 1; } else if(!strcmp(key, "while")) { endnode = data->while_cond = ln_newPDAG(ctx); json_object_get(val); /* prevent free in pdagAddParser */ CHKR(ln_pdagAddParser(ctx, &endnode, val)); endnode->flags.isTerminal = 1; } else if(!strcasecmp(key, "option.permitMismatchInParser")) { data->permitMismatchInParser = json_object_get_boolean(val); } else if(!strcasecmp(key, "option.failOnDuplicate")) { data->failOnDuplicate = json_object_get_boolean(val); } else { ln_errprintf(ctx, 0, "invalid param for hexnumber: %s", json_object_to_json_string(val)); } json_object_iter_next(&it); } done: if(data->parser == NULL || data->while_cond == NULL) { ln_errprintf(ctx, 0, "repeat parser needs 'parser','while' parameters"); ln_destructRepeat(ctx, data); r = LN_BADCONFIG; } else { *pdata = data; } return r; } PARSER_Destruct(Repeat) { struct data_Repeat *const data = (struct data_Repeat*) pdata; if(data->parser != NULL) ln_pdagDelete(data->parser); 
if(data->while_cond != NULL) ln_pdagDelete(data->while_cond); free(pdata); } /* string escaping modes */ #define ST_ESC_NONE 0 #define ST_ESC_BACKSLASH 1 #define ST_ESC_DOUBLE 2 #define ST_ESC_BOTH 3 struct data_String { enum { ST_QUOTE_AUTO = 0, ST_QUOTE_NONE = 1, ST_QUOTE_REQD = 2 } quoteMode; struct { unsigned strip_quotes : 1; unsigned esc_md : 2; } flags; enum { ST_MATCH_EXACT = 0, ST_MATCH_LAZY = 1} matching; int dashIsEmpty; char qchar_begin; char qchar_end; char perm_chars[256]; // TODO: make this bit-wise, so we need only 32 bytes }; static inline void stringSetPermittedChar(struct data_String *const data, char c, int val) { #if 0 const int i = (unsigned) c / 8; const int shft = (unsigned) c % 8; const unsigned mask = ~(1 << shft); perm_arr[i] = (perm_arr[i] & (0xff #endif data->perm_chars[(unsigned)c] = val; } static inline int stringIsPermittedChar(struct data_String *const data, char c) { return data->perm_chars[(unsigned char)c]; } static void stringAddPermittedCharArr(struct data_String *const data, const char *const optval) { const size_t nchars = strlen(optval); for(size_t i = 0 ; i < nchars ; ++i) { stringSetPermittedChar(data, optval[i], 1); } } static void stringAddPermittedFromTo(struct data_String *const data, const unsigned char from, const unsigned char to) { assert(from <= to); for(size_t i = from ; i <= to ; ++i) { stringSetPermittedChar(data, (char) i, 1); } } static inline void stringAddPermittedChars(struct data_String *const data, struct json_object *const val) { const char *const optval = json_object_get_string(val); if(optval == NULL) return; stringAddPermittedCharArr(data, optval); } static void stringAddPermittedCharsViaArray(ln_ctx ctx, struct data_String *const data, struct json_object *const arr) { const int nelem = json_object_array_length(arr); for(int i = 0 ; i < nelem ; ++i) { struct json_object *const elem = json_object_array_get_idx(arr, i); struct json_object_iterator it = json_object_iter_begin(elem); struct 
json_object_iterator itEnd = json_object_iter_end(elem); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(!strcasecmp(key, "chars")) { stringAddPermittedChars(data, val); } else if(!strcasecmp(key, "class")) { const char *const optval = json_object_get_string(val); if(!strcasecmp(optval, "digit")) { stringAddPermittedCharArr(data, "0123456789"); } else if(!strcasecmp(optval, "hexdigit")) { stringAddPermittedCharArr(data, "0123456789aAbBcCdDeEfF"); } else if(!strcasecmp(optval, "alpha")) { stringAddPermittedFromTo(data, 'a', 'z'); stringAddPermittedFromTo(data, 'A', 'Z'); } else if(!strcasecmp(optval, "alnum")) { stringAddPermittedCharArr(data, "0123456789"); stringAddPermittedFromTo(data, 'a', 'z'); stringAddPermittedFromTo(data, 'A', 'Z'); } else { ln_errprintf(ctx, 0, "invalid character class '%s'", optval); } } json_object_iter_next(&it); } } } /** * generic string parser */ PARSER_Parse(String) assert(npb->str != NULL); assert(offs != NULL); assert(parsed != NULL); struct data_String *const data = (struct data_String*) pdata; size_t i = *offs; int bHaveQuotes = 0; int bHadEndQuote = 0; int bHadEscape = 0; if(i == npb->strLen) goto done; if((data->quoteMode == ST_QUOTE_AUTO) && (npb->str[i] == data->qchar_begin)) { bHaveQuotes = 1; ++i; } else if(data->quoteMode == ST_QUOTE_REQD) { if(npb->str[i] == data->qchar_begin) { bHaveQuotes = 1; ++i; } else { goto done; } } /* scan string */ while(i < npb->strLen) { if(bHaveQuotes) { if(npb->str[i] == data->qchar_end) { if(data->flags.esc_md == ST_ESC_DOUBLE || data->flags.esc_md == ST_ESC_BOTH) { /* may be escaped, need to check! 
*/ if(i+1 < npb->strLen && npb->str[i+1] == data->qchar_end) { bHadEscape = 1; ++i; } else { /* not escaped -> terminal */ bHadEndQuote = 1; break; } } else { bHadEndQuote = 1; break; } } } if( npb->str[i] == '\\' && i+1 < npb->strLen && (data->flags.esc_md == ST_ESC_BACKSLASH || data->flags.esc_md == ST_ESC_BOTH) ) { bHadEscape = 1; i++; /* skip esc char */ } /* terminating conditions */ if(!bHaveQuotes && npb->str[i] == ' ') break; if(!stringIsPermittedChar(data, npb->str[i])) break; i++; } if(bHaveQuotes && !bHadEndQuote) goto done; if(i == *offs) goto done; if((i - *offs < 1) || (data->matching == ST_MATCH_EXACT)) { const size_t trmChkIdx = (bHaveQuotes) ? i+1 : i; if(trmChkIdx != npb->strLen && npb->str[trmChkIdx] != ' ') goto done; } /* success, persist */ *parsed = i - *offs; if(bHadEndQuote) ++(*parsed); /* skip quote */ if(value != NULL) { size_t strt; size_t len; if(data->dashIsEmpty) { if( (bHaveQuotes && *parsed == 3 && !strncmp(npb->str+(*offs), "\"-\"", 3)) || (!bHaveQuotes && *parsed == 1 && npb->str[*offs] == '-') ) { *value = json_object_new_string_len("", 0); r = 0; goto done; /* shortcut exit */ } } if(bHaveQuotes && data->flags.strip_quotes) { strt = *offs + 1; len = *parsed - 2; /* del begin AND end quote! */ } else { strt = *offs; len = *parsed; } char *const cstr = strndup(npb->str+strt, len); CHKN(cstr); if(bHadEscape) { /* need to post-process string... 
*/ for(size_t j = 0 ; cstr[j] != '\0' ; j++) { if( ( cstr[j] == data->qchar_end && cstr[j+1] == data->qchar_end && (data->flags.esc_md == ST_ESC_DOUBLE || data->flags.esc_md == ST_ESC_BOTH) ) || ( cstr[j] == '\\' && (data->flags.esc_md == ST_ESC_BACKSLASH || data->flags.esc_md == ST_ESC_BOTH) ) ) { /* we need to remove the escape character */ memmove(cstr+j, cstr+j+1, len-j); } } } *value = json_object_new_string(cstr); free(cstr); } r = 0; /* success */ done: return r; } PARSER_Construct(String) { int r = 0; struct data_String *const data = (struct data_String*) calloc(1, sizeof(struct data_String)); data->quoteMode = ST_QUOTE_AUTO; data->flags.strip_quotes = 1; data->flags.esc_md = ST_ESC_BOTH; data->qchar_begin = '"'; data->qchar_end = '"'; data->matching = ST_MATCH_EXACT; memset(data->perm_chars, 0xff, sizeof(data->perm_chars)); struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(!strcasecmp(key, "quoting.mode")) { const char *const optval = json_object_get_string(val); if(!strcasecmp(optval, "auto")) { data->quoteMode = ST_QUOTE_AUTO; } else if(!strcasecmp(optval, "none")) { data->quoteMode = ST_QUOTE_NONE; } else if(!strcasecmp(optval, "required")) { data->quoteMode = ST_QUOTE_REQD; } else { ln_errprintf(ctx, 0, "invalid quoting.mode for string parser: %s", optval); r = LN_BADCONFIG; goto done; } } else if(!strcasecmp(key, "quoting.escape.mode")) { const char *const optval = json_object_get_string(val); if(!strcasecmp(optval, "none")) { data->flags.esc_md = ST_ESC_NONE; } else if(!strcasecmp(optval, "backslash")) { data->flags.esc_md = ST_ESC_BACKSLASH; } else if(!strcasecmp(optval, "double")) { data->flags.esc_md = ST_ESC_DOUBLE; } else if(!strcasecmp(optval, "both")) { data->flags.esc_md = ST_ESC_BOTH; } else { 
ln_errprintf(ctx, 0, "invalid quoting.escape.mode for string " "parser: %s", optval); r = LN_BADCONFIG; goto done; } } else if(!strcasecmp(key, "quoting.char.begin")) { const char *const optval = json_object_get_string(val); if(strlen(optval) != 1) { ln_errprintf(ctx, 0, "quoting.char.begin must " "be exactly one character but is: '%s'", optval); r = LN_BADCONFIG; goto done; } data->qchar_begin = *optval; } else if(!strcasecmp(key, "quoting.char.end")) { const char *const optval = json_object_get_string(val); if(strlen(optval) != 1) { ln_errprintf(ctx, 0, "quoting.char.end must " "be exactly one character but is: '%s'", optval); r = LN_BADCONFIG; goto done; } data->qchar_end = *optval; } else if(!strcasecmp(key, "matching.permitted")) { memset(data->perm_chars, 0x00, sizeof(data->perm_chars)); if(json_object_is_type(val, json_type_string)) { stringAddPermittedChars(data, val); } else if(json_object_is_type(val, json_type_array)) { stringAddPermittedCharsViaArray(ctx, data, val); } else { ln_errprintf(ctx, 0, "matching.permitted is invalid " "object type, given as '%s", json_object_to_json_string(val)); } } else if(!strcasecmp(key, "matching.mode")) { const char *const optval = json_object_get_string(val); if(!strcasecmp(optval, "strict")) { data->matching = ST_MATCH_EXACT; } else if(!strcasecmp(optval, "lazy")) { data->matching = ST_MATCH_LAZY; } else { ln_errprintf(ctx, 0, "invalid matching.mode for string " "parser: %s", optval); r = LN_BADCONFIG; goto done; } } else if(!strcasecmp(key, "option.dashIsEmpty")) { data->dashIsEmpty = json_object_get_boolean(val); } else { ln_errprintf(ctx, 0, "invalid param for hexnumber: %s", json_object_to_json_string(val)); } json_object_iter_next(&it); } if(data->quoteMode == ST_QUOTE_NONE) data->flags.esc_md = ST_ESC_NONE; *pdata = data; done: if(r != 0) { free(data); } return r; } PARSER_Destruct(String) { free(pdata); } 
liblognorm-2.1.0/src/parser.h000066400000000000000000000062241520037563000161170ustar00rootroot00000000000000/* * liblognorm - a fast samples-based log normalization library * Copyright 2010-2015 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #ifndef LIBLOGNORM_PARSER_H_INCLUDED #define LIBLOGNORM_PARSER_H_INCLUDED #include "pdag.h" /** * Parser interface * @param[in] str the to-be-parsed string * @param[in] strLen length of the to-be-parsed string * @param[in] offs an offset into the string * @param[out] parsed bytes * @param[out] ptr to json object containing parsed data (can be unused) * if NULL on input, object is NOT persisted * @return 0 on success, something else otherwise */ // TODO #warning check how to handle "value" - does it need to be set to NULL? 
#define PARSERDEF_NO_DATA(parser) \ int ln_v2_parse##parser(npb_t *npb, size_t *offs, void *const, const char *parser_name, \ size_t *parsed, struct json_object **value) #define PARSERDEF(parser) \ int ln_construct##parser(ln_ctx ctx, json_object *const json, void **pdata); \ int ln_v2_parse##parser(npb_t *npb, size_t *offs, void *const, const char *parser_name, \ size_t *parsed, struct json_object **value); \ void ln_destruct##parser(ln_ctx ctx, void *const pdata) PARSERDEF(RFC5424Date); PARSERDEF(RFC3164Date); PARSERDEF(Number); PARSERDEF(Float); PARSERDEF(HexNumber); PARSERDEF_NO_DATA(KernelTimestamp); PARSERDEF_NO_DATA(Whitespace); PARSERDEF_NO_DATA(Word); PARSERDEF(StringTo); PARSERDEF_NO_DATA(Alpha); PARSERDEF(Literal); PARSERDEF(CharTo); PARSERDEF(CharSeparated); PARSERDEF(Repeat); PARSERDEF(String); PARSERDEF_NO_DATA(Rest); PARSERDEF(OpQuotedString); PARSERDEF_NO_DATA(QuotedString); PARSERDEF_NO_DATA(ISODate); PARSERDEF_NO_DATA(Time12hr); PARSERDEF_NO_DATA(Time24hr); PARSERDEF_NO_DATA(Duration); PARSERDEF_NO_DATA(IPv4); PARSERDEF_NO_DATA(IPv6); PARSERDEF(JSON); PARSERDEF_NO_DATA(CEESyslog); PARSERDEF_NO_DATA(v2IPTables); PARSERDEF_NO_DATA(CiscoInterfaceSpec); PARSERDEF_NO_DATA(MAC48); PARSERDEF_NO_DATA(CEF); PARSERDEF(CheckpointLEA); PARSERDEF(NameValue); #undef PARSERDEF_NO_DATA /* utility functions */ int ln_combineData_Literal(void *const org, void *const add); /* definitions for friends */ struct data_Repeat { ln_pdag *parser; ln_pdag *while_cond; int permitMismatchInParser; int failOnDuplicate; }; #endif /* #ifndef LIBLOGNORM_PARSER_H_INCLUDED */ liblognorm-2.1.0/src/pdag.c000066400000000000000000001534761520037563000155450ustar00rootroot00000000000000/** * @file pdag.c * @brief Implementation of the parse dag object. * @class ln_pdag pdag.h *//* * Copyright 2015 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. 
*/ #include "config.h" #include #include #include #include #include #include #include #include "liblognorm.h" #include "v1_liblognorm.h" #include "v1_ptree.h" #include "lognorm.h" #include "samp.h" #include "pdag.h" #include "annot.h" #include "internal.h" #include "parser.h" #include "helpers.h" #ifdef ENABLE_TURBO #include "turbo.h" #endif void ln_displayPDAGComponentAlternative(struct ln_pdag *dag, int level); void ln_displayPDAGComponent(struct ln_pdag *dag, int level); static void pdagDeletePrs(ln_ctx ctx, ln_parser_t *const __restrict__ prs); #ifdef ADVANCED_STATS uint64_t advstats_parsers_called = 0; uint64_t advstats_parsers_success = 0; int advstats_max_pathlen = 0; int advstats_pathlens[ADVSTATS_MAX_ENTITIES]; int advstats_max_backtracked = 0; int advstats_backtracks[ADVSTATS_MAX_ENTITIES]; int advstats_max_parser_calls = 0; int advstats_parser_calls[ADVSTATS_MAX_ENTITIES]; int advstats_max_lit_parser_calls = 0; int advstats_lit_parser_calls[ADVSTATS_MAX_ENTITIES]; #endif /* parser lookup table * This is a memory- and cache-optimized way of calling parsers. * VERY IMPORTANT: the initialization must be done EXACTLY in the * order of parser IDs (also see comment in pdag.h). * * Rough guideline for assigning priorities: * 0 is highest, 255 lowest. 255 should be reserved for things that * *really* should only be run as last resort --> rest. Also keep in * mind that the user-assigned priority is put in the upper 24 bits, so * parser-specific priorities only count when the user has assigned * no priorities (which is expected to be common) or user-assigned * priorities are equal for some parsers. 
*/ #ifdef ADVANCED_STATS #define PARSER_ENTRY_NO_DATA(identifier, parser, prio) \ { identifier, prio, NULL, ln_v2_parse##parser, NULL, 0, 0 } #define PARSER_ENTRY(identifier, parser, prio) \ { identifier, prio, ln_construct##parser, ln_v2_parse##parser, ln_destruct##parser, 0, 0 } #else #define PARSER_ENTRY_NO_DATA(identifier, parser, prio) \ { identifier, prio, NULL, ln_v2_parse##parser, NULL } #define PARSER_ENTRY(identifier, parser, prio) \ { identifier, prio, ln_construct##parser, ln_v2_parse##parser, ln_destruct##parser } #endif static struct ln_parser_info parser_lookup_table[] = { PARSER_ENTRY("literal", Literal, 4), PARSER_ENTRY("repeat", Repeat, 4), PARSER_ENTRY("date-rfc3164", RFC3164Date, 8), PARSER_ENTRY("date-rfc5424", RFC5424Date, 8), PARSER_ENTRY("number", Number, 16), PARSER_ENTRY("float", Float, 16), PARSER_ENTRY("hexnumber", HexNumber, 16), PARSER_ENTRY_NO_DATA("kernel-timestamp", KernelTimestamp, 16), PARSER_ENTRY_NO_DATA("whitespace", Whitespace, 4), PARSER_ENTRY_NO_DATA("ipv4", IPv4, 4), PARSER_ENTRY_NO_DATA("ipv6", IPv6, 4), PARSER_ENTRY_NO_DATA("word", Word, 32), PARSER_ENTRY_NO_DATA("alpha", Alpha, 32), PARSER_ENTRY_NO_DATA("rest", Rest, 255), PARSER_ENTRY("op-quoted-string", OpQuotedString, 64), PARSER_ENTRY_NO_DATA("quoted-string", QuotedString, 64), PARSER_ENTRY_NO_DATA("date-iso", ISODate, 8), PARSER_ENTRY_NO_DATA("time-24hr", Time24hr, 8), PARSER_ENTRY_NO_DATA("time-12hr", Time12hr, 8), PARSER_ENTRY_NO_DATA("duration", Duration, 16), PARSER_ENTRY_NO_DATA("cisco-interface-spec", CiscoInterfaceSpec, 4), PARSER_ENTRY("json", JSON, 4), PARSER_ENTRY_NO_DATA("cee-syslog", CEESyslog, 4), PARSER_ENTRY_NO_DATA("mac48", MAC48, 16), PARSER_ENTRY_NO_DATA("cef", CEF, 4), PARSER_ENTRY_NO_DATA("v2-iptables", v2IPTables, 4), PARSER_ENTRY("name-value-list", NameValue, 8), PARSER_ENTRY("checkpoint-lea", CheckpointLEA, 4), PARSER_ENTRY("string-to", StringTo, 32), PARSER_ENTRY("char-to", CharTo, 32), PARSER_ENTRY("char-sep", CharSeparated, 32), 
PARSER_ENTRY("string", String, 32) }; #define NPARSERS (sizeof(parser_lookup_table)/sizeof(struct ln_parser_info)) #define DFLT_USR_PARSER_PRIO 30000 /**< default priority if user has not specified it */ static inline const char * parserName(const prsid_t id) { const char *name; if(id == PRS_CUSTOM_TYPE) name = "USER-DEFINED"; else name = parser_lookup_table[id].name; return name; } prsid_t ln_parserName2ID(const char *const __restrict__ name) { unsigned i; for( i = 0 ; i < sizeof(parser_lookup_table) / sizeof(struct ln_parser_info) ; ++i) { if(!strcmp(parser_lookup_table[i].name, name)) { return i; } } return PRS_INVALID; } /* find type pdag in table. If "bAdd" is set, add it if not * already present, a new entry will be added. * Returns NULL on error, ptr to type pdag entry otherwise * * We return the index here so that previously parsed rules/types before a realloc * don't end up with a pointer to freed memory */ int ln_pdagFindType(ln_ctx ctx, const char *const __restrict__ name, const int bAdd) { int td = -1; int i; LN_DBGPRINTF(ctx, "ln_pdagFindType, name '%s', bAdd: %d, nTypes %d", name, bAdd, ctx->nTypes); for(i = 0 ; i < ctx->nTypes ; ++i) { if(!strcmp(ctx->type_pdags[i].name, name)) { td = i; goto done; } } if(!bAdd) { LN_DBGPRINTF(ctx, "custom type '%s' not found", name); goto done; } /* type does not yet exist -- create entry */ LN_DBGPRINTF(ctx, "custom type '%s' does not yet exist, adding...", name); struct ln_type_pdag *newarr; newarr = realloc(ctx->type_pdags, sizeof(struct ln_type_pdag) * (ctx->nTypes+1)); if(newarr == NULL) { LN_DBGPRINTF(ctx, "ln_pdagFindTypeAG: alloc newarr failed"); goto done; } ctx->type_pdags = newarr; /* td now is index of new member and nTypes is index+1 (count) */ td = ctx->nTypes++; ctx->type_pdags[td].name = strdup(name); ctx->type_pdags[td].pdag = ln_newPDAG(ctx); done: return td; } /* we clear some multiple times, but as long as we have no loops * (dag!) we have no real issue. 
*/ static void ln_pdagComponentClearVisited(struct ln_pdag *const dag) { dag->flags.visited = 0; for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *prs = dag->parsers+i; ln_pdagComponentClearVisited(prs->node); } } static void ln_pdagClearVisited(ln_ctx ctx) { for(int i = 0 ; i < ctx->nTypes ; ++i) ln_pdagComponentClearVisited(ctx->type_pdags[i].pdag); ln_pdagComponentClearVisited(ctx->pdag); } /** * Process a parser definition. Note that a single definition can potentially * contain many parser instances. * @return parser node ptr or NULL (on error) */ ln_parser_t* ln_newParser(ln_ctx ctx, json_object *prscnf) { ln_parser_t *node = NULL; json_object *json; const char *val; prsid_t prsid; int custType = -1; const char *name = NULL; const char *textconf = json_object_to_json_string(prscnf); int assignedPrio = DFLT_USR_PARSER_PRIO; int parserPrio; json_object_object_get_ex(prscnf, "type", &json); if(json == NULL) { ln_errprintf(ctx, 0, "parser type missing in config: %s", json_object_to_json_string(prscnf)); goto done; } val = json_object_get_string(json); if(*val == '@') { prsid = PRS_CUSTOM_TYPE; custType = ln_pdagFindType(ctx, val, 0); parserPrio = 16; /* hopefully relatively specific... */ if(custType < 0) { ln_errprintf(ctx, 0, "unknown user-defined type '%s'", val); goto done; } } else { prsid = ln_parserName2ID(val); if(prsid == PRS_INVALID) { ln_errprintf(ctx, 0, "invalid field type '%s'", val); goto done; } parserPrio = parser_lookup_table[prsid].prio; } json_object_object_get_ex(prscnf, "name", &json); if(json == NULL || !strcmp(json_object_get_string(json), "-")) { name = NULL; } else { name = strdup(json_object_get_string(json)); } json_object_object_get_ex(prscnf, "priority", &json); if(json != NULL) { assignedPrio = json_object_get_int(json); } LN_DBGPRINTF(ctx, "assigned priority is %d", assignedPrio); /* we need to remove already processed items from the config, so * that we can pass the remaining parameters to the parser. 
*/ json_object_object_del(prscnf, "type"); json_object_object_del(prscnf, "priority"); if(name != NULL) json_object_object_del(prscnf, "name"); /* got all data items */ if((node = calloc(1, sizeof(ln_parser_t))) == NULL) { LN_DBGPRINTF(ctx, "lnNewParser: alloc node failed"); free((void*)name); goto done; } node->node = NULL; node->prio = ((assignedPrio << 8) & 0xffffff00) | (parserPrio & 0xff); node->name = name; node->prsid = prsid; node->conf = strdup(textconf); if(prsid == PRS_CUSTOM_TYPE) { node->custType = custType; } else { if(parser_lookup_table[prsid].construct != NULL) { const int r = parser_lookup_table[prsid].construct(ctx, prscnf, &node->parser_data); if(r != 0) { pdagDeletePrs(ctx, node); free(node); node = NULL; goto done; } } } done: return node; } struct ln_pdag* ln_newPDAG(ln_ctx ctx) { struct ln_pdag *dag; if((dag = calloc(1, sizeof(struct ln_pdag))) == NULL) goto done; dag->refcnt = 1; dag->ctx = ctx; ctx->nNodes++; done: return dag; } /* note: we must NOT free the parser itself, because * it is stored inside a parser table (so no single * alloc for the parser!). */ static void pdagDeletePrs(ln_ctx ctx, ln_parser_t *const __restrict__ prs) { // TODO: be careful here: once we move to real DAG from tree, we // cannot simply delete the next node! (refcount? something else?) 
if(prs->node != NULL) ln_pdagDelete(prs->node); free((void*)prs->name); free((void*)prs->conf); if(prs->parser_data != NULL) parser_lookup_table[prs->prsid].destruct(ctx, prs->parser_data); } void ln_pdagDelete(struct ln_pdag *const __restrict__ pdag) { if(pdag == NULL) goto done; LN_DBGPRINTF(pdag->ctx, "delete %p[%d]: %s", pdag, pdag->refcnt, pdag->rb_id); --pdag->refcnt; if(pdag->refcnt > 0) goto done; if(pdag->tags != NULL) json_object_put(pdag->tags); for(int i = 0 ; i < pdag->nparsers ; ++i) { pdagDeletePrs(pdag->ctx, pdag->parsers+i); } free(pdag->parsers); free((void*)pdag->rb_id); free((void*)pdag->rb_file); free(pdag); done: return; } /** * pdag optimizer step: literal path compaction * * We compress as much as possible and evaluate the path down to * the first non-compressable element. Note that we must NOT * compact those literals that are either terminal nodes OR * contain names so that the literal is to be parsed out. */ static inline int optLitPathCompact(ln_ctx ctx, ln_parser_t *prs) { int r = 0; while(prs != NULL) { /* note the NOT prefix in the condition below! */ if(!( prs->prsid == PRS_LITERAL && prs->name == NULL && prs->node->flags.isTerminal == 0 && prs->node->refcnt == 1 && prs->node->nparsers == 1 /* we need to do some checks on the child as well */ && prs->node->parsers[0].prsid == PRS_LITERAL && prs->node->parsers[0].name == NULL && prs->node->parsers[0].node->refcnt == 1) ) goto done; /* ok, we have two compactable literals in a row, let's compact the nodes */ ln_parser_t *child_prs = prs->node->parsers; LN_DBGPRINTF(ctx, "opt path compact: add %p to %p", child_prs, prs); CHKR(ln_combineData_Literal(prs->parser_data, child_prs->parser_data)); ln_pdag *const node_del = prs->node; prs->node = child_prs->node; child_prs->node = NULL; /* remove, else this would be destructed! 
*/ ln_pdagDelete(node_del); } done: return r; } static int qsort_parserCmp(const void *v1, const void *v2) { const ln_parser_t *const p1 = (const ln_parser_t *const) v1; const ln_parser_t *const p2 = (const ln_parser_t *const) v2; return p1->prio - p2->prio; } static int ln_pdagComponentOptimize(ln_ctx ctx, struct ln_pdag *const dag) { int r = 0; for(int i = 0 ; i < dag->nparsers ; ++i) { /* TODO: remove when confident enough */ ln_parser_t *prs = dag->parsers+i; LN_DBGPRINTF(ctx, "pre sort, parser %d:%s[%d]", i, prs->name, prs->prio); } /* first sort parsers in priority order */ if(dag->nparsers > 1) { qsort(dag->parsers, dag->nparsers, sizeof(ln_parser_t), qsort_parserCmp); } for(int i = 0 ; i < dag->nparsers ; ++i) { /* TODO: remove when confident enough */ ln_parser_t *prs = dag->parsers+i; LN_DBGPRINTF(ctx, "post sort, parser %d:%s[%d]", i, prs->name, prs->prio); } /* now on to rest of processing */ for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *prs = dag->parsers+i; LN_DBGPRINTF(dag->ctx, "optimizing %p: field %d type '%s', name '%s': '%s':", prs->node, i, parserName(prs->prsid), prs->name, (prs->prsid == PRS_LITERAL) ? ln_DataForDisplayLiteral(dag->ctx, prs->parser_data) : "UNKNOWN"); optLitPathCompact(ctx, prs); ln_pdagComponentOptimize(ctx, prs->node); } return r; } static void deleteComponentID(struct ln_pdag *const __restrict__ dag) { free((void*)dag->rb_id); dag->rb_id = NULL; for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *prs = dag->parsers+i; deleteComponentID(prs->node); } } /* fixes rb_ids for this node as well as it predecessors. * This is required if the ALTERNATIVE parser type is used, * which will create component IDs for each of it's invocations. * As such, we do not only fix the string, but know that all * children also need fixing. We do this be simply deleting * all of their rb_ids, as we know they will be visited again. 
* Note: if we introduce the same situation by new functionality, * we may need to review this code here as well. Also note * that the component ID will not be 100% correct after our fix, * because that ID could actually be created by two sets of rules. * But this is the best we can do. */ static void fixComponentID(struct ln_pdag *const __restrict__ dag, const char *const new) { char *updated; const char *const curr = dag->rb_id; int i; int len = (int) strlen(curr); for(i = 0 ; i < len ; ++i){ if(curr[i] != new [i]) break; } if(i >= 1 && curr[i-1] == '%') --i; if(asprintf(&updated, "%.*s[%s|%s]", i, curr, curr+i, new+i) == -1) goto done; deleteComponentID(dag); dag->rb_id = updated; done: return; } /** * Assign human-readable identifiers (names) to each node. These are * later used in stats, debug output and wherever else this may make * sense. */ static void ln_pdagComponentSetIDs(ln_ctx ctx, struct ln_pdag *const dag, const char *prefix) { char *id = NULL; if(prefix == NULL) goto done; if(dag->rb_id == NULL) { dag->rb_id = strdup(prefix); } else { LN_DBGPRINTF(ctx, "rb_id already exists - fixing as good as " "possible. This happens with ALTERNATIVE parser. " "old: '%s', new: '%s'", dag->rb_id, prefix); fixComponentID(dag, prefix); LN_DBGPRINTF(ctx, "\"fixed\" rb_id: %s", dag->rb_id); prefix = dag->rb_id; } /* now on to rest of processing */ for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *prs = dag->parsers+i; if(prs->prsid == PRS_LITERAL) { if(prs->name == NULL) { if(asprintf(&id, "%s%s", prefix, ln_DataForDisplayLiteral(dag->ctx, prs->parser_data)) == -1) goto done; } else { if(asprintf(&id, "%s%%%s:%s:%s%%", prefix, prs->name, parserName(prs->prsid), ln_DataForDisplayLiteral(dag->ctx, prs->parser_data)) == -1) goto done; } } else { if(asprintf(&id, "%s%%%s:%s%%", prefix, prs->name ? prs->name : "-", parserName(prs->prsid)) == -1) goto done; } ln_pdagComponentSetIDs(ctx, prs->node, id); free(id); } done: return; } /** * Optimize the pdag. 
* This includes all components. */ int ln_pdagOptimize(ln_ctx ctx) { int r = 0; for(int i = 0 ; i < ctx->nTypes ; ++i) { LN_DBGPRINTF(ctx, "optimizing component %s\n", ctx->type_pdags[i].name); ln_pdagComponentOptimize(ctx, ctx->type_pdags[i].pdag); ln_pdagComponentSetIDs(ctx, ctx->type_pdags[i].pdag, ""); } LN_DBGPRINTF(ctx, "optimizing main pdag component"); ln_pdagComponentOptimize(ctx, ctx->pdag); LN_DBGPRINTF(ctx, "finished optimizing main pdag component"); ln_pdagComponentSetIDs(ctx, ctx->pdag, ""); LN_DBGPRINTF(ctx, "---AFTER OPTIMIZATION------------------"); ln_displayPDAG(ctx); LN_DBGPRINTF(ctx, "======================================="); return r; } #define LN_INTERN_PDAG_STATS_NPARSERS 100 /* data structure for pdag statistics */ struct pdag_stats { int nodes; int term_nodes; int parsers; int max_nparsers; int nparsers_cnt[LN_INTERN_PDAG_STATS_NPARSERS]; int nparsers_100plus; int *prs_cnt; }; /** * Recursive step of statistics gatherer. */ static int ln_pdagStatsRec(ln_ctx ctx, struct ln_pdag *const dag, struct pdag_stats *const stats) { if(dag->flags.visited) return 0; dag->flags.visited = 1; stats->nodes++; if(dag->flags.isTerminal) stats->term_nodes++; if(dag->nparsers > stats->max_nparsers) stats->max_nparsers = dag->nparsers; if(dag->nparsers >= LN_INTERN_PDAG_STATS_NPARSERS) stats->nparsers_100plus++; else stats->nparsers_cnt[dag->nparsers]++; stats->parsers += dag->nparsers; int max_path = 0; for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *prs = dag->parsers+i; if(prs->prsid != PRS_CUSTOM_TYPE) stats->prs_cnt[prs->prsid]++; const int path_len = ln_pdagStatsRec(ctx, prs->node, stats); if(path_len > max_path) max_path = path_len; } return max_path + 1; } static void ln_pdagStatsExtended(ln_ctx ctx, struct ln_pdag *const dag, FILE *const fp, int level) { char indent[2048]; if(level > 1023) level = 1023; memset(indent, ' ', level * 2); indent[level * 2] = '\0'; if(dag->stats.called > 0) { fprintf(fp, "%u, %u, %s\n", dag->stats.called, 
dag->stats.backtracked, dag->rb_id); } for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *const prs = dag->parsers+i; if(prs->node->stats.called > 0) { ln_pdagStatsExtended(ctx, prs->node, fp, level+1); } } } /** * Gather pdag statistics for a *specific* pdag. * * Data is sent to given file ptr. */ static void ln_pdagStats(ln_ctx ctx, struct ln_pdag *const dag, FILE *const fp, const int extendedStats) { struct pdag_stats *const stats = calloc(1, sizeof(struct pdag_stats)); stats->prs_cnt = calloc(NPARSERS, sizeof(int)); //ln_pdagClearVisited(ctx); const int longest_path = ln_pdagStatsRec(ctx, dag, stats); fprintf(fp, "nodes.............: %4d\n", stats->nodes); fprintf(fp, "terminal nodes....: %4d\n", stats->term_nodes); fprintf(fp, "parsers entries...: %4d\n", stats->parsers); fprintf(fp, "longest path......: %4d\n", longest_path); fprintf(fp, "Parser Type Counts:\n"); for(prsid_t i = 0 ; i < NPARSERS ; ++i) { if(stats->prs_cnt[i] != 0) fprintf(fp, "\t%20s: %d\n", parserName(i), stats->prs_cnt[i]); } fprintf(fp, "Parsers per Node:\n"); fprintf(fp, "\tmax:\t%4d\n", stats->max_nparsers); for(int i = 0 ; i < 100 ; ++i) { if(stats->nparsers_cnt[i] != 0) fprintf(fp, "\t%d:\t%4d\n", i, stats->nparsers_cnt[i]); } free(stats->prs_cnt); free(stats); if(extendedStats) { fprintf(fp, "Usage Statistics:\n" "-----------------\n"); fprintf(fp, "called, backtracked, rule\n"); ln_pdagComponentClearVisited(dag); ln_pdagStatsExtended(ctx, dag, fp, 0); } } /** * Gather and output pdag statistics for the full pdag (ctx) * including all disconnected components (type defs). * * Data is sent to given file ptr. 
*/ void ln_fullPdagStats(ln_ctx ctx, FILE *const fp, const int extendedStats) { if(ctx->ptree != NULL) { /* we need to handle the old cruft */ ln_fullPTreeStats(ctx, fp, extendedStats); return; } fprintf(fp, "User-Defined Types\n" "==================\n"); fprintf(fp, "number types: %d\n", ctx->nTypes); for(int i = 0 ; i < ctx->nTypes ; ++i) fprintf(fp, "type: %s\n", ctx->type_pdags[i].name); for(int i = 0 ; i < ctx->nTypes ; ++i) { fprintf(fp, "\n" "type PDAG: %s\n" "----------\n", ctx->type_pdags[i].name); ln_pdagStats(ctx, ctx->type_pdags[i].pdag, fp, extendedStats); } fprintf(fp, "\n" "Main PDAG\n" "=========\n"); ln_pdagStats(ctx, ctx->pdag, fp, extendedStats); #ifdef ADVANCED_STATS const uint64_t parsers_failed = advstats_parsers_called - advstats_parsers_success; fprintf(fp, "\n" "Advanced Runtime Stats\n" "======================\n"); fprintf(fp, "These are actual number from analyzing the control flow " "at runtime.\n"); fprintf(fp, "Note that literal matching is also done via parsers. 
As such, \n" "it is expected that fail rates increase with the size of the \n" "rule base.\n"); fprintf(fp, "\n"); fprintf(fp, "Parser Calls:\n"); fprintf(fp, "total....: %10" PRIu64 "\n", advstats_parsers_called); fprintf(fp, "succesful: %10" PRIu64 "\n", advstats_parsers_success); fprintf(fp, "failed...: %10" PRIu64 " [%d%%]\n", parsers_failed, (int) ((parsers_failed * 100) / advstats_parsers_called) ); fprintf(fp, "\nIndividual Parser Calls " "(never called parsers are not shown):\n"); for( size_t i = 0 ; i < sizeof(parser_lookup_table) / sizeof(struct ln_parser_info) ; ++i) { if(parser_lookup_table[i].called > 0) { const uint64_t failed = parser_lookup_table[i].called - parser_lookup_table[i].success; fprintf(fp, "%20s: %10" PRIu64 " [%5.2f%%] " "success: %10" PRIu64 " [%5.1f%%] " "fail: %10" PRIu64 " [%5.1f%%]" "\n", parser_lookup_table[i].name, parser_lookup_table[i].called, (float)(parser_lookup_table[i].called * 100) / advstats_parsers_called, parser_lookup_table[i].success, (float)(parser_lookup_table[i].success * 100) / parser_lookup_table[i].called, failed, (float)(failed * 100) / parser_lookup_table[i].called ); } } uint64_t total_len; uint64_t total_cnt; fprintf(fp, "\n"); fprintf(fp, "\n" "Path Length Statistics\n" "----------------------\n" "The regular path length is the number of nodes being visited,\n" "where each node potentially evaluates several parsers. The\n" "parser call statistic is the number of parsers called along\n" "the path. That number is higher, as multiple parsers may be\n" "called at each node. 
The number of literal parser calls is\n" "given explicitly, as they use almost no time to process.\n" "\n" ); total_len = 0; total_cnt = 0; fprintf(fp, "Path Length\n"); for(int i = 0 ; i < ADVSTATS_MAX_ENTITIES ; ++i) { if(advstats_pathlens[i] > 0 ) { fprintf(fp, "%3d: %d\n", i, advstats_pathlens[i]); total_len += i * advstats_pathlens[i]; total_cnt += advstats_pathlens[i]; } } fprintf(fp, "avg: %f\n", (double) total_len / (double) total_cnt); fprintf(fp, "max: %d\n", advstats_max_pathlen); fprintf(fp, "\n"); total_len = 0; total_cnt = 0; fprintf(fp, "Nbr Backtracked\n"); for(int i = 0 ; i < ADVSTATS_MAX_ENTITIES ; ++i) { if(advstats_backtracks[i] > 0 ) { fprintf(fp, "%3d: %d\n", i, advstats_backtracks[i]); total_len += i * advstats_backtracks[i]; total_cnt += advstats_backtracks[i]; } } fprintf(fp, "avg: %f\n", (double) total_len / (double) total_cnt); fprintf(fp, "max: %d\n", advstats_max_backtracked); fprintf(fp, "\n"); /* we calc some stats while we output */ total_len = 0; total_cnt = 0; fprintf(fp, "Parser Calls\n"); for(int i = 0 ; i < ADVSTATS_MAX_ENTITIES ; ++i) { if(advstats_parser_calls[i] > 0 ) { fprintf(fp, "%3d: %d\n", i, advstats_parser_calls[i]); total_len += i * advstats_parser_calls[i]; total_cnt += advstats_parser_calls[i]; } } fprintf(fp, "avg: %f\n", (double) total_len / (double) total_cnt); fprintf(fp, "max: %d\n", advstats_max_parser_calls); fprintf(fp, "\n"); total_len = 0; total_cnt = 0; fprintf(fp, "LITERAL Parser Calls\n"); for(int i = 0 ; i < ADVSTATS_MAX_ENTITIES ; ++i) { if(advstats_lit_parser_calls[i] > 0 ) { fprintf(fp, "%3d: %d\n", i, advstats_lit_parser_calls[i]); total_len += i * advstats_lit_parser_calls[i]; total_cnt += advstats_lit_parser_calls[i]; } } fprintf(fp, "avg: %f\n", (double) total_len / (double) total_cnt); fprintf(fp, "max: %d\n", advstats_max_lit_parser_calls); fprintf(fp, "\n"); #endif } /** * Check if the provided dag is a leaf. This means that it * does not contain any subdags. 
* @return 1 if it is a leaf, 0 otherwise */ static inline int isLeaf(struct ln_pdag *dag) { return dag->nparsers == 0 ? 1 : 0; } /** * Add a parser instance to the pdag at the current position. * * @param[in] ctx * @param[in] prscnf json parser config *object* (no array!) * @param[in] pdag current pdag position (to which parser is to be added) * @param[in/out] nextnode contains point to the next node, either * an existing one or one newly created. * * The nextnode parameter permits to use this function to create * multiple parsers alternative parsers with a single run. To do so, * set nextnode=NULL on first call. On successive calls, keep the * value. If a value is present, we will not accept non-identical * parsers which point to different nodes - this will result in an * error. * * IMPORTANT: the caller is responsible to update its pdag pointer * to the nextnode value when he is done adding parsers. * * If a parser of the same type with identical data already exists, * it is "resued", which means the function is effectively used to * walk the path. This is used during parser construction to * navigate to new parts of the pdag. */ static int ln_pdagAddParserInstance(ln_ctx ctx, json_object *const __restrict__ prscnf, struct ln_pdag *const __restrict__ pdag, struct ln_pdag **nextnode) { int r; ln_parser_t *newtab; LN_DBGPRINTF(ctx, "ln_pdagAddParserInstance: %s, nextnode %p", json_object_to_json_string(prscnf), *nextnode); ln_parser_t *const parser = ln_newParser(ctx, prscnf); if(parser == NULL) { r = LN_BADCONFIG; goto done; } LN_DBGPRINTF(ctx, "pdag: %p, parser %p", pdag, parser); /* check if we already have this parser, if so, merge */ int i; for(i = 0 ; i < pdag->nparsers ; ++i) { LN_DBGPRINTF(ctx, "parser comparison:\n%s\n%s", pdag->parsers[i].conf, parser->conf); if( pdag->parsers[i].prsid == parser->prsid && !strcmp(pdag->parsers[i].conf, parser->conf)) { // FIXME: the current ->conf object is depending on // the order of json elements. 
We should do a JSON // comparison (a bit more complex). For now, it // works like we do it now. // FIXME: if nextnode is set, check we can actually combine, // else err out *nextnode = pdag->parsers[i].node; r = 0; LN_DBGPRINTF(ctx, "merging with pdag %p", pdag); pdagDeletePrs(ctx, parser); /* no need for data items */ goto done; } } /* if we reach this point, we have a new parser type */ if(*nextnode == NULL) { CHKN(*nextnode = ln_newPDAG(ctx)); /* we need a new node */ } else { (*nextnode)->refcnt++; } parser->node = *nextnode; newtab = realloc(pdag->parsers, (pdag->nparsers+1) * sizeof(ln_parser_t)); CHKN(newtab); pdag->parsers = newtab; memcpy(pdag->parsers+pdag->nparsers, parser, sizeof(ln_parser_t)); pdag->nparsers++; r = 0; done: free(parser); return r; } static int ln_pdagAddParserInternal(ln_ctx ctx, struct ln_pdag **pdag, const int mode, json_object *const prscnf, struct ln_pdag **nextnode); /** * add parsers to current pdag. This is used * to add parsers stored in an array. The mode specifies * how parsers shall be added. 
*/ #define PRS_ADD_MODE_SEQ 0 #define PRS_ADD_MODE_ALTERNATIVE 1 static int ln_pdagAddParsers(ln_ctx ctx, json_object *const prscnf, const int mode, struct ln_pdag **pdag, struct ln_pdag **p_nextnode) { int r = LN_BADCONFIG; struct ln_pdag *dag = *pdag; struct ln_pdag *nextnode = *p_nextnode; const int lenarr = json_object_array_length(prscnf); for(int i = 0 ; i < lenarr ; ++i) { struct json_object *const curr_prscnf = json_object_array_get_idx(prscnf, i); LN_DBGPRINTF(ctx, "parser %d: %s", i, json_object_to_json_string(curr_prscnf)); if(json_object_get_type(curr_prscnf) == json_type_array) { struct ln_pdag *local_dag = dag; CHKR(ln_pdagAddParserInternal(ctx, &local_dag, mode, curr_prscnf, &nextnode)); if(mode == PRS_ADD_MODE_SEQ) { dag = local_dag; } } else { CHKR(ln_pdagAddParserInstance(ctx, curr_prscnf, dag, &nextnode)); } if(mode == PRS_ADD_MODE_SEQ) { dag = nextnode; *p_nextnode = nextnode; nextnode = NULL; } } if(mode != PRS_ADD_MODE_SEQ) dag = nextnode; *pdag = dag; r = 0; done: return r; } /* add a json parser config object. Note that this object may contain * multiple parser instances. Additionally, moves the pdag object to * the next node, which is either newly created or previously existed. */ static int ln_pdagAddParserInternal(ln_ctx ctx, struct ln_pdag **pdag, const int mode, json_object *const prscnf, struct ln_pdag **nextnode) { int r = LN_BADCONFIG; struct ln_pdag *dag = *pdag; LN_DBGPRINTF(ctx, "ln_pdagAddParserInternal: %s", json_object_to_json_string(prscnf)); if(json_object_get_type(prscnf) == json_type_object) { /* check for special types we need to handle here */ struct json_object *json; json_object_object_get_ex(prscnf, "type", &json); const char *const ftype = json_object_get_string(json); if(!strcmp(ftype, "alternative")) { json_object_object_get_ex(prscnf, "parser", &json); if(json_object_get_type(json) != json_type_array) { ln_errprintf(ctx, 0, "alternative type needs array of parsers. 
" "Object: '%s', type is %s", json_object_to_json_string(prscnf), json_type_to_name(json_object_get_type(json))); goto done; } CHKR(ln_pdagAddParsers(ctx, json, PRS_ADD_MODE_ALTERNATIVE, &dag, nextnode)); } else { CHKR(ln_pdagAddParserInstance(ctx, prscnf, dag, nextnode)); if(mode == PRS_ADD_MODE_SEQ) dag = *nextnode; } } else if(json_object_get_type(prscnf) == json_type_array) { CHKR(ln_pdagAddParsers(ctx, prscnf, PRS_ADD_MODE_SEQ, &dag, nextnode)); } else { ln_errprintf(ctx, 0, "bug: prscnf object of wrong type. Object: '%s'", json_object_to_json_string(prscnf)); goto done; } *pdag = dag; done: return r; } /* add a json parser config object. Note that this object may contain * multiple parser instances. Additionally, moves the pdag object to * the next node, which is either newly created or previously existed. */ int ln_pdagAddParser(ln_ctx ctx, struct ln_pdag **pdag, json_object *const prscnf) { struct ln_pdag *nextnode = NULL; int r = ln_pdagAddParserInternal(ctx, pdag, PRS_ADD_MODE_SEQ, prscnf, &nextnode); json_object_put(prscnf); return r; } void ln_displayPDAGComponent(struct ln_pdag *dag, int level) { char indent[2048]; if(level > 1023) level = 1023; memset(indent, ' ', level * 2); indent[level * 2] = '\0'; LN_DBGPRINTF(dag->ctx, "%ssubDAG%s %p (children: %d parsers, ref %d) [called %u, backtracked %u]", indent, dag->flags.isTerminal ? " [TERM]" : "", dag, dag->nparsers, dag->refcnt, dag->stats.called, dag->stats.backtracked); for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *const prs = dag->parsers+i; LN_DBGPRINTF(dag->ctx, "%sfield type '%s', name '%s': '%s': called %u", indent, parserName(prs->prsid), dag->parsers[i].name, (prs->prsid == PRS_LITERAL) ? 
ln_DataForDisplayLiteral(dag->ctx, prs->parser_data) : "UNKNOWN", dag->parsers[i].node->stats.called); } for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *const prs = dag->parsers+i; LN_DBGPRINTF(dag->ctx, "%sfield type '%s', name '%s': '%s':", indent, parserName(prs->prsid), dag->parsers[i].name, (prs->prsid == PRS_LITERAL) ? ln_DataForDisplayLiteral(dag->ctx, prs->parser_data) : "UNKNOWN"); if(prs->prsid == PRS_REPEAT) { struct data_Repeat *const data = (struct data_Repeat*) prs->parser_data; LN_DBGPRINTF(dag->ctx, "%sparser:", indent); ln_displayPDAGComponent(data->parser, level + 1); LN_DBGPRINTF(dag->ctx, "%swhile:", indent); ln_displayPDAGComponent(data->while_cond, level + 1); LN_DBGPRINTF(dag->ctx, "%send repeat def", indent); } ln_displayPDAGComponent(dag->parsers[i].node, level + 1); } } void ln_displayPDAGComponentAlternative(struct ln_pdag *dag, int level) { char indent[2048]; if(level > 1023) level = 1023; memset(indent, ' ', level * 2); indent[level * 2] = '\0'; LN_DBGPRINTF(dag->ctx, "%s%p[ref %d]: %s", indent, dag, dag->refcnt, dag->rb_id); for(int i = 0 ; i < dag->nparsers ; ++i) { ln_displayPDAGComponentAlternative(dag->parsers[i].node, level + 1); } } /* developer debug aid, to be used for example as follows: * LN_DBGPRINTF(dag->ctx, "---------------------------------------"); * ln_displayPDAG(dag); * LN_DBGPRINTF(dag->ctx, "======================================="); */ void ln_displayPDAG(ln_ctx ctx) { ln_pdagClearVisited(ctx); for(int i = 0 ; i < ctx->nTypes ; ++i) { LN_DBGPRINTF(ctx, "COMPONENT: %s", ctx->type_pdags[i].name); ln_displayPDAGComponent(ctx->type_pdags[i].pdag, 0); } LN_DBGPRINTF(ctx, "MAIN COMPONENT:"); ln_displayPDAGComponent(ctx->pdag, 0); LN_DBGPRINTF(ctx, "MAIN COMPONENT (alternative):"); ln_displayPDAGComponentAlternative(ctx->pdag, 0); } /* the following is a quick hack, which should be moved to the * string class. 
*/ static inline void dotAddPtr(es_str_t **str, void *p) { char buf[64]; int i; i = snprintf(buf, sizeof(buf), "l%p", p); es_addBuf(str, buf, i); } struct data_Literal { const char *lit; }; // TODO remove when this hack is no longer needed /** * recursive handler for DOT graph generator. */ static void ln_genDotPDAGGraphRec(struct ln_pdag *dag, es_str_t **str) { char s_refcnt[16]; LN_DBGPRINTF(dag->ctx, "in dot: %p, visited %d", dag, (int) dag->flags.visited); if(dag->flags.visited) return; /* already processed this subpart */ dag->flags.visited = 1; dotAddPtr(str, dag); snprintf(s_refcnt, sizeof(s_refcnt), "%d", dag->refcnt); s_refcnt[sizeof(s_refcnt)-1] = '\0'; es_addBufConstcstr(str, " [ label=\""); es_addBuf(str, s_refcnt, strlen(s_refcnt)); es_addBufConstcstr(str, "\""); if(isLeaf(dag)) { es_addBufConstcstr(str, " style=\"bold\""); } es_addBufConstcstr(str, "]\n"); /* display field subdags */ for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *const prs = dag->parsers+i; dotAddPtr(str, dag); es_addBufConstcstr(str, " -> "); dotAddPtr(str, prs->node); es_addBufConstcstr(str, " [label=\""); es_addBuf(str, parserName(prs->prsid), strlen(parserName(prs->prsid))); es_addBufConstcstr(str, ":"); //es_addStr(str, node->name); if(prs->prsid == PRS_LITERAL) { for(const char *p = ((struct data_Literal*)prs->parser_data)->lit ; *p ; ++p) { // TODO: handle! if(*p == '\\') //es_addChar(str, '\\'); if(*p != '\\' && *p != '"') es_addChar(str, *p); } } es_addBufConstcstr(str, "\""); es_addBufConstcstr(str, " style=\"dotted\"]\n"); ln_genDotPDAGGraphRec(prs->node, str); } } void ln_genDotPDAGGraph(struct ln_pdag *dag, es_str_t **str) { ln_pdagClearVisited(dag->ctx); es_addBufConstcstr(str, "digraph pdag {\n"); ln_genDotPDAGGraphRec(dag, str); es_addBufConstcstr(str, "}\n"); } /** * recursive handler for statistics DOT graph generator. 
*/ static void ln_genStatsDotPDAGGraphRec(struct ln_pdag *dag, FILE *const __restrict__ fp) { if(dag->flags.visited) return; /* already processed this subpart */ dag->flags.visited = 1; fprintf(fp, "l%p [ label=\"%u:%u\"", dag, dag->stats.called, dag->stats.backtracked); if(isLeaf(dag)) { fprintf(fp, " style=\"bold\""); } fprintf(fp, "]\n"); /* display field subdags */ for(int i = 0 ; i < dag->nparsers ; ++i) { ln_parser_t *const prs = dag->parsers+i; if(prs->node->stats.called == 0) continue; fprintf(fp, "l%p -> l%p [label=\"", dag, prs->node); if(prs->prsid == PRS_LITERAL) { for(const char *p = ((struct data_Literal*)prs->parser_data)->lit ; *p ; ++p) { if(*p != '\\' && *p != '"') fputc(*p, fp); } } else { fprintf(fp, "%s", parserName(prs->prsid)); } fprintf(fp, "\" style=\"dotted\"]\n"); ln_genStatsDotPDAGGraphRec(prs->node, fp); } } static void ln_genStatsDotPDAGGraph(struct ln_pdag *dag, FILE *const fp) { ln_pdagClearVisited(dag->ctx); fprintf(fp, "digraph pdag {\n"); ln_genStatsDotPDAGGraphRec(dag, fp); fprintf(fp, "}\n"); } void ln_fullPDagStatsDOT(ln_ctx ctx, FILE *const fp) { ln_genStatsDotPDAGGraph(ctx->pdag, fp); } static inline int addOriginalMsg(const char *str, const size_t strLen, struct json_object *const json) { int r = 1; struct json_object *value; value = json_object_new_string_len(str, strLen); if (value == NULL) { goto done; } json_object_object_add(json, ORIGINAL_MSG_KEY, value); r = 0; done: return r; } static char * strrev(char *const __restrict__ str) { char ch; size_t i = strlen(str)-1,j=0; while(i>j) { ch = str[i]; str[i]= str[j]; str[j] = ch; i--; j++; } return str; } /* note: "originalmsg" is NOT added as metadata in order to keep * backwards compatible. 
*/ static inline void addRuleMetadata(npb_t *const __restrict__ npb, struct json_object *const json, struct ln_pdag *const __restrict__ endNode) { ln_ctx ctx = npb->ctx; struct json_object *meta = NULL; struct json_object *meta_rule = NULL; struct json_object *value; if(ctx->opts & LN_CTXOPT_ADD_RULE) { /* matching rule mockup */ if(meta_rule == NULL) meta_rule = json_object_new_object(); char *cstr = strrev(es_str2cstr(npb->rule, NULL)); json_object_object_add(meta_rule, RULE_MOCKUP_KEY, json_object_new_string(cstr)); free(cstr); } if(ctx->opts & LN_CTXOPT_ADD_RULE_LOCATION) { if(meta_rule == NULL) meta_rule = json_object_new_object(); struct json_object *const location = json_object_new_object(); value = json_object_new_string(endNode->rb_file); json_object_object_add(location, "file", value); value = json_object_new_int((int)endNode->rb_lineno); json_object_object_add(location, "line", value); json_object_object_add(meta_rule, RULE_LOCATION_KEY, location); } if(meta_rule != NULL) { if(meta == NULL) meta = json_object_new_object(); json_object_object_add(meta, META_RULE_KEY, meta_rule); } #ifdef ADVANCED_STATS /* complete execution path */ if(ctx->opts & LN_CTXOPT_ADD_EXEC_PATH) { if(meta == NULL) meta = json_object_new_object(); char hdr[128]; const size_t lenhdr = snprintf(hdr, sizeof(hdr), "[PATHLEN:%d, PARSER CALLS gen:%d, literal:%d]", npb->astats.pathlen, npb->astats.parser_calls, npb->astats.lit_parser_calls); es_addBuf(&npb->astats.exec_path, hdr, lenhdr); char * cstr = es_str2cstr(npb->astats.exec_path, NULL); value = json_object_new_string(cstr); if (value != NULL) { json_object_object_add(meta, EXEC_PATH_KEY, value); } free(cstr); } #endif if(meta != NULL) json_object_object_add(json, META_KEY, meta); } /** * add unparsed string to event. 
*/ static inline int addUnparsedField(const char *str, const size_t strLen, const size_t offs, struct json_object *json) { int r = 1; struct json_object *value; CHKR(addOriginalMsg(str, strLen, json)); value = json_object_new_string(str + offs); if (value == NULL) { goto done; } json_object_object_add(json, UNPARSED_DATA_KEY, value); r = 0; done: return r; } static int checkDuplicate(const ln_parser_t *const prs, struct json_object *json, struct json_object *value, const char *check) { int r = 0; if (NULL != value && NULL != json) { struct json_object_iterator it = json_object_iter_begin(value); struct json_object_iterator itEnd = json_object_iter_end(value); while (!json_object_iter_equal(&it, &itEnd)) { const char *key = json_object_iter_peek_name(&it); if(key[0] == '.' && key[1] == '.' && key[2] == '\0') { key = prs->name; } if (json_object_object_get_ex(json, key, NULL)) { r = 1; break; } json_object_iter_next(&it); } } if (r == 0 && NULL != json && NULL != check && json_object_object_get_ex(json, check, NULL)) { r = 1; } return r; } /* Do some fixup to the json that we cannot do on a lower layer */ static int fixJSON(struct ln_pdag *dag, struct json_object **value, struct json_object *json, const ln_parser_t *const prs, const int failOnDuplicate) { int r = LN_WRONGPARSER; if(json == NULL) { if(*value != NULL) json_object_put(*value); *value = NULL; r = 0; goto done; } if(prs->name == NULL) { if (*value != NULL) { /* Free the unneeded value */ json_object_put(*value); } *value = NULL; } else if(prs->name[0] == '.' 
&& prs->name[1] == '\0') { if(json_object_get_type(*value) == json_type_object) { struct json_object_iterator it = json_object_iter_begin(*value); struct json_object_iterator itEnd = json_object_iter_end(*value); while (!json_object_iter_equal(&it, &itEnd)) { const char *const key = json_object_iter_peek_name(&it); struct json_object *const val = json_object_iter_peek_value(&it); if(failOnDuplicate && json_object_object_get_ex(json, key, NULL)) { LN_DBGPRINTF(dag->ctx, "field name '%s' already exists with failOnDuplicate set", key); json_object_put(*value); *value = NULL; goto done; } json_object_get(val); json_object_object_add(json, key, val); json_object_iter_next(&it); } json_object_put(*value); } else { LN_DBGPRINTF(dag->ctx, "field name is '.', but json type is %s", json_type_to_name(json_object_get_type(*value))); json_object_object_add_ex(json, prs->name, *value, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); } } else { int isDotDot = 0; struct json_object *valDotDot = NULL; if(json_object_get_type(*value) == json_type_object) { /* TODO: this needs to be sped up by just checking the first * member and ensuring there is only one member. This requires * extensions to libfastjson. */ int nSubobj = 0; struct json_object_iterator it = json_object_iter_begin(*value); struct json_object_iterator itEnd = json_object_iter_end(*value); while (!json_object_iter_equal(&it, &itEnd)) { ++nSubobj; const char *key = json_object_iter_peek_name(&it); if(key[0] == '.' && key[1] == '.' 
&& key[2] == '\0') { isDotDot = 1; valDotDot = json_object_iter_peek_value(&it); } else { isDotDot = 0; } json_object_iter_next(&it); } if(nSubobj != 1) isDotDot = 0; } if(isDotDot) { LN_DBGPRINTF(dag->ctx, "subordinate field name is '..', combining"); if(failOnDuplicate && json_object_object_get_ex(json, prs->name, NULL)) { LN_DBGPRINTF(dag->ctx, "field name '%s' already exists with failOnDuplicate set", prs->name); json_object_put(*value); *value = NULL; goto done; } json_object_get(valDotDot); json_object_put(*value); json_object_object_add_ex(json, prs->name, valDotDot, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); } else { if(failOnDuplicate && json_object_object_get_ex(json, prs->name, NULL)) { LN_DBGPRINTF(dag->ctx, "field name '%s' already exists with failOnDuplicate set", prs->name); json_object_put(*value); *value = NULL; goto done; } json_object_object_add_ex(json, prs->name, *value, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); } } r = 0; done: *value = NULL; return r; } // TODO: streamline prototype when done with changes static int tryParser(npb_t *const __restrict__ npb, struct ln_pdag *dag, size_t *offs, size_t *const __restrict__ pParsed, struct json_object **value, const ln_parser_t *const prs, int failOnDuplicate, struct json_object *cur_json_object, const char *parser_name ) { int r = LN_WRONGPARSER; struct ln_pdag *endNode = NULL; size_t parsedTo = npb->parsedTo; struct ln_type_pdag *custType = NULL; # ifdef ADVANCED_STATS char hdr[16]; const size_t lenhdr = snprintf(hdr, sizeof(hdr), "%d:", npb->astats.recursion_level); es_addBuf(&npb->astats.exec_path, hdr, lenhdr); if(prs->prsid == PRS_LITERAL) { es_addChar(&npb->astats.exec_path, '\''); es_addBuf(&npb->astats.exec_path, ln_DataForDisplayLiteral(dag->ctx, prs->parser_data), strlen(ln_DataForDisplayLiteral(dag->ctx, prs->parser_data)) ); es_addChar(&npb->astats.exec_path, '\''); } else if(parser_lookup_table[prs->prsid].parser == ln_v2_parseCharTo) { 
es_addBuf(&npb->astats.exec_path, ln_DataForDisplayCharTo(dag->ctx, prs->parser_data), strlen(ln_DataForDisplayCharTo(dag->ctx, prs->parser_data)) ); } else { es_addBuf(&npb->astats.exec_path, parserName(prs->prsid), strlen(parserName(prs->prsid)) ); } es_addChar(&npb->astats.exec_path, ','); # endif if(prs->prsid == PRS_CUSTOM_TYPE) { if (prs->custType < 0 || prs->custType >= dag->ctx->nTypes) { LN_DBGPRINTF(dag->ctx, "tryParser: Invalid custom type index: %d (%d types)", prs->custType, dag->ctx->nTypes); goto done; } if(*value == NULL) *value = json_object_new_object(); custType = &dag->ctx->type_pdags[prs->custType]; LN_DBGPRINTF(dag->ctx, "calling custom parser '%s'", custType->name); r = ln_normalizeRec(npb, custType->pdag, *offs, 1, *value, &endNode, failOnDuplicate, cur_json_object, parser_name); LN_DBGPRINTF(dag->ctx, "called CUSTOM PARSER '%s', result %d, " "offs %zd, *pParsed %zd", custType->name, r, *offs, *pParsed); *pParsed = npb->parsedTo - *offs; if (r != 0) { json_object_put(*value); *value = NULL; } #ifdef ADVANCED_STATS es_addBuf(&npb->astats.exec_path, hdr, lenhdr); es_addBuf(&npb->astats.exec_path, "[R:USR],", 8); #endif } else { r = parser_lookup_table[prs->prsid].parser(npb, offs, prs->parser_data, parser_name, pParsed, (prs->name == NULL) ? NULL : value); } done: LN_DBGPRINTF(npb->ctx, "parser lookup returns %d, pParsed %zu", r, *pParsed); npb->parsedTo = parsedTo; #ifdef ADVANCED_STATS ++advstats_parsers_called; ++npb->astats.parser_calls; if(prs->prsid == PRS_LITERAL) ++npb->astats.lit_parser_calls; if(r == 0) ++advstats_parsers_success; if(prs->prsid != PRS_CUSTOM_TYPE) { ++parser_lookup_table[prs->prsid].called; if(r == 0) ++parser_lookup_table[prs->prsid].success; } #endif return r; } static void add_str_reversed(npb_t *const __restrict__ npb, const char *const __restrict__ str, const size_t len) { ssize_t i; for(i = len - 1 ; i >= 0 ; --i) { es_addChar(&npb->rule, str[i]); } } /* Add the current parser to the mockup rule. 
* Note: we add reversed strings, because we can call this * function effectively only when walking upwards the tree. * This means deepest entries come first. We solve this somewhat * elegantly by reversion strings, and then reversion the string * once more when we emit it, so that we get the right order. */ static inline void add_rule_to_mockup(npb_t *const __restrict__ npb, const ln_parser_t *const __restrict__ prs) { if(prs->prsid == PRS_LITERAL) { const char *const val = ln_DataForDisplayLiteral(npb->ctx, prs->parser_data); add_str_reversed(npb, val, strlen(val)); } else { /* note: name/value order must also be reversed! */ es_addChar(&npb->rule, '%'); add_str_reversed(npb, parserName(prs->prsid), strlen(parserName(prs->prsid)) ); es_addChar(&npb->rule, ':'); if(prs->name == NULL) { es_addChar(&npb->rule, '-'); } else { add_str_reversed(npb, prs->name, strlen(prs->name)); } es_addChar(&npb->rule, '%'); } } /** * Recursive step of the normalizer. It walks the parse dag and calls itself * recursively when this is appropriate. It also implements backtracking in * those (hopefully rare) cases where it is required. * * @param[in] dag current tree to process * @param[in] string string to be matched against (the to-be-normalized data) * @param[in] strLen length of the to-be-matched string * @param[in] offs start position in input data * @param[out] pPrasedTo ptr to position up to which the parsing succeed in max * @param[in/out] json ... that is being created during normalization * @param[out] endNode if a match was found, this is the matching node (undefined otherwise) * * @return regular liblognorm error code (0->OK, something else->error) * TODO: can we use parameter block to prevent pushing params to the stack? 
*/ int ln_normalizeRec(npb_t *const __restrict__ npb, struct ln_pdag *dag, const size_t offs, const int bPartialMatch, struct json_object *json, struct ln_pdag **endNode, int failOnDuplicate, struct json_object *cur_json_object, const char *parser_name ) { int r = LN_WRONGPARSER; int localR; size_t i; size_t iprs; size_t parsedTo = npb->parsedTo; size_t parsed = 0; struct json_object *value = NULL; LN_DBGPRINTF(dag->ctx, "%zu: enter parser, dag node %p, json %p", offs, dag, json); ++dag->stats.called; #ifdef ADVANCED_STATS ++npb->astats.pathlen; ++npb->astats.recursion_level; #endif /* now try the parsers */ for(iprs = 0 ; iprs < dag->nparsers && r != 0; ++iprs) { const ln_parser_t *const prs = dag->parsers + iprs; if (failOnDuplicate && checkDuplicate(prs, cur_json_object, NULL, prs->name)) { LN_DBGPRINTF(dag->ctx, "parser field '%s' already exists with skip duplicate set, skipping", prs->name); continue; } if(dag->ctx->debug) { LN_DBGPRINTF(dag->ctx, "%zu/%d:trying '%s' parser for field '%s', " "data '%s'", offs, bPartialMatch, parserName(prs->prsid), prs->name, (prs->prsid == PRS_LITERAL) ? 
ln_DataForDisplayLiteral(dag->ctx, prs->parser_data) : "UNKNOWN"); } i = offs; localR = tryParser(npb, dag, &i, &parsed, &value, prs, failOnDuplicate, json, prs->name); if(localR == 0) { parsedTo = i + parsed; /* potential hit, need to verify */ LN_DBGPRINTF(dag->ctx, "%zu: potential hit, trying subtree %p", offs, prs->node); r = ln_normalizeRec(npb, prs->node, parsedTo, bPartialMatch, json, endNode, failOnDuplicate, cur_json_object, parser_name); LN_DBGPRINTF(dag->ctx, "%zu: subtree returns %d, parsedTo %zu", offs, r, parsedTo); if(r == 0) { LN_DBGPRINTF(dag->ctx, "%zu: parser matches at %zu", offs, i); CHKR(fixJSON(dag, &value, json, prs, failOnDuplicate)); value = NULL; if(npb->ctx->opts & LN_CTXOPT_ADD_RULE) { add_rule_to_mockup(npb, prs); } /* did we have a longer parser --> then update */ if(parsedTo > npb->parsedTo) npb->parsedTo = parsedTo; } else { ++dag->stats.backtracked; #ifdef ADVANCED_STATS ++npb->astats.backtracked; es_addBuf(&npb->astats.exec_path, "[B]", 3); #endif LN_DBGPRINTF(dag->ctx, "%zu nonmatch, backtracking required, parsed to=%zu", offs, parsedTo); } } if (value != NULL) { /* Free the value if it was created */ json_object_put(value); value = NULL; } /* did we have a longer parser --> then update */ if(parsedTo > npb->longestParsedTo) npb->longestParsedTo = parsedTo; LN_DBGPRINTF(dag->ctx, "parsedTo %zu, *pParsedTo %zu", parsedTo, npb->parsedTo); } LN_DBGPRINTF(dag->ctx, "offs %zu, strLen %zu, isTerm %d", offs, npb->strLen, dag->flags.isTerminal); if(dag->flags.isTerminal && (offs == npb->strLen || bPartialMatch)) { *endNode = dag; r = 0; goto done; } done: LN_DBGPRINTF(dag->ctx, "%zu returns %d, pParsedTo %zu, parsedTo %zu", offs, r, npb->parsedTo, parsedTo); # ifdef ADVANCED_STATS --npb->astats.recursion_level; # endif return r; } int ln_normalize_to_str(ln_ctx ctx, const char *str, const size_t strLen, char **json_str, size_t *json_len) { int r; #ifdef ENABLE_TURBO if(ln_turbo_is_available(ctx)) { r = ln_turbo_normalize_to_str(ctx, str, 
strLen, json_str, json_len); if(r == 0) return 0; ln_dbgprintf(ctx, "turbo normalize_to_str failed, fallback to walker"); } #endif /* Fallback: recursive walker -> json_object -> string */ struct json_object *json = NULL; r = ln_normalize(ctx, str, strLen, &json); if(r == 0 && json != NULL) { const char *rendered = json_object_to_json_string_ext(json, JSON_C_TO_STRING_PLAIN); *json_len = strlen(rendered); *json_str = strdup(rendered); json_object_put(json); } return r; } int ln_normalize(ln_ctx ctx, const char *str, const size_t strLen, struct json_object **json_p) { int r; struct ln_pdag *endNode = NULL; /* old cruft */ if(ctx->version == 1) { r = ln_v1_normalize(ctx, str, strLen, json_p); goto done; } /* end old cruft */ npb_t npb; memset(&npb, 0, sizeof(npb)); npb.ctx = ctx; npb.str = str; npb.strLen = strLen; if(ctx->opts & LN_CTXOPT_ADD_RULE) { npb.rule = es_newStr(1024); } # ifdef ADVANCED_STATS npb.astats.exec_path = es_newStr(1024); # endif if(*json_p == NULL) { CHKN(*json_p = json_object_new_object()); } r = ln_normalizeRec(&npb, ctx->pdag, 0, 0, *json_p, &endNode, 0, NULL, NULL); if(ctx->debug) { if(r == 0) { LN_DBGPRINTF(ctx, "final result for normalizer: parsedTo %zu, endNode %p, " "isTerminal %d, tagbucket %p", npb.parsedTo, endNode, endNode->flags.isTerminal, endNode->tags); } else { LN_DBGPRINTF(ctx, "final result for normalizer: parsedTo %zu, endNode %p", npb.parsedTo, endNode); } } LN_DBGPRINTF(ctx, "DONE, final return is %d", r); if(r == 0 && endNode->flags.isTerminal) { /* success, finalize event */ if(endNode->tags != NULL) { /* add tags to an event */ json_object_get(endNode->tags); json_object_object_add(*json_p, "event.tags", endNode->tags); CHKR(ln_annotate(ctx, *json_p, endNode->tags)); } if(ctx->opts & LN_CTXOPT_ADD_ORIGINALMSG) { /* originalmsg must be kept outside of metadata for * backward compatibility reasons. 
*/ json_object_object_add(*json_p, ORIGINAL_MSG_KEY, json_object_new_string_len(str, strLen)); } addRuleMetadata(&npb, *json_p, endNode); r = 0; } else { addUnparsedField(str, strLen, npb.longestParsedTo, *json_p); } if(ctx->opts & LN_CTXOPT_ADD_RULE) { es_deleteStr(npb.rule); } #ifdef ADVANCED_STATS if(r != 0) es_addBuf(&npb.astats.exec_path, "[FAILED]", 8); else if(!endNode->flags.isTerminal) es_addBuf(&npb.astats.exec_path, "[FAILED:NON-TERMINAL]", 21); if(npb.astats.pathlen < ADVSTATS_MAX_ENTITIES) advstats_pathlens[npb.astats.pathlen]++; if(npb.astats.pathlen > advstats_max_pathlen) { advstats_max_pathlen = npb.astats.pathlen; } if(npb.astats.backtracked < ADVSTATS_MAX_ENTITIES) advstats_backtracks[npb.astats.backtracked]++; if(npb.astats.backtracked > advstats_max_backtracked) { advstats_max_backtracked = npb.astats.backtracked; } /* parser calls */ if(npb.astats.parser_calls < ADVSTATS_MAX_ENTITIES) advstats_parser_calls[npb.astats.parser_calls]++; if(npb.astats.parser_calls > advstats_max_parser_calls) { advstats_max_parser_calls = npb.astats.parser_calls; } if(npb.astats.lit_parser_calls < ADVSTATS_MAX_ENTITIES) advstats_lit_parser_calls[npb.astats.lit_parser_calls]++; if(npb.astats.lit_parser_calls > advstats_max_lit_parser_calls) { advstats_max_lit_parser_calls = npb.astats.lit_parser_calls; } es_deleteStr(npb.astats.exec_path); #endif done: return r; } liblognorm-2.1.0/src/pdag.h000066400000000000000000000205441520037563000155370ustar00rootroot00000000000000/** * @file pdag.h * @brief The parse DAG object. * @class ln_pdag pdag.h *//* * Copyright 2015 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. 
*/ #ifndef LIBLOGNORM_PDAG_H_INCLUDED #define LIBLOGNORM_PDAG_H_INCLUDED #include #include #include #define META_KEY "metadata" #define ORIGINAL_MSG_KEY "originalmsg" #define UNPARSED_DATA_KEY "unparsed-data" #define EXEC_PATH_KEY "exec-path" #define META_RULE_KEY "rule" #define RULE_MOCKUP_KEY "mockup" #define RULE_LOCATION_KEY "location" typedef struct ln_pdag ln_pdag; /**< the parse DAG object */ typedef struct ln_parser_s ln_parser_t; typedef struct npb npb_t; typedef uint8_t prsid_t; struct ln_type_pdag; /** * parser IDs. * * These identfy a parser. VERY IMPORTANT: they must start at zero * and continuously increment. They must exactly match the index * of the respective parser inside the parser lookup table. */ #define PRS_LITERAL 0 #define PRS_REPEAT 1 #if 0 #define PRS_DATE_RFC3164 1 #define PRS_DATE_RFC5424 2 #define PRS_NUMBER 3 #define PRS_FLOAT 4 #define PRS_HEXNUMBER 5 #define PRS_KERNEL_TIMESTAMP 6 #define PRS_WHITESPACE 7 #define PRS_IPV4 8 #define PRS_IPV6 9 #define PRS_WORD 10 #define PRS_ALPHA 11 #define PRS_REST 12 #define PRS_OP_QUOTED_STRING 13 #define PRS_QUOTED_STRING 14 #define PRS_DATE_ISO 15 #define PRS_TIME_24HR 16 #define PRS_TIME_12HR 17 #define PRS_DURATION 18 #define PRS_CISCO_INTERFACE_SPEC 19 #define PRS_NAME_VALUE_LIST 20 #define PRS_JSON 21 #define PRS_CEE_SYSLOG 22 #define PRS_MAC48 23 #define PRS_CEF 24 #define PRS_CHECKPOINT_LEA 25 #define PRS_v2_IPTABLES 26 #define PRS_STRING_TO 27 #define PRS_CHAR_TO 28 #define PRS_CHAR_SEP 29 #endif #define PRS_CUSTOM_TYPE 254 #define PRS_INVALID 255 /* NOTE: current max limit on parser ID is 255, because we use uint8_t * for the prsid_t type (which gains cache performance). If more parsers * come up, the type must be modified. */ /** * object describing a specific parser instance. 
 */
struct ln_parser_s {
	prsid_t prsid;	/**< parser ID (for lookup table) */
	ln_pdag *node;	/**< node to branch to if parser succeeded */
	void *parser_data; /**< opaque data that the field-parser understands */
	int custType;	/**< index of custom type, if such is used */
	int prio;	/**< priority (combination of user- and parser-specific parts) */
	const char *name; /**< field name; NULL means the parsed value is not stored */
	const char *conf; /**< configuration as printable json for comparison reasons */
};

/**
 * static description of a parser type (one entry of the parser lookup table).
 */
struct ln_parser_info {
	const char *name; /**< parser name as used in rule base */
	int prio; /**< parser specific prio in range 0..255 */
	int (*construct)(ln_ctx ctx, json_object *const json, void **);
	int (*parser)(npb_t *npb, size_t*, void *const,
			const char *, size_t*, struct json_object **); /**< parser to use */
	void (*destruct)(ln_ctx, void *const); /* note: destructor is only needed if parser data exists */
#ifdef ADVANCED_STATS
	uint64_t called;  /**< number of times this parser type was called */
	uint64_t success; /**< number of those calls that returned 0 */
#endif
};


/* parse DAG object */
struct ln_pdag {
	ln_ctx ctx;			/**< our context */ // TODO: why do we need it?
	ln_parser_t *parsers;		/* array of parsers to try */
	prsid_t nparsers;		/**< current table size (prsid_t slightly abused) */
	struct {
		unsigned isTerminal:1;	/**< designates this node a terminal sequence */
		unsigned visited:1;	/**< work var for recursive procedures */
	} flags;
	struct json_object *tags;	/**< tags to assign to events of this type */
	int refcnt;			/**< reference count for deleting tracking */
	struct {
		unsigned called;	/**< incremented each time normalization enters this node */
		unsigned backtracked;	/**< incremented when backtracking was initiated */
		unsigned terminated;
	} stats;	/**< usage statistics */
	const char *rb_id;		/**< human-readable rulebase identifier, for stats etc */

	// experimental, move outside later
	const char *rb_file;		/* reported as rule location "file" in event metadata */
	unsigned int rb_lineno;		/* reported as rule location "line" in event metadata */
};

#ifdef ADVANCED_STATS
/** per-message statistics, collected while a message is normalized */
struct advstats {
	int pathlen;
	int parser_calls;		/**< parser calls in general during path */
	int lit_parser_calls;		/**< same just for the literal parser */
	int backtracked;
	int recursion_level;
	es_str_t *exec_path;
};
#define ADVSTATS_MAX_ENTITIES 100
extern int advstats_max_pathlen;
extern int advstats_pathlens[ADVSTATS_MAX_ENTITIES];
extern int advstats_max_backtracked;
extern int advstats_backtracks[ADVSTATS_MAX_ENTITIES];
#endif

/** the "normalization parameter block" (npb)
 * This structure is passed to all normalization routines including
 * parsers. It contains data that commonly needs to be passed,
 * like the to be parsed string and its length, as well as read/write
 * data which is used to track information over the general
 * normalization process (like the execution path, if requested).
 * The main purpose is to save stack writes by eliminating the
 * need for using multiple function parameters. Note that it
 * must be carefully considered which items to add to the
 * npb - those that change from recursion level to recursion
 * level are NOT to be placed here.
 */
struct npb {
	ln_ctx ctx;
	const char *str;		/**< to-be-normalized message */
	size_t strLen;			/**< length of it */
	size_t parsedTo;		/**< up to which byte could this be parsed? */
	size_t longestParsedTo;		/**< largest offset any parser reached; start of the
					 *   unparsed data reported for a non-matching message */
	es_str_t *rule;			/**< a mock-up of the rule used to parse */
	es_str_t *exec_path;		/* NOTE(review): the normalizer records the path in
					 * astats.exec_path; this member looks unused - confirm */
#ifdef ADVANCED_STATS
	/* NOTE(review): the next three members duplicate fields of astats,
	 * which is what the normalizer updates - confirm they are still needed
	 */
	int pathlen;
	int backtracked;
	int recursion_level;
	struct advstats astats;
#endif
};

/* Methods */

/**
 * Allocates and initializes a new parse DAG node.
 * @memberof ln_pdag
 *
 * @param[in] ctx current library context. This MUST match the
 *		context of the parent.
 *
 * @return pointer to new node or NULL on error
 */
struct ln_pdag* ln_newPDAG(ln_ctx ctx);


/**
 * Free a parse DAG and destruct all members.
 * @memberof ln_pdag
 *
 * @param[in] DAG pointer to pdag to free
 */
void ln_pdagDelete(struct ln_pdag *DAG);


/**
 * Add parser to dag node.
 * Works on unoptimized dag. The parser definition may describe several
 * parser instances; on return *pdag points to the next node. The function
 * releases one reference of the parser definition, so the caller must
 * not use or release it afterwards unless it holds a reference of its own.
 *
 * @param[in] ctx current library context
 * @param[in,out] pdag pointer to pdag to modify
 * @param[in] parser parser definition
 * @returns 0 on success, something else otherwise
 */
int
ln_pdagAddParser(ln_ctx ctx, struct ln_pdag **pdag, json_object *);


/**
 * Display the content of a pdag (debug function).
 * This is a debug aid that spits out a textual representation
 * of the pdags of a context (all type components and the main
 * component) via multiple calls of the debug callback.
 *
 * @param ctx context whose pdags are to be displayed
 */
void ln_displayPDAG(ln_ctx ctx);


/**
 * Generate a DOT graph.
 * Well, actually it does not generate the graph itself, but a
 * control file that is suitable for the GNU DOT tool. Such a file
 * can be very useful to understand complex sample databases
 * (not to mention that it is probably fun for those creating
 * samples).
 * The dot commands are appended to the provided string.
 *
 * @param[in] DAG pdag to display
 * @param[out] str string which receives the DOT commands.
 */
void ln_genDotPDAGGraph(struct ln_pdag *DAG, es_str_t **str);


/**
 * Build a pdag based on the provided string, but only if necessary.
 * The passed-in DAG is searched and traversed for str. If a node exactly
 * matching str is found, that node is returned.
If no exact match is found, * a new node is added. Existing nodes may be split, if a so-far common * prefix needs to be split in order to add the new node. * * @param[in] DAG root of the current DAG * @param[in] str string to be added * @param[in] offs offset into str where match needs to start * (this is required for recursive calls to handle * common prefixes) * @return NULL on error, otherwise the pdag leaf that * corresponds to the parameters passed. */ struct ln_pdag * ln_buildPDAG(struct ln_pdag *DAG, es_str_t *str, size_t offs); prsid_t ln_parserName2ID(const char *const __restrict__ name); int ln_pdagOptimize(ln_ctx ctx); void ln_fullPdagStats(ln_ctx ctx, FILE *const fp, const int); ln_parser_t * ln_newLiteralParser(ln_ctx ctx, char lit); ln_parser_t* ln_newParser(ln_ctx ctx, json_object *const prscnf); int ln_pdagFindType(ln_ctx ctx, const char *const __restrict__ name, const int bAdd); void ln_fullPDagStatsDOT(ln_ctx ctx, FILE *const fp); /* friends */ int ln_normalizeRec(npb_t *const __restrict__ npb, struct ln_pdag *dag, const size_t offs, const int bPartialMatch, struct json_object *json, struct ln_pdag **endNode, int failOnDuplicate, json_object *cur_json_object, const char *parser_name ); #endif /* #ifndef LOGNORM_PDAG_H_INCLUDED */ liblognorm-2.1.0/src/samp.c000066400000000000000000000751111520037563000155570ustar00rootroot00000000000000/* samp.c -- code for ln_samp objects. * This code handles rulebase processing. Rulebases have been called * "sample bases" in the early days of liblognorm, thus the name. * * Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #include #include #include #include #include #include #include #include "liblognorm.h" #include "lognorm.h" #include "samp.h" #include "internal.h" #include "parser.h" #include "pdag.h" #include "v1_liblognorm.h" #include "v1_ptree.h" void ln_sampFree(ln_ctx __attribute__((unused)) ctx, struct ln_samp *samp) { free(samp); } static int ln_parseLegacyFieldDescr(ln_ctx ctx, const char *const buf, const size_t lenBuf, size_t *bufOffs, es_str_t **str, json_object **prscnf) { int r = 0; char *cstr; /* for debug mode strings */ char *ftype = NULL; char name[MAX_FIELDNAME_LEN]; size_t iDst; struct json_object *json = NULL; char *ed = NULL; es_size_t i = *bufOffs; es_str_t *edata = NULL; for( iDst = 0 ; iDst < (MAX_FIELDNAME_LEN - 1) && i < lenBuf && buf[i] != ':' ; ++iDst) { name[iDst] = buf[i++]; } name[iDst] = '\0'; if(iDst == (MAX_FIELDNAME_LEN - 1)) { ln_errprintf(ctx, 0, "field name too long in: %s", buf+(*bufOffs)); FAIL(LN_INVLDFDESCR); } if(i == lenBuf) { ln_errprintf(ctx, 0, "field definition wrong in: %s", buf+(*bufOffs)); FAIL(LN_INVLDFDESCR); } if(iDst == 0) { FAIL(LN_INVLDFDESCR); } if(ctx->debug) { ln_dbgprintf(ctx, "parsed field: '%s'", name); } if(buf[i] != ':') { ln_errprintf(ctx, 0, "missing colon in: %s", buf+(*bufOffs)); FAIL(LN_INVLDFDESCR); } ++i; /* skip ':' */ /* parse and process type (trailing whitespace must be trimmed) */ es_emptyStr(*str); size_t j = i; /* scan for terminator */ 
while(j < lenBuf && buf[j] != ':' && buf[j] != '{' && buf[j] != '%') ++j; /* now trim trailing space backwards */ size_t next = j; --j; while(j >= i && isspace(buf[j])) --j; /* now copy */ while(i <= j) { CHKR(es_addChar(str, buf[i++])); } /* finally move i to consumed position */ i = next; if(i == lenBuf) { ln_errprintf(ctx, 0, "premature end (missing %%?) in: %s", buf+(*bufOffs)); FAIL(LN_INVLDFDESCR); } ftype = es_str2cstr(*str, NULL); ln_dbgprintf(ctx, "field type '%s', i %d", ftype, i); if(buf[i] == '{') { struct json_tokener *tokener = json_tokener_new(); json = json_tokener_parse_ex(tokener, buf+i, (int) (lenBuf - i)); if(json == NULL) { ln_errprintf(ctx, 0, "invalid json in '%s'", buf+i); } i += tokener->char_offset; json_tokener_free(tokener); } if(buf[i] == '%') { i++; } else { /* parse extra data */ CHKN(edata = es_newStr(8)); i++; while(i < lenBuf) { if(buf[i] == '%') { ++i; break; /* end of field */ } CHKR(es_addChar(&edata, buf[i++])); } es_unescapeStr(edata); if(ctx->debug) { cstr = es_str2cstr(edata, NULL); ln_dbgprintf(ctx, "parsed extra data: '%s'", cstr); free(cstr); } } struct json_object *val; *prscnf = json_object_new_object(); CHKN(val = json_object_new_string(name)); json_object_object_add(*prscnf, "name", val); CHKN(val = json_object_new_string(ftype)); json_object_object_add(*prscnf, "type", val); if(edata != NULL) { ed = es_str2cstr(edata, " "); CHKN(val = json_object_new_string(ed)); json_object_object_add(*prscnf, "extradata", val); } if(json != NULL) { /* now we need to merge the json params into the main object */ struct json_object_iterator it = json_object_iter_begin(json); struct json_object_iterator itEnd = json_object_iter_end(json); while (!json_object_iter_equal(&it, &itEnd)) { struct json_object *const v = json_object_iter_peek_value(&it); json_object_get(v); json_object_object_add(*prscnf, json_object_iter_peek_name(&it), v); json_object_iter_next(&it); } } *bufOffs = i; done: free(ed); if(edata != NULL) es_deleteStr(edata); 
free(ftype); if(json != NULL) json_object_put(json); return r; } /** * Extract a field description from a sample. * The field description is added to the tail of the current * subtree's field list. The parse buffer must be position on the * leading '%' that starts a field definition. It is a program error * if this condition is not met. * * Note that we break up the object model and access ptree members * directly. Let's consider us a friend of ptree. This is necessary * to optimize the structure for a high-speed parsing process. * * @param[in] str a temporary work string. This is passed in to save the * creation overhead * @returns 0 on success, something else otherwise */ static int addFieldDescr(ln_ctx ctx, struct ln_pdag **pdag, es_str_t *rule, size_t *bufOffs, es_str_t **str) { int r = 0; es_size_t i = *bufOffs; char *ftype = NULL; const char *buf; es_size_t lenBuf; struct json_object *prs_config = NULL; buf = (const char*)es_getBufAddr(rule); lenBuf = es_strlen(rule); assert(buf[i] == '%'); ++i; /* "eat" ':' */ /* skip leading whitespace in field name */ while(i < lenBuf && isspace(buf[i])) ++i; /* check if we have new-style json config */ if(buf[i] == '{' || buf[i] == '[') { struct json_tokener *tokener = json_tokener_new(); prs_config = json_tokener_parse_ex(tokener, buf+i, (int) (lenBuf - i)); i += tokener->char_offset; json_tokener_free(tokener); if(prs_config == NULL || i == lenBuf || buf[i] != '%') { ln_errprintf(ctx, 0, "invalid json in '%s'", buf+i); r = -1; goto done; } *bufOffs = i+1; /* eat '%' - if above ensures it is present */ } else { *bufOffs = i; CHKR(ln_parseLegacyFieldDescr(ctx, buf, lenBuf, bufOffs, str, &prs_config)); } CHKR(ln_pdagAddParser(ctx, pdag, prs_config)); done: free(ftype); return r; } /** * Construct a literal parser json definition. 
*/
/* The result has the form {"type": "literal", "text": "<lit>"}.
 * @param[in] lit the single character the literal parser shall match
 * @returns new json object (owned by the caller) or NULL if an
 *          allocation failed
 */
static json_object *
newLiteralParserJSONConf(char lit)
{
	char buf[] = "x";
	struct json_object *val;
	struct json_object *prscnf;

	buf[0] = lit;
	if((prscnf = json_object_new_object()) == NULL)
		return NULL;

	if((val = json_object_new_string("literal")) == NULL)
		goto fail;
	json_object_object_add(prscnf, "type", val);

	if((val = json_object_new_string(buf)) == NULL)
		goto fail;
	json_object_object_add(prscnf, "text", val);

	return prscnf;

fail:
	/* drop the partially built object so that the caller sees a clean NULL */
	json_object_put(prscnf);
	return NULL;
}

/**
 * Parse a Literal string out of the template and add it to the tree.
 * This function is used to create the unoptimized tree. So we do
 * one node for each character. These will be compacted by the optimizer
 * in a later stage. The advantage is that we do not need to care about
 * splitting the tree. As such the processing is fairly simple:
 *
 *      for each character in literal (left-to-right):
 *          create literal parser object o
 *          add new DAG node o, advance to it
 *
 * @param[in] ctx the context
 * @param[in/out] subtree on entry, current subtree, on exit newest
 *    		deepest subtree
 * @param[in] rule string with current rule
 * @param[in/out] bufOffs parse pointer, up to which offset is parsed
 * 		(is updated so that it points to first char after consumed
 * 		string on exit).
* @param    str a work buffer, provided to prevent creation of a new object
 * @return 0 on success, something else otherwise
 */
static int
parseLiteral(ln_ctx ctx, struct ln_pdag **pdag, es_str_t *rule,
	size_t *const __restrict__ bufOffs, es_str_t **str)
{
	int r = 0;
	size_t i = *bufOffs;
	unsigned char *buf = es_getBufAddr(rule);
	const size_t lenBuf = es_strlen(rule);
	char *cstr = NULL;

	/* collect the literal; "%%" is an escaped percent sign, a single '%'
	 * followed by anything else starts a field and so ends the literal.
	 */
	es_emptyStr(*str);
	while(i < lenBuf) {
		if(buf[i] == '%') {
			if(i+1 < lenBuf && buf[i+1] != '%') {
				break; /* field start is end of literal */
			}
			if (++i == lenBuf) break;
		}
		CHKR(es_addChar(str, buf[i]));
		++i;
	}

	es_unescapeStr(*str);
	/* without this check an allocation failure would be dereferenced below */
	CHKN(cstr = es_str2cstr(*str, NULL));
	if(ctx->debug) {
		ln_dbgprintf(ctx, "parsed literal: '%s'", cstr);
	}

	*bufOffs = i;

	/* we now add the string to the tree, one node per character */
	for(size_t k = 0 ; cstr[k] != '\0' ; ++k) {
		struct json_object *const prscnf =
			newLiteralParserJSONConf(cstr[k]);
		CHKN(prscnf);
		CHKR(ln_pdagAddParser(ctx, pdag, prscnf));
	}

	r = 0;

done:
	free(cstr);
	return r;
}

/* Implementation note:
 * We read in the sample, and split it into chunks of literal text and
 * fields. Each literal text is added as whole to the tree, as is each
 * field individually. To do so, we keep track of our current subtree
 * root, which changes whenever a new part of the tree is build. It is
 * set to the then-lowest part of the tree, where the next step sample
 * data is to be added.
 *
 * This function processes the whole string or returns an error.
* * format: literal1%field:type:extra-data%literal2 * * @returns the new dag root (or NULL in case of error) */ static int addSampToTree(ln_ctx ctx, es_str_t *rule, ln_pdag *dag, struct json_object *tagBucket) { int r = -1; es_str_t *str = NULL; size_t i; CHKN(str = es_newStr(256)); i = 0; while(i < es_strlen(rule)) { LN_DBGPRINTF(ctx, "addSampToTree %zu of %d", i, es_strlen(rule)); CHKR(parseLiteral(ctx, &dag, rule, &i, &str)); /* After the literal there can be field only*/ if (i < es_strlen(rule)) { CHKR(addFieldDescr(ctx, &dag, rule, &i, &str)); if (i == es_strlen(rule)) { /* finish the tree with empty literal to avoid false merging*/ CHKR(parseLiteral(ctx, &dag, rule, &i, &str)); } } } LN_DBGPRINTF(ctx, "end addSampToTree %zu of %d", i, es_strlen(rule)); /* we are at the end of rule processing, so this node is a terminal */ dag->flags.isTerminal = 1; dag->tags = tagBucket; dag->rb_file = strdup(ctx->conf_file); dag->rb_lineno = ctx->conf_ln_nbr; done: if(str != NULL) es_deleteStr(str); return r; } /** * get the initial word of a rule line that tells us the type of the * line. * @param[in] buf line buffer * @param[in] len length of buffer * @param[out] offs offset after "=" * @param[out] str string with "linetype-word" (newly created) * @returns 0 on success, something else otherwise */ static int getLineType(const char *buf, es_size_t lenBuf, size_t *offs, es_str_t **str) { int r = -1; size_t i; *str = es_newStr(16); for(i = 0 ; i < lenBuf && buf[i] != '=' ; ++i) { CHKR(es_addChar(str, buf[i])); } if(i < lenBuf) ++i; /* skip over '=' */ *offs = i; done: return r; } /** * Get a new common prefix from the config file. That is actually everything from * the current offset to the end of line. * * @param[in] buf line buffer * @param[in] len length of buffer * @param[in] offs offset after "=" * @param[in/out] str string to store common offset. If NULL, it is created, * otherwise it is emptied. 
* @returns 0 on success, something else otherwise */ static int getPrefix(const char *buf, es_size_t lenBuf, es_size_t offs, es_str_t **str) { int r; if(*str == NULL) { CHKN(*str = es_newStr(lenBuf - offs)); } else { es_emptyStr(*str); } r = es_addBuf(str, (char*)buf + offs, lenBuf - offs); done: return r; } /** * Extend the common prefix. This means that the line is concatenated * to the prefix. This is useful if the same rulebase is to be used with * different prefixes (well, not strictly necessary, but probably useful). * * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in] offs offset to-be-added text starts * @returns 0 on success, something else otherwise */ static int extendPrefix(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t offs) { return es_addBuf(&ctx->rulePrefix, (char*)buf+offs, lenBuf - offs); } /** * Add a tag to the tag bucket. Helper to processTags. * @param[in] ctx current context * @param[in] tagname string with tag name * @param[out] tagBucket tagbucket to which new tags shall be added * the tagbucket is created if it is NULL * @returns 0 on success, something else otherwise */ static int addTagStrToBucket(ln_ctx ctx, es_str_t *tagname, struct json_object **tagBucket) { int r = -1; char *cstr; struct json_object *tag; if(*tagBucket == NULL) { CHKN(*tagBucket = json_object_new_array()); } cstr = es_str2cstr(tagname, NULL); ln_dbgprintf(ctx, "tag found: '%s'", cstr); CHKN(tag = json_object_new_string(cstr)); json_object_array_add(*tagBucket, tag); free(cstr); r = 0; done: return r; } /** * Extract the tags and create a tag bucket out of them * * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in,out] poffs offset where tags start, on exit and success * offset after tag part (excluding ':') * @param[out] tagBucket tagbucket to which new tags shall be added * the tagbucket is created if it is NULL * @returns 0 on success, 
something else otherwise */ static int processTags(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t *poffs, struct json_object **tagBucket) { int r = -1; es_str_t *str = NULL; es_size_t i; assert(poffs != NULL); i = *poffs; while(i < lenBuf && buf[i] != ':') { if(buf[i] == ',') { /* end of this tag */ if(str == NULL) goto done; CHKR(addTagStrToBucket(ctx, str, tagBucket)); es_deleteStr(str); str = NULL; } else { if(str == NULL) { CHKN(str = es_newStr(32)); } CHKR(es_addChar(&str, buf[i])); } ++i; } if(i >= lenBuf || buf[i] != ':') goto done; ++i; /* skip ':' */ if(str != NULL) { CHKR(addTagStrToBucket(ctx, str, tagBucket)); es_deleteStr(str); } *poffs = i; r = 0; done: return r; } /** * Process a new rule and add it to pdag. * * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in] offs offset where rule starts * @returns 0 on success, something else otherwise */ static int processRule(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t offs) { int r = -1; es_str_t *str = NULL; struct json_object *tagBucket = NULL; ln_dbgprintf(ctx, "rule line to add: '%s'", buf+offs); CHKR(processTags(ctx, buf, lenBuf, &offs, &tagBucket)); if(offs == lenBuf) { ln_errprintf(ctx, 0, "error: actual message sample part is missing"); goto done; } if(ctx->rulePrefix == NULL) { CHKN(str = es_newStr(lenBuf)); } else { CHKN(str = es_strdup(ctx->rulePrefix)); } CHKR(es_addBuf(&str, (char*)buf + offs, lenBuf - offs)); CHKR(addSampToTree(ctx, str, ctx->pdag, tagBucket)); r = 0; done: if(r != 0 && tagBucket != NULL) json_object_put(tagBucket); if(str != NULL) es_deleteStr(str); return r; } static int getTypeName(ln_ctx ctx, const char *const __restrict__ buf, const size_t lenBuf, size_t *const __restrict__ offs, char *const __restrict__ dstbuf) { int r = -1; size_t iDst; size_t i = *offs; if(buf[i] != '@') { ln_errprintf(ctx, 0, "user-defined type name must " "start with '@'"); goto done; } for( iDst = 0 ; i < lenBuf && buf[i] 
!= ':' && iDst < MAX_TYPENAME_LEN - 1 ; ++i, ++iDst) { if(isspace(buf[i])) { ln_errprintf(ctx, 0, "user-defined type name must " "not contain whitespace"); goto done; } dstbuf[iDst] = buf[i]; } dstbuf[iDst] = '\0'; if(i < lenBuf && buf[i] == ':') { r = 0, *offs = i+1; /* skip ":" */ } done: return r; } /** * Process a type definition and add it to the PDAG * disconnected components. * * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in] offs offset where rule starts * @returns 0 on success, something else otherwise */ static int processType(ln_ctx ctx, const char *const __restrict__ buf, const size_t lenBuf, size_t offs) { int r = -1; es_str_t *str = NULL; char typename[MAX_TYPENAME_LEN]; ln_dbgprintf(ctx, "type line to add: '%s'", buf+offs); CHKR(getTypeName(ctx, buf, lenBuf, &offs, typename)); ln_dbgprintf(ctx, "type name is '%s'", typename); ln_dbgprintf(ctx, "type line to add: '%s'", buf+offs); if(offs == lenBuf) { ln_errprintf(ctx, 0, "error: actual message sample part is missing in type def"); goto done; } // TODO: optimize CHKN(str = es_newStr(lenBuf)); CHKR(es_addBuf(&str, (char*)buf + offs, lenBuf - offs)); int td = ln_pdagFindType(ctx, typename, 1); CHKN((td >= 0 && td < ctx->nTypes) ? ctx->type_pdags[td].pdag : NULL); CHKR(addSampToTree(ctx, str, ctx->type_pdags[td].pdag, NULL)); r = 0; done: if(str != NULL) es_deleteStr(str); return r; } /** * Obtain a field name from a rule base line. 
* * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in/out] offs on entry: offset where tag starts, * on exit: updated offset AFTER TAG and (':') * @param [out] strTag obtained tag, if successful * @returns 0 on success, something else otherwise */ static int getFieldName(ln_ctx __attribute__((unused)) ctx, const char *buf, es_size_t lenBuf, es_size_t *offs, es_str_t **strTag) { int r = -1; es_size_t i; i = *offs; while(i < lenBuf && (isalnum(buf[i]) || buf[i] == '_' || buf[i] == '.')) { if(*strTag == NULL) { CHKN(*strTag = es_newStr(32)); } CHKR(es_addChar(strTag, buf[i])); ++i; } *offs = i; r = 0; done: return r; } /** * Skip over whitespace. * Skips any whitespace present at the offset. * * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in/out] offs on entry: offset first unprocessed position */ static void skipWhitespace(ln_ctx __attribute__((unused)) ctx, const char *buf, es_size_t lenBuf, es_size_t *offs) { while(*offs < lenBuf && isspace(buf[*offs])) { (*offs)++; } } /** * Obtain an annotation (field) operation. * This usually is a plus or minus sign followed by a field name * followed (if plus) by an equal sign and the field value. On entry, * offs must be positioned on the first unprocessed field (after ':' for * the initial field!). Extra whitespace is detected and, if present, * skipped. The obtained operation is added to the annotation set provided. * Note that extracted string objects are passed to the annotation; thus it * is vital NOT to free them (most importantly, this is *not* a memory leak). 
* * @param[in] ctx current context * @param[in] annot active annotation set to which the operation is to be added * @param[in] buf line buffer * @param[in] len length of buffer * @param[in/out] offs on entry: offset where tag starts, * on exit: updated offset AFTER TAG and (':') * @param [out] strTag obtained tag, if successful * @returns 0 on success, something else otherwise */ static int getAnnotationOp(ln_ctx ctx, ln_annot *annot, const char *buf, es_size_t lenBuf, es_size_t *offs) { int r = -1; es_size_t i; es_str_t *fieldName = NULL; es_str_t *fieldVal = NULL; ln_annot_opcode opc; i = *offs; skipWhitespace(ctx, buf, lenBuf, &i); if(i == lenBuf) { r = 0; goto done; /* nothing left to process (no error!) */ } switch(buf[i]) { case '+': opc = ln_annot_ADD; break; case '#': ln_dbgprintf(ctx, "inline comment in 'annotate' line: %s", buf); *offs = lenBuf; r = 0; goto done; case '-': ln_dbgprintf(ctx, "annotate op '-' not yet implemented - failing"); /*FALLTHROUGH*/ default:ln_errprintf(ctx, 0, "invalid annotate operation '%c': %s", buf[i], buf+i); goto fail; } i++; if(i == lenBuf) goto fail; /* nothing left to process */ CHKR(getFieldName(ctx, buf, lenBuf, &i, &fieldName)); if(i == lenBuf) goto fail; /* nothing left to process */ if(buf[i] != '=') goto fail; /* format error */ i++; skipWhitespace(ctx, buf, lenBuf, &i); if(buf[i] != '"') goto fail; /* format error */ ++i; while(i < lenBuf && buf[i] != '"') { if(fieldVal == NULL) { CHKN(fieldVal = es_newStr(32)); } CHKR(es_addChar(&fieldVal, buf[i])); ++i; } *offs = (i == lenBuf) ? i : i+1; CHKR(ln_addAnnotOp(annot, opc, fieldName, fieldVal)); r = 0; done: return r; fail: return -1; } /** * Process a new annotation and add it to the annotation set. 
* * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in] offs offset where annotation starts * @returns 0 on success, something else otherwise */ static int processAnnotate(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t offs) { int r; es_str_t *tag = NULL; ln_annot *annot; ln_dbgprintf(ctx, "sample annotation to add: '%s'", buf+offs); CHKR(getFieldName(ctx, buf, lenBuf, &offs, &tag)); skipWhitespace(ctx, buf, lenBuf, &offs); if(buf[offs] != ':' || tag == NULL) { ln_dbgprintf(ctx, "invalid tag field in annotation, line is '%s'", buf); r=-1; goto done; } ++offs; /* we got an annotation! */ CHKN(annot = ln_newAnnot(tag)); while(offs < lenBuf) { CHKR(getAnnotationOp(ctx, annot, buf, lenBuf, &offs)); } r = ln_addAnnotToSet(ctx->pas, annot); done: return r; } /** * Process include directive. This permits to add unlimited layers * of include files. * * @param[in] ctx current context * @param[in] buf line buffer, a C-string * @param[in] offs offset where annotation starts * @returns 0 on success, something else otherwise */ static int processInclude(ln_ctx ctx, const char *buf, const size_t offs) { int r; const char *const conf_file_save = ctx->conf_file; char *const fname = strdup(buf+offs); size_t lenfname = strlen(fname); const unsigned conf_ln_nbr_save = ctx->conf_ln_nbr; /* trim string - not optimized but also no need to */ for(size_t i = lenfname - 1 ; i > 0 ; --i) { if(isspace(fname[i])) { fname[i] = '\0'; --lenfname; } } CHKR(ln_loadSamples(ctx, fname)); done: free(fname); ctx->conf_file = conf_file_save; ctx->conf_ln_nbr = conf_ln_nbr_save; return r; } /** * Reads a rule (sample) stored in buffer buf and creates a new ln_samp object * out of it, which it adds to the pdag (if required). 
* * @param[ctx] ctx current library context * @param[buf] cstr buffer containing the string contents of the sample * @param[lenBuf] length of the sample contained within buf * @return standard error code */ static int ln_processSamp(ln_ctx ctx, const char *buf, const size_t lenBuf) { int r = -1; es_str_t *typeStr = NULL; size_t offs; CHKR(getLineType(buf, lenBuf, &offs, &typeStr)); if(!es_strconstcmp(typeStr, "prefix")) { CHKR(getPrefix(buf, lenBuf, offs, &ctx->rulePrefix)); } else if(!es_strconstcmp(typeStr, "extendprefix")) { CHKR(extendPrefix(ctx, buf, lenBuf, offs)); } else if(!es_strconstcmp(typeStr, "rule")) { CHKR(processRule(ctx, buf, lenBuf, offs)); } else if(!es_strconstcmp(typeStr, "type")) { CHKR(processType(ctx, buf, lenBuf, offs)); } else if(!es_strconstcmp(typeStr, "annotate")) { CHKR(processAnnotate(ctx, buf, lenBuf, offs)); } else if(!es_strconstcmp(typeStr, "include")) { CHKR(processInclude(ctx, buf, offs)); } else { char *str; str = es_str2cstr(typeStr, NULL); ln_errprintf(ctx, 0, "invalid record type detected: '%s'", str); free(str); goto done; } r = 0; done: if(typeStr != NULL) es_deleteStr(typeStr); return r; } /** * Read a character from our sample source. */ static int ln_sampReadChar(const ln_ctx __attribute__((unused)) ctx, FILE *const __restrict__ repo, const char **inpbuf) { int c; assert((repo != NULL && inpbuf == NULL) || (repo == NULL && inpbuf != NULL)); if(repo == NULL) { c = (**inpbuf == '\0') ? EOF : *(*inpbuf)++; } else { c = fgetc(repo); } return c; } /* note: comments are only supported at beginning of line! */ /* skip to end of line */ void ln_sampSkipCommentLine(ln_ctx ctx, FILE * const __restrict__ repo, const char **inpbuf) { int c; do { c = ln_sampReadChar(ctx, repo, inpbuf); } while(c != EOF && c != '\n'); ++ctx->conf_ln_nbr; } /* this checks if in a multi-line rule, the next line seems to be a new * rule, which would meand we have some unmatched percent signs inside * our rule (what we call a "runaway rule"). 
This can easily happen and * is otherwise hard to debug, so let's see if it is the case... * @return 1 if this is a runaway rule, 0 if not */ int ln_sampChkRunawayRule(ln_ctx ctx, FILE *const __restrict__ repo, const char **inpbuf) { int r = 1; fpos_t fpos; char buf[6]; int cont = 1; int read; fgetpos(repo, &fpos); while(cont) { fpos_t inner_fpos; fgetpos(repo, &inner_fpos); if((read = fread(buf, sizeof(char), sizeof(buf)-1, repo)) == 0) { r = 0; goto done; } if(buf[0] == '\n') { fsetpos(repo, &inner_fpos); if(fread(buf, sizeof(char), 1, repo)) {}; /* skip '\n' */ continue; } else if(buf[0] == '#') { fsetpos(repo, &inner_fpos); const unsigned conf_ln_nbr_save = ctx->conf_ln_nbr; ln_sampSkipCommentLine(ctx, repo, inpbuf); ctx->conf_ln_nbr = conf_ln_nbr_save; continue; } if(read != 5) goto done; /* cannot be a rule= line! */ cont = 0; /* no comment, so we can decide */ buf[5] = '\0'; if(!strncmp(buf, "rule=", 5)) { ln_errprintf(ctx, 0, "line has 'rule=' at begin of line, which " "does look like a typo in the previous lines (unmatched " "%% character) and is forbidden. If valid, please re-format " "the rule to start with other characters. Rule ignored."); goto done; } } r = 0; done: fsetpos(repo, &fpos); return r; } /** * Read a rule (sample) from repository (sequentially). * * Reads a sample starting with the current file position and * creates a new ln_samp object out of it, which it adds to the * pdag. * * @param[in] ctx current library context * @param[in] repo repository descriptor if file input is desired * @param[in/out] ptr to ptr of input buffer; this is used if a string is * provided instead of a file. If so, this pointer is advanced * as data is consumed. * @param[out] isEof must be set to 0 on entry and is switched to 1 if EOF occurred. 
* @return standard error code */ static int ln_sampRead(ln_ctx ctx, FILE *const __restrict__ repo, const char **inpbuf, int *const __restrict__ isEof) { int r = 0; char buf[64*1024]; /**< max size of rule - TODO: make configurable */ size_t i = 0; int inParser = 0; int done = 0; while(!done) { const int c = ln_sampReadChar(ctx, repo, inpbuf); if(c == EOF) { *isEof = 1; if(i == 0) goto done; else done = 1; /* last line missing LF, still process it! */ } else if(c == '\n') { ++ctx->conf_ln_nbr; if(inParser && repo != NULL) { if(ln_sampChkRunawayRule(ctx, repo, inpbuf)) { /* ignore previous rule */ inParser = 0; i = 0; } } if(!inParser && i != 0) done = 1; } else if(c == '#' && i == 0) { ln_sampSkipCommentLine(ctx, repo, inpbuf); i = 0; /* back to beginning */ } else { if(c == '%') inParser = (inParser) ? 0 : 1; buf[i++] = c; if(i >= sizeof(buf)) { ln_errprintf(ctx, 0, "line is too long"); goto done; } } } buf[i] = '\0'; ln_dbgprintf(ctx, "read rulebase line[~%d]: '%s'", ctx->conf_ln_nbr, buf); CHKR(ln_processSamp(ctx, buf, i)); done: return r; } /* check rulebase format version. Returns 2 if this is v2 rulebase, * 1 for any pre-v2 and -1 if there was a problem reading the file. */ static int checkVersion(FILE *const fp) { char buf[64]; size_t len; if(fgets(buf, sizeof(buf), fp) == NULL) return -1; /* Strip trailing CR/LF for CRLF tolerance (rulebase files may * pass through git autocrlf or Windows editors). */ len = strlen(buf); while(len > 0 && (buf[len-1] == '\n' || buf[len-1] == '\r')) buf[--len] = '\0'; if(!strcmp(buf, "version=2")) { return 2; } else { return 1; } } /* we have a v1 rulebase, so let's do all stuff that we need * to make that ole piece of ... work. */ static int doOldCruft(ln_ctx ctx, const char *file) { int r = -1; if((ctx->ptree = ln_newPTree(ctx, NULL)) == NULL) { free(ctx); r = -1; goto done; } r = ln_v1_loadSamples(ctx, file); done: return r; } /* try to open a rulebase file. 
This also tries to see if we need to * load it from some pre-configured alternative location. * @returns open file pointer or NULL in case of error */ static FILE * tryOpenRBFile(ln_ctx ctx, const char *const file) { FILE *repo = NULL; if((repo = fopen(file, "r")) != NULL) goto done; const int eno1 = errno; const char *const rb_lib = getenv("LIBLOGNORM_RULEBASES"); if(rb_lib == NULL || *file == '/') { ln_errprintf(ctx, eno1, "cannot open rulebase '%s'", file); goto done; } char *fname = NULL; int len; len = asprintf(&fname, (rb_lib[strlen(rb_lib)-1] == '/') ? "%s%s" : "%s/%s", rb_lib, file); if(len == -1) { ln_errprintf(ctx, errno, "alloc error: cannot open rulebase '%s'", file); goto done; } if((repo = fopen(fname, "r")) == NULL) { const int eno2 = errno; ln_errprintf(ctx, eno1, "cannot open rulebase '%s'", file); ln_errprintf(ctx, eno2, "also tried to locate %s via " "rulebase directory without success. Expanded " "name was '%s'", file, fname); } free(fname); done: return repo; } /* @return 0 if all is ok, 1 if an error occurred */ int ln_sampLoad(ln_ctx ctx, const char *file) { int r = 1; FILE *repo; int isEof = 0; ln_dbgprintf(ctx, "loading rulebase file '%s'", file); if(file == NULL) goto done; if((repo = tryOpenRBFile(ctx, file)) == NULL) goto done; const int version = checkVersion(repo); ln_dbgprintf(ctx, "rulebase version is %d\n", version); if(version == -1) { ln_errprintf(ctx, errno, "error determining version of %s", file); fclose(repo); goto done; } if(ctx->version != 0 && version != ctx->version) { ln_errprintf(ctx, errno, "rulebase '%s' must be version %d, but is version %d " " - can not be processed", file, ctx->version, version); fclose(repo); goto done; } ctx->version = version; if(ctx->version == 1) { fclose(repo); r = doOldCruft(ctx, file); goto done; } /* now we are in our native code */ ++ctx->conf_ln_nbr; /* "version=2" is line 1! 
*/ while(!isEof) { CHKR(ln_sampRead(ctx, repo, NULL, &isEof)); } fclose(repo); r = 0; if(ctx->include_level == 1) ln_pdagOptimize(ctx); done: return r; } /* @return 0 if all is ok, 1 if an error occurred */ int ln_sampLoadFromString(ln_ctx ctx, const char *string) { int r = 1; int isEof = 0; if(string == NULL) goto done; ln_dbgprintf(ctx, "loading v2 rulebase from string '%s'", string); ctx->version = 2; while(!isEof) { CHKR(ln_sampRead(ctx, NULL, &string, &isEof)); } r = 0; if(ctx->include_level == 1) ln_pdagOptimize(ctx); done: return r; } liblognorm-2.1.0/src/samp.h000066400000000000000000000035401520037563000155610ustar00rootroot00000000000000/** * @file samples.h * @brief Object to process log samples. * @author Rainer Gerhards * * This object handles log samples, and in actual log sample files. * It co-operates with the ptree object to build the actual parser tree. *//* * * liblognorm - a fast samples-based log normalization library * Copyright 2010-2015 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General PublicCH License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. 
*/ #ifndef LIBLOGNORM_SAMPLES_H_INCLUDED #define LIBLOGNORM_SAMPLES_H_INCLUDED #include /* we need es_size_t */ #include /** * A single log sample. */ struct ln_samp { es_str_t *msg; }; void ln_sampFree(ln_ctx ctx, struct ln_samp *samp); int ln_sampLoad(ln_ctx ctx, const char *file); int ln_sampLoadFromString(ln_ctx ctx, const char *string); /* dual-use functions for v1 engine */ void ln_sampSkipCommentLine(ln_ctx ctx, FILE * const __restrict__ repo, const char **inpbuf); int ln_sampChkRunawayRule(ln_ctx ctx, FILE *const __restrict__ repo, const char **inpbuf); #endif /* #ifndef LIBLOGNORM_SAMPLES_H_INCLUDED */ liblognorm-2.1.0/src/turbo.c000066400000000000000000000745531520037563000157630ustar00rootroot00000000000000/* * turbo.c -- liblognorm integration for TurboVM bytecode engine * * Part of the TurboVM bytecode engine for high-performance log parsing. * * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution.
*/

#include "config.h"

#ifdef ENABLE_TURBO

/* liblognorm headers */
#include "liblognorm.h"
#include "lognorm.h"
#include "pdag.h"
#include "parser.h"
#include "annot.h"
#include "internal.h"

/* libfastjson */
/* NOTE(review): the header name is missing in this copy of the file */
#include

/* Turbo headers */
#include "turbo.h"
#include "turbo_opcode.h"
#include "turbo_vm.h"
#include "turbo_result_fast.h"
#include "turbo_arena.h"
#include "turbo_simd.h"
#include "turbo_snapshot.h"
#include "turbo_json.h"

/* NOTE(review): the names of these four headers are missing as well */
#include
#include
#include
#include

/*============================================================================
 * Turbo Context Structure (OPTIMIZED)
 *============================================================================*/

struct ln_turbo_ctx_s {
	/* Compiled program */
	ln_instr_t *code;	/* instruction array, grown by emit() */
	uint32_t code_len;	/* number of instructions in use */
	uint32_t code_cap;	/* number of instructions allocated */

	/* Arena for overflow allocations */
	ln_arena_t arena;

	/* VM instance (reusable) */
	ln_vm_t vm;

	/* FAST result structure (inline strings, static names) */
	ln_fast_result_t result;

	/* Pre-allocated JSON buffer */
	char *json_buf;
	size_t json_buf_cap;

	/* Statistics */
	ln_turbo_stats_t stats;

	/* Configuration */
	int enabled;
	int debug;
};

/*============================================================================
 * Parser ID Mapping
 *============================================================================*/

/* NOTE(review): presumably these values must stay in sync with the parser
 * table of the pdag component - verify against parser/pdag sources.
 */
#define PRSID_LITERAL 0
#define PRSID_REPEAT 1
#define PRSID_RFC3164DATE 2
#define PRSID_RFC5424DATE 3
#define PRSID_NUMBER 4
#define PRSID_FLOAT 5
#define PRSID_HEXNUMBER 6
#define PRSID_KERNEL_TIMESTAMP 7
#define PRSID_WHITESPACE 8
#define PRSID_IPV4 9
#define PRSID_IPV6 10
#define PRSID_WORD 11
#define PRSID_ALPHA 12
#define PRSID_REST 13
#define PRSID_OPQUOTEDSTRING 14
#define PRSID_QUOTEDSTRING 15
#define PRSID_ISODATE 16
#define PRSID_TIME24HR 17
#define PRSID_TIME12HR 18
#define PRSID_DURATION 19
#define PRSID_CISCO_IFACE 20
#define PRSID_JSON 21
#define PRSID_CEE_SYSLOG 22
#define PRSID_MAC48 23
#define PRSID_CEF 24
#define PRSID_V2_IPTABLES 25
#define PRSID_NAMEVALUE 26
#define PRSID_CHECKPOINT 27
#define PRSID_STRINGTO 28
#define PRSID_CHARTO 29
#define PRSID_CHARSEP 30
#define PRSID_STRING 31

/* Map a parser id to the opcode that implements it.
 * Several parsers share one opcode (e.g. all date/time parsers map to
 * OP_FIELD_DATE). Ids without a case label - PRSID_REPEAT among them -
 * yield OP_INVALID.
 */
static ln_opcode_t prsid_to_opcode(prsid_t prsid) {
	switch (prsid) {
	case PRSID_LITERAL: return OP_LITERAL;
	case PRSID_WORD: return OP_FIELD_WORD;
	case PRSID_ALPHA: return OP_FIELD_WORD;
	case PRSID_STRING: return OP_FIELD_WORD;
	case PRSID_NUMBER: return OP_FIELD_INT;
	case PRSID_FLOAT: return OP_FIELD_FLOAT;
	case PRSID_HEXNUMBER: return OP_FIELD_HEX;
	case PRSID_IPV4: return OP_FIELD_IPV4;
	case PRSID_IPV6: return OP_FIELD_IPV6;
	case PRSID_REST: return OP_FIELD_REST;
	case PRSID_OPQUOTEDSTRING: return OP_FIELD_QUOTED;
	case PRSID_QUOTEDSTRING: return OP_FIELD_QUOTED;
	case PRSID_WHITESPACE: return OP_SKIP_SPACE;
	case PRSID_JSON: return OP_FIELD_JSON;
	case PRSID_MAC48: return OP_FIELD_MAC;
	case PRSID_CHARTO: return OP_FIELD_CHAR_TO;
	case PRSID_CHARSEP: return OP_FIELD_CHAR_TO; /* Same behavior */
	case PRSID_STRINGTO: return OP_FIELD_STR_TO;
	case PRSID_RFC3164DATE: return OP_FIELD_DATE;
	case PRSID_RFC5424DATE: return OP_FIELD_DATE;
	case PRSID_ISODATE: return OP_FIELD_DATE;
	case PRSID_TIME24HR: return OP_FIELD_DATE;
	case PRSID_TIME12HR: return OP_FIELD_DATE;
	case PRSID_DURATION: return OP_FIELD_WORD; /* Parse as word */
	case PRSID_KERNEL_TIMESTAMP: return OP_FIELD_WORD; /* Parse as word */
	case PRSID_CISCO_IFACE: return OP_FIELD_WORD; /* Parse as word */
	case PRSID_NAMEVALUE: return OP_FIELD_NAME_VALUE;
	case PRSID_V2_IPTABLES: return OP_V2_IPTABLES;
	case PRSID_CEE_SYSLOG: return OP_CEE_SYSLOG;
	case PRSID_CEF: return OP_CEF_HDR;
	case PRSID_CHECKPOINT: return OP_CHECKPOINT_LEA;
	default: return OP_INVALID;
	}
}

/*============================================================================
 * Compiler State
 *============================================================================*/

typedef struct {
	ln_turbo_ctx_t *turbo;	/* context that receives the emitted code */
	ln_ctx ctx;
	int n_rules;
	int n_branches;		/* incremented by emit_fork() */
	int max_depth;
	int depth;
	int in_custom_type;
	struct {
		uint32_t *offsets;
		int cap;
		int len;
	} visited;
} compiler_t;

/*============================================================================
 * Compiler Helpers
 *============================================================================*/

/* Append a copy of *instr to the program of comp->turbo.
 * The code array grows by doubling; the first allocation holds 256
 * instructions.
 * @returns index of the new instruction, UINT32_MAX if realloc() failed
 *          (the existing array is left untouched in that case)
 */
static uint32_t emit(compiler_t *comp, ln_instr_t *instr) {
	ln_turbo_ctx_t *turbo = comp->turbo;

	if (turbo->code_len >= turbo->code_cap) {
		uint32_t new_cap = turbo->code_cap * 2;
		if (new_cap == 0) new_cap = 256;
		ln_instr_t *new_code = realloc(turbo->code, new_cap * sizeof(ln_instr_t));
		if (!new_code) return UINT32_MAX;
		turbo->code = new_code;
		turbo->code_cap = new_cap;
	}

	uint32_t idx = turbo->code_len++;
	turbo->code[idx] = *instr;
	return idx;
}

/* Emit an OP_LITERAL instruction; the text is stored inline in
 * instr.data.str and its length in instr.aux.
 * @returns what emit() returns, or 0 if len does not fit the inline buffer
 * NOTE(review): 0 is also the index emit() returns for the very first
 * instruction, so callers cannot tell the two apart - confirm how the
 * return value is checked.
 */
static uint32_t emit_literal(compiler_t *comp, const char *lit, size_t len) {
	ln_instr_t instr = {0};
	instr.op = OP_LITERAL;
	if (len >= sizeof(instr.data.str))
		return 0;  /* too long for inline — fall back to v1 */
	instr.aux = (uint16_t)len;
	memcpy(instr.data.str, lit, len);
	return emit(comp, &instr);
}

/* Emit a field instruction with LN_INSTR_F_STORE set.
 * For OP_FIELD_CHAR_TO and OP_FIELD_STR_TO the delimiter and the name go
 * into instr.data.char_to; for every other opcode only the name is stored
 * (in instr.data.str) and delim is ignored. The name stays NUL-terminated
 * because the instruction is zero-initialized and the length check leaves
 * at least one byte unused.
 * @returns what emit() returns, or 0 if name does not fit (see the note
 *          on emit_literal regarding the value 0)
 */
static uint32_t emit_field(compiler_t *comp, ln_opcode_t op, const char *name, char delim) {
	ln_instr_t instr = {0};
	instr.op = op;
	instr.flags = LN_INSTR_F_STORE;

	if (op == OP_FIELD_CHAR_TO || op == OP_FIELD_STR_TO) {
		instr.data.char_to.delim = (uint8_t)delim;
		if (name) {
			size_t nlen = strlen(name);
			if (nlen >= sizeof(instr.data.char_to.name))
				return 0;  /* field name too long — fall back to v1 */
			memcpy(instr.data.char_to.name, name, nlen);
		}
	} else {
		if (name) {
			size_t nlen = strlen(name);
			if (nlen >= sizeof(instr.data.str))
				return 0;  /* field name too long — fall back to v1 */
			memcpy(instr.data.str, name, nlen);
		}
	}

	return emit(comp, &instr);
}

/* Emit an OP_FORK instruction with all operands zero and count it as a
 * branch in the compiler statistics.
 * @returns what emit() returns
 */
static uint32_t emit_fork(compiler_t *comp) {
	ln_instr_t instr = {0};
	instr.op = OP_FORK;
	comp->n_branches++;
	return emit(comp, &instr);
}

/**
 * @brief Emit OP_TAG instructions for all tags on a terminal node.
*/ static int emit_tags(compiler_t *comp, struct ln_pdag *node) { if (!node->tags) return 0; /* no tags */ int n = json_object_array_length(node->tags); for (int i = 0; i < n; i++) { struct json_object *tagObj = json_object_array_get_idx(node->tags, i); if (!tagObj) continue; const char *tagStr = json_object_get_string(tagObj); if (!tagStr || !tagStr[0]) continue; ln_instr_t instr = {0}; instr.op = OP_TAG; size_t len = strlen(tagStr); if (len >= sizeof(instr.data.str)) return -1; /* tag name too long — fall back to v1 */ memcpy(instr.data.str, tagStr, len); if (emit(comp, &instr) == UINT32_MAX) return -1; } return 0; } /** * @brief Resolve annotations at compile time and emit OP_STATIC_FIELD. * * For each tag on the terminal node, look up the annotation set. * For each annotation operation (ADD), emit an OP_STATIC_FIELD instruction * with the field name and value baked into the instruction data. * * This eliminates the need for runtime annotation resolution entirely. * The annotation values (like event.kind="event") become part of the * compiled instruction stream — zero per-message overhead. */ static int emit_annotation_fields(compiler_t *comp, struct ln_pdag *node) { if (!node->tags || !comp->ctx->pas) return 0; int n = json_object_array_length(node->tags); for (int i = 0; i < n; i++) { struct json_object *tagObj = json_object_array_get_idx(node->tags, i); if (!tagObj) continue; const char *tagStr = json_object_get_string(tagObj); if (!tagStr || !tagStr[0]) continue; /* Look up annotation for this tag */ es_str_t *tag_es = es_newStrFromCStr(tagStr, strlen(tagStr)); if (!tag_es) continue; ln_annot *annot = ln_findAnnot(comp->ctx->pas, tag_es); es_deleteStr(tag_es); if (!annot) continue; /* Emit OP_STATIC_FIELD for each ADD operation */ for (ln_annot_op *op = annot->oproot; op; op = op->next) { if (op->opc != ln_annot_ADD) continue; char *name_cstr = ln_es_str2cstr(&op->name); char *val_cstr = op->value ? 
ln_es_str2cstr(&op->value) : NULL; if (!name_cstr || !name_cstr[0]) continue; ln_instr_t instr = {0}; instr.op = OP_STATIC_FIELD; size_t klen = strlen(name_cstr); if (klen >= sizeof(instr.data.kv.key)) klen = sizeof(instr.data.kv.key) - 1; memcpy(instr.data.kv.key, name_cstr, klen); instr.aux = (uint16_t)klen; if (val_cstr) { size_t vlen = strlen(val_cstr); if (vlen >= sizeof(instr.data.kv.val)) vlen = sizeof(instr.data.kv.val) - 1; memcpy(instr.data.kv.val, val_cstr, vlen); } if (emit(comp, &instr) == UINT32_MAX) return -1; } } return 0; } static uint32_t emit_match(compiler_t *comp, struct ln_pdag *node) { /* Emit tags first — these populate result.tags[] */ if (emit_tags(comp, node) != 0) return UINT32_MAX; /* Emit resolved annotation fields as static key-value pairs */ if (emit_annotation_fields(comp, node) != 0) return UINT32_MAX; /* Emit the MATCH instruction */ ln_instr_t instr = {0}; instr.op = OP_MATCH; if (node->rb_id) { size_t len = strlen(node->rb_id); if (len >= sizeof(instr.data.str)) len = sizeof(instr.data.str) - 1; memcpy(instr.data.str, node->rb_id, len); } comp->n_rules++; return emit(comp, &instr); } static uint32_t emit_halt(compiler_t *comp) { ln_instr_t instr = {0}; instr.op = OP_HALT; return emit(comp, &instr); } static uint32_t emit_ctx_push(compiler_t *comp, const char *name) { ln_instr_t instr = {0}; instr.op = OP_CTX_PUSH; if (name) { size_t nlen = strlen(name); if (nlen >= sizeof(instr.data.str)) return 0; /* context name too long — fall back to v1 */ memcpy(instr.data.str, name, nlen); } return emit(comp, &instr); } static uint32_t emit_ctx_pop(compiler_t *comp) { ln_instr_t instr = {0}; instr.op = OP_CTX_POP; return emit(comp, &instr); } /*============================================================================ * PDAG Traversal *============================================================================*/ static int compile_node(compiler_t *comp, struct ln_pdag *node, uint32_t *entry); /* Forward declaration for name-value-list 
parser data (defined in parser.c) */ struct data_NameValue { char sep; /* separator (between key/value pairs) */ char ass; /* assignator (between key and value) */ }; /* Forward declaration for char-sep parser data (defined in parser.c). * Layout matches the beginning of data_CharTo — both start with * term_chars + n_term_chars, so the cast is ABI-safe. */ struct data_CharSeparated { char *term_chars; int n_term_chars; }; /* Forward declaration for Checkpoint LEA parser data (defined in parser.c). */ struct data_CheckpointLEA { char terminator; }; static int compile_parser(compiler_t *comp, ln_parser_t *prs, uint32_t *out_pc) { uint32_t pc; /* Handle custom types with context push/pop */ if (prs->prsid == PRS_CUSTOM_TYPE) { /* Push field name context BEFORE the CALL */ /* Skip if name is "." (root context placeholder) or empty */ if (prs->name && prs->name[0] && !(prs->name[0] == '.' && prs->name[1] == '\0')) { uint32_t ctx_pc = emit_ctx_push(comp, prs->name); if (ctx_pc == UINT32_MAX) return -1; *out_pc = ctx_pc; } if (prs->custType >= 0 && (int)prs->custType < comp->ctx->nTypes) { ln_pdag *type_pdag = comp->ctx->type_pdags[prs->custType].pdag; if (type_pdag) { ln_instr_t call_instr = {0}; call_instr.op = OP_CALL; uint32_t call_pc = emit(comp, &call_instr); if (call_pc == UINT32_MAX) return -1; if (*out_pc == 0) *out_pc = call_pc; ln_instr_t jump_instr = {0}; jump_instr.op = OP_JUMP; uint32_t jump_pc = emit(comp, &jump_instr); if (jump_pc == UINT32_MAX) return -1; uint32_t type_start = comp->turbo->code_len; comp->turbo->code[call_pc].data.jump.offset = (int32_t)type_start - (int32_t)call_pc; comp->in_custom_type++; uint32_t type_entry; int r = compile_node(comp, type_pdag, &type_entry); comp->in_custom_type--; if (r != 0) return r; uint32_t type_end = comp->turbo->code_len; comp->turbo->code[jump_pc].data.jump.offset = (int32_t)type_end - (int32_t)jump_pc; } } /* Pop field name context AFTER the call */ /* Only pop if we pushed (skip if name was "." 
or empty) */ if (prs->name && prs->name[0] && !(prs->name[0] == '.' && prs->name[1] == '\0')) { if (emit_ctx_pop(comp) == UINT32_MAX) return -1; } if (prs->node) { uint32_t cont_entry; int r = compile_node(comp, prs->node, &cont_entry); if (r != 0) return r; if (*out_pc == 0) *out_pc = cont_entry; } return 0; } ln_opcode_t op = prsid_to_opcode(prs->prsid); if (op == OP_INVALID) { LN_DBGPRINTF(comp->ctx, "turbo: unsupported parser %d", prs->prsid); return -1; } if (op == OP_LITERAL) { const char *litstr = ln_DataForDisplayLiteral(comp->ctx, prs->parser_data); if (litstr && *litstr) { pc = emit_literal(comp, litstr, strlen(litstr)); /* Named literal: also emit the matched text as a static field. * e.g. %{"name":"network.type","type":"literal","text":"4"}% * matches "4" AND stores network.type="4" in the result. */ if (prs->name && prs->name[0]) { ln_instr_t sf = {0}; sf.op = OP_STATIC_FIELD; size_t klen = strlen(prs->name); if (klen >= sizeof(sf.data.kv.key)) klen = sizeof(sf.data.kv.key) - 1; memcpy(sf.data.kv.key, prs->name, klen); sf.aux = (uint16_t)klen; size_t vlen = strlen(litstr); if (vlen >= sizeof(sf.data.kv.val)) vlen = sizeof(sf.data.kv.val) - 1; memcpy(sf.data.kv.val, litstr, vlen); if (emit(comp, &sf) == UINT32_MAX) return -1; } } else { ln_instr_t nop = {0}; nop.op = OP_NOP; pc = emit(comp, &nop); } } else if (op == OP_SKIP_SPACE) { ln_instr_t instr = {0}; instr.op = OP_SKIP_SPACE; pc = emit(comp, &instr); } else if (op == OP_FIELD_NAME_VALUE) { /* name-value-list: extract sep/ass from parser_data */ char sep = 0, ass = 0; /* 0 = default (whitespace sep, '=' ass) */ if (prs->parser_data) { struct data_NameValue *nvdata = (struct data_NameValue *)prs->parser_data; sep = nvdata->sep; ass = nvdata->ass; } ln_instr_t instr = {0}; instr.op = OP_FIELD_NAME_VALUE; instr.flags = LN_INSTR_F_STORE; instr.data.char_to.delim = (uint8_t)sep; instr.data.char_to.ass = (uint8_t)ass; if (prs->name) { size_t nlen = strlen(prs->name); if (nlen >= 
sizeof(instr.data.char_to.name)) return -1; /* field name too long — fall back to v1 */ memcpy(instr.data.char_to.name, prs->name, nlen); } pc = emit(comp, &instr); } else if (op == OP_FIELD_CHAR_TO || op == OP_FIELD_STR_TO) { char delim = ' '; /* Default to space */ /* Both char-to and char-sep store their delimiter in the same * memory layout: term_chars[0..n_term_chars-1]. We cast through * data_CharSeparated which mirrors the first two fields of the * upstream data_CharTo struct (ABI-safe). * * NOTE: Do NOT use ln_DataForDisplayCharTo() here — it returns * the display string "char-to{X}", not the raw delimiter char. */ if (prs->parser_data && (prs->prsid == PRSID_CHARTO || prs->prsid == PRSID_CHARSEP)) { struct data_CharSeparated *csdata = (struct data_CharSeparated *)prs->parser_data; if (csdata->n_term_chars > 0 && csdata->term_chars) { delim = csdata->term_chars[0]; } } pc = emit_field(comp, op, prs->name, delim); } else if (op == OP_V2_IPTABLES || op == OP_CEE_SYSLOG || op == OP_CEF_HDR) { /* Simple opcodes: no parser_data config, just emit with field name */ pc = emit_field(comp, op, prs->name, ' '); } else if (op == OP_CHECKPOINT_LEA) { /* Checkpoint LEA: extract terminator from parser_data */ ln_instr_t instr = {0}; instr.op = OP_CHECKPOINT_LEA; instr.flags = LN_INSTR_F_STORE; instr.data.char_to.delim = 0; /* no terminator by default */ if (prs->parser_data) { struct data_CheckpointLEA *cpdata = (struct data_CheckpointLEA *)prs->parser_data; instr.data.char_to.delim = (uint8_t)cpdata->terminator; } if (prs->name) { size_t nlen = strlen(prs->name); if (nlen >= sizeof(instr.data.char_to.name)) return -1; /* field name too long — fall back to v1 */ memcpy(instr.data.char_to.name, prs->name, nlen); } pc = emit(comp, &instr); } else { pc = emit_field(comp, op, prs->name, ' '); } if (pc == UINT32_MAX) return -1; *out_pc = pc; if (prs->node) { uint32_t cont_entry; int r = compile_node(comp, prs->node, &cont_entry); if (r != 0) return r; } return 0; } static int 
compile_node(compiler_t *comp, struct ln_pdag *node, uint32_t *entry) { if (!node) return -1; comp->depth++; if (comp->depth > comp->max_depth) { comp->max_depth = comp->depth; } if (comp->depth > 200) { comp->depth--; return -1; } uint32_t first = comp->turbo->code_len; int r; if (node->nparsers == 0 && node->flags.isTerminal) { if (comp->in_custom_type == 0) { *entry = emit_match(comp, node); comp->depth--; return (*entry == UINT32_MAX) ? -1 : 0; } else { ln_instr_t ret_instr = {0}; ret_instr.op = OP_RET; *entry = emit(comp, &ret_instr); comp->depth--; return (*entry == UINT32_MAX) ? -1 : 0; } } if (node->nparsers == 0) { /* Non-terminal dead-end: no parsers to continue matching. * This is valid in the pdag (the recursive walker just fails to * match here and backtracks). Emit OP_FAIL so the VM does the * same instead of aborting the whole compilation. */ LN_DBGPRINTF(comp->ctx, "turbo: non-terminal dead-end node %p " "(nparsers=0, depth=%d) — emitting OP_FAIL", (void *)node, comp->depth); ln_instr_t fail_instr = {0}; fail_instr.op = OP_FAIL; *entry = emit(comp, &fail_instr); comp->depth--; return (*entry == UINT32_MAX) ? 
-1 : 0; } *entry = first; if (node->nparsers == 1) { uint32_t pc; r = compile_parser(comp, &node->parsers[0], &pc); if (r != 0) { comp->depth--; return r; } if (node->flags.isTerminal) { if (comp->in_custom_type == 0) { if (emit_match(comp, node) == UINT32_MAX) { comp->depth--; return -1; } } else { ln_instr_t ret_instr = {0}; ret_instr.op = OP_RET; if (emit(comp, &ret_instr) == UINT32_MAX) { comp->depth--; return -1; } } } comp->depth--; return 0; } uint32_t *fork_pcs = malloc(sizeof(uint32_t) * node->nparsers); if (!fork_pcs) { comp->depth--; return -1; } for (int i = 0; i < node->nparsers; i++) { if (i < node->nparsers - 1) { fork_pcs[i] = emit_fork(comp); if (fork_pcs[i] == UINT32_MAX) { free(fork_pcs); comp->depth--; return -1; } } uint32_t parser_pc; r = compile_parser(comp, &node->parsers[i], &parser_pc); if (r != 0) { free(fork_pcs); comp->depth--; return r; } if (i < node->nparsers - 1) { comp->turbo->code[fork_pcs[i]].data.jump.offset = (int32_t)comp->turbo->code_len - (int32_t)fork_pcs[i]; } } free(fork_pcs); if (node->flags.isTerminal) { if (comp->in_custom_type == 0) { if (emit_match(comp, node) == UINT32_MAX) { comp->depth--; return -1; } } else { ln_instr_t ret_instr = {0}; ret_instr.op = OP_RET; if (emit(comp, &ret_instr) == UINT32_MAX) { comp->depth--; return -1; } } } comp->depth--; return 0; } /*============================================================================ * Public API *============================================================================*/ ln_turbo_ctx_t * ln_turbo_ctx_init(void) { ln_turbo_ctx_t *turbo = calloc(1, sizeof(*turbo)); if (!turbo) return NULL; if (ln_arena_init(&turbo->arena) != 0) { free(turbo); return NULL; } if (ln_vm_init(&turbo->vm, &turbo->arena) != 0) { ln_arena_destroy(&turbo->arena); free(turbo); return NULL; } ln_fast_result_init(&turbo->result, &turbo->arena); /* Pre-allocate JSON buffer (8KB initial) */ turbo->json_buf_cap = 8192; turbo->json_buf = malloc(turbo->json_buf_cap); turbo->enabled = 1; 
return turbo; } void ln_turbo_ctx_free(ln_turbo_ctx_t *turbo) { if (!turbo) return; if (turbo->code) { free(turbo->code); turbo->code = NULL; } if (turbo->json_buf) { free(turbo->json_buf); turbo->json_buf = NULL; } if (turbo->arena.base) { ln_arena_destroy(&turbo->arena); } free(turbo); } int ln_turbo_compile(ln_ctx ctx) { if (!ctx || !ctx->turbo) return -1; if (!ctx->pdag) return -1; LN_DBGPRINTF(ctx, "turbo: compile entry — version=%d pdag=%p " "nparsers=%d isTerminal=%d nNodes=%d", ctx->version, (void *)ctx->pdag, ctx->pdag->nparsers, ctx->pdag->flags.isTerminal, ctx->nNodes); ln_turbo_ctx_t *turbo = ctx->turbo; free(turbo->code); turbo->code = NULL; turbo->code_len = 0; turbo->code_cap = 0; compiler_t comp = {0}; comp.turbo = turbo; comp.ctx = ctx; comp.visited.cap = 256; comp.visited.offsets = calloc(comp.visited.cap, sizeof(uint32_t)); if (!comp.visited.offsets) return -1; uint32_t entry; int r = compile_node(&comp, ctx->pdag, &entry); free(comp.visited.offsets); if (r != 0) { LN_DBGPRINTF(ctx, "turbo: compilation failed"); return -1; } emit_halt(&comp); turbo->stats.n_instructions = turbo->code_len; turbo->stats.n_rules = comp.n_rules; turbo->stats.n_branches = comp.n_branches; turbo->stats.max_depth = comp.max_depth; LN_DBGPRINTF(ctx, "turbo: compiled %u instructions, %u rules, %u branches", turbo->code_len, comp.n_rules, comp.n_branches); return 0; } int ln_turbo_is_available(ln_ctx ctx) { if(!ctx || !ctx->turbo) return 0; ln_turbo_ctx_t *turbo = (ln_turbo_ctx_t *)ctx->turbo; return turbo->enabled && turbo->code_len > 0; } /** * @brief Normalize using turbo VM and return JSON string (FAST PATH). 
*/
/*
 * Runs the compiled program over the message, serializes the fast result
 * into the context's reusable JSON buffer and hands back a heap copy.
 *
 * @param ctx       liblognorm context with compiled turbo bytecode
 * @param str       Message to normalize
 * @param strLen    Length of @p str in bytes
 * @param json_str  Receives a newly allocated JSON string (caller frees);
 *                  set to NULL on every failure path
 * @param json_len  Optional; receives the serialized length on success
 * @return 0 on success, -1 on no match or error
 */
int
ln_turbo_normalize_to_str(ln_ctx ctx, const char *str, size_t strLen,
                          char **json_str, size_t *json_len)
{
    if (!json_str)
        return -1;
    *json_str = NULL;

    if (!ln_turbo_is_available(ctx))
        return -1;

    ln_turbo_ctx_t *turbo = ctx->turbo;

    /* Reset per-message state */
    ln_fast_result_clear(&turbo->result);
    ln_arena_reset(&turbo->arena);
    ln_vm_reset(&turbo->vm);

    ln_program_t prog = {
        .code = turbo->code,
        .code_len = turbo->code_len,
        .name = "turbo"
    };

    /* Execute VM with fast result */
    int r = ln_vm_exec(&turbo->vm, &prog, str, strLen, &turbo->result);
    if (r != LN_VM_OK) {
        LN_DBGPRINTF(ctx, "turbo VM exec returned %d, error: %s",
                     r, turbo->vm.error ? turbo->vm.error : "(none)");
        return -1;
    }

    /* Serialize using fast JSON (with nested object support) */
    size_t est = ln_fast_json_estimate(&turbo->result);

    /* (Re)allocate when the estimate exceeds the buffer, and also when no
     * buffer exists at all (realloc(NULL, n) behaves like malloc(n)). */
    if (turbo->json_buf == NULL || est > turbo->json_buf_cap) {
        size_t new_cap = est + 1024;
        if (new_cap < est)
            return -1; /* size computation wrapped around */

        char *new_buf = realloc(turbo->json_buf, new_cap);
        if (!new_buf)
            return -1; /* old buffer (if any) stays valid and owned */

        turbo->json_buf = new_buf;
        turbo->json_buf_cap = new_cap;
    }

    size_t outlen;
    if (ln_fast_to_json(&turbo->result, turbo->json_buf, turbo->json_buf_cap, &outlen) != 0)
        return -1;

    /* Return allocated copy (caller will free) */
    *json_str = strdup(turbo->json_buf);
    if (!*json_str)
        return -1;

    if (json_len)
        *json_len = outlen;

    /* Update stats */
    turbo->stats.messages_processed++;
    turbo->stats.total_bytes += strLen;

    return 0;
}

/**
 * @brief Legacy function returning fastjson structure.
*/ int ln_turbo_normalize(ln_ctx ctx, const char *str, size_t strLen, struct json_object **json_p) { char *json_str; size_t json_len; if (ln_turbo_normalize_to_str(ctx, str, strLen, &json_str, &json_len) != 0) { *json_p = NULL; return -1; } /* Parse JSON string to json_object */ *json_p = json_tokener_parse(json_str); free(json_str); if (!*json_p) { return -1; } return 0; } int ln_turbo_get_stats(ln_ctx ctx, ln_turbo_stats_t *stats) { if (!ctx || !ctx->turbo || !stats) return -1; ln_turbo_ctx_t *turbo = (ln_turbo_ctx_t *)ctx->turbo; *stats = turbo->stats; return 0; } /*============================================================================ * Direct Result Access API (zero JSON overhead) *============================================================================*/ /** * @brief Normalize using turbo VM - direct result access (FASTEST PATH). * * Returns a pointer to the internal result structure. No JSON serialization, * no string conversion, no memory allocation. The result is valid until the * next normalize call on the same context. * * This is the optimal API for rsyslog integration: the caller can iterate * over typed fields and build json_object* directly, avoiding the * serialize-to-JSON-string + parse-JSON-string roundtrip. */ int ln_turbo_normalize_raw(ln_ctx ctx, const char *str, size_t strLen, const ln_fast_result_t **result) { if (!ln_turbo_is_available(ctx)) return -1; if (!result) return -1; ln_turbo_ctx_t *turbo = ctx->turbo; /* Reset per-message state */ ln_fast_result_clear(&turbo->result); ln_arena_reset(&turbo->arena); ln_vm_reset(&turbo->vm); ln_program_t prog = { .code = turbo->code, .code_len = turbo->code_len, .name = "turbo" }; /* Execute VM */ int r = ln_vm_exec(&turbo->vm, &prog, str, strLen, &turbo->result); if (r != LN_VM_OK) { LN_DBGPRINTF(ctx, "turbo VM exec returned %d, error: %s", r, turbo->vm.error ? 
turbo->vm.error : "(none)"); *result = NULL; return -1; } *result = &turbo->result; /* Update stats */ turbo->stats.messages_processed++; turbo->stats.total_bytes += strLen; return 0; } /*============================================================================ * Fast Result Accessor Functions *============================================================================*/ int ln_fast_result_field_count(const ln_fast_result_t *r) { return r ? r->n_fields : 0; } int ln_fast_result_get_field(const ln_fast_result_t *r, int idx, const char **name, size_t *nlen, const char **value, size_t *vlen) { if (!r || idx < 0 || idx >= r->n_fields) return -1; const ln_fast_field_t *f = &r->fields[idx]; *name = f->name; *nlen = f->name_len; switch (f->type) { case LN_FTYPE_STRING: *value = f->v.str.ptr; *vlen = f->v.str.len; break; case LN_FTYPE_STRING_INLINE: *value = f->v.inl; *vlen = strlen(f->v.inl); break; default: /* Non-string types: caller should use typed accessors */ *value = NULL; *vlen = 0; break; } return 0; } int ln_fast_result_get_string(const ln_fast_result_t *r, const char *name, const char **value, size_t *vlen) { if (!r || !name) return -1; for (int i = 0; i < r->n_fields; i++) { if (r->fields[i].name && strcmp(r->fields[i].name, name) == 0) { const ln_fast_field_t *f = &r->fields[i]; switch (f->type) { case LN_FTYPE_STRING: *value = f->v.str.ptr; *vlen = f->v.str.len; return 0; case LN_FTYPE_STRING_INLINE: *value = f->v.inl; *vlen = strlen(f->v.inl); return 0; default: return -1; } } } return -1; } int ln_fast_result_get_int(const ln_fast_result_t *r, const char *name, int64_t *value) { if (!r || !name) return -1; for (int i = 0; i < r->n_fields; i++) { if (r->fields[i].type == LN_FTYPE_INT && r->fields[i].name && strcmp(r->fields[i].name, name) == 0) { *value = r->fields[i].v.i; return 0; } } return -1; } int ln_fast_result_tag_count(const ln_fast_result_t *r) { return r ? 
r->n_tags : 0; } const char * ln_fast_result_get_tag(const ln_fast_result_t *r, int idx) { if (!r || idx < 0 || idx >= r->n_tags) return NULL; return r->tags[idx].tag; } int ln_fast_result_has_tag(const ln_fast_result_t *r, const char *tag) { return ln_fast_has_tag(r, tag); } const char * ln_fast_result_get_rule_id(const ln_fast_result_t *r) { return r ? r->rule_id : NULL; } /*============================================================================ * Snapshot API (for rsyslog zero-JSON hot path) *============================================================================*/ /** * @brief Create a snapshot of the current turbo result. * * Must be called after a successful ln_turbo_normalize_raw() and before * the next normalize call (which resets the arena). The snapshot is a * self-contained deep copy that can be attached to smsg_t. * * @param ctx liblognorm context with a valid turbo result * @return Snapshot (caller owns), or NULL on failure */ ln_fast_result_snapshot_t * ln_turbo_snapshot_result(ln_ctx ctx) { if (!ctx || !ctx->turbo) return NULL; ln_turbo_ctx_t *turbo = ctx->turbo; return ln_fast_result_snapshot_create(&turbo->result, &turbo->arena); } /*============================================================================ * Bytecode Disassembly (for diagnostic / LN_VM_TRACE builds) *============================================================================*/ void ln_turbo_disasm(ln_ctx ctx, FILE *fp, const char *label) { #ifdef LN_VM_TRACE if (!ctx || !ctx->turbo || !fp) return; ln_turbo_ctx_t *turbo = ctx->turbo; if (turbo->code_len == 0) { fprintf(fp, "[%s] (no compiled bytecode)\n", label ? label : "turbo"); return; } ln_program_t p = ln_program_make(turbo->code, turbo->code_len, label ? 
label : "turbo"); ln_program_disasm(&p, fp); #else (void)ctx; (void)fp; (void)label; #endif } #endif /* ENABLE_TURBO */ liblognorm-2.1.0/src/turbo.h000066400000000000000000000216211520037563000157540ustar00rootroot00000000000000/** * @file turbo.h * @brief liblognorm integration for TurboVM bytecode engine *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. */ #ifndef LIBLOGNORM_TURBO_H_INCLUDED #define LIBLOGNORM_TURBO_H_INCLUDED /* When building liblognorm itself, config.h provides ENABLE_TURBO. * When included from external projects (e.g. rsyslog), the consumer * should define LOGNORM_TURBO_SUPPORTED (from lognorm-features.h). */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #if defined(ENABLE_TURBO) || defined(LOGNORM_TURBO_SUPPORTED) #include #include #include #ifdef __cplusplus extern "C" { #endif /* Forward declarations */ typedef struct ln_ctx_s *ln_ctx; struct json_object; /* libfastjson (for legacy API only) */ /*============================================================================ * Turbo VM State (opaque) *============================================================================*/ typedef struct ln_turbo_ctx_s ln_turbo_ctx_t; /*============================================================================ * Fast Result Structure (opaque for external use) *============================================================================*/ typedef struct ln_fast_result_s ln_fast_result_t; /*============================================================================ * Context Management *============================================================================*/ /** * Initialize turbo VM context. * Call from ln_initCtx(). */ ln_turbo_ctx_t *ln_turbo_ctx_init(void); /** * Free turbo VM context. * Call from ln_exitCtx(). */ void ln_turbo_ctx_free(ln_turbo_ctx_t *turbo); /** * Compile PDAG to VM bytecode. * Call after ln_loadSamples() completes successfully. 
* * @param ctx liblognorm context with loaded PDAG * @return 0 on success, -1 on error (non-fatal, will use recursive walker) */ int ln_turbo_compile(ln_ctx ctx); /** * Check if turbo VM is available for this context. */ int ln_turbo_is_available(ln_ctx ctx); /*============================================================================ * Normalization API - Choose based on your needs *============================================================================*/ /** * Normalize using turbo VM - JSON string output. * Best for CLI tools and JSON pipelines. * * @param ctx liblognorm context * @param str Input string to normalize * @param strLen Length of input string * @param json_str Receives JSON string (caller must free) * @param json_len Receives JSON string length * @return 0 on match, negative on no match/error */ int ln_turbo_normalize_to_str(ln_ctx ctx, const char *str, size_t strLen, char **json_str, size_t *json_len); /** * Normalize using turbo VM - direct result access. * Best for rsyslog - ZERO JSON overhead! * * Result is valid until next normalize call on same context. * DO NOT free the result - it's owned by the turbo context. * * @param ctx liblognorm context * @param str Input string to normalize * @param strLen Length of input string * @param result Receives pointer to internal result structure * @return 0 on match, negative on no match/error */ int ln_turbo_normalize_raw(ln_ctx ctx, const char *str, size_t strLen, const ln_fast_result_t **result); /** * Normalize using turbo VM - libfastjson object output. * Legacy API for backward compatibility. Avoid if possible. 
* * @param ctx liblognorm context * @param str Input string to normalize * @param strLen Length of input string * @param json_p Receives JSON result object (caller must free) * @return 0 on match, negative on no match/error */ int ln_turbo_normalize(ln_ctx ctx, const char *str, size_t strLen, struct json_object **json_p); /*============================================================================ * Direct Field Access API (for rsyslog - no JSON overhead) *============================================================================*/ /** * Get number of fields in result. */ int ln_fast_result_field_count(const ln_fast_result_t *r); /** * Get field by index. * * @param r Result * @param idx Field index (0 to field_count-1) * @param name Receives field name pointer (do not free) * @param nlen Receives name length * @param value Receives value pointer (do not free) * @param vlen Receives value length * @return 0 on success, -1 if index out of range */ int ln_fast_result_get_field(const ln_fast_result_t *r, int idx, const char **name, size_t *nlen, const char **value, size_t *vlen); /** * Get string field by name. * * @param r Result * @param name Field name to find * @param value Receives value pointer * @param vlen Receives value length * @return 0 if found, -1 if not found */ int ln_fast_result_get_string(const ln_fast_result_t *r, const char *name, const char **value, size_t *vlen); /** * Get integer field by name. * * @param r Result * @param name Field name to find * @param value Receives integer value * @return 0 if found, -1 if not found or not an integer */ int ln_fast_result_get_int(const ln_fast_result_t *r, const char *name, int64_t *value); /** * Get number of tags in result. */ int ln_fast_result_tag_count(const ln_fast_result_t *r); /** * Get tag by index. 
* * @param r Result * @param idx Tag index (0 to tag_count-1) * @return Tag string or NULL if index out of range */ const char *ln_fast_result_get_tag(const ln_fast_result_t *r, int idx); /** * Check if result has a specific tag. * * @param r Result * @param tag Tag to check * @return 1 if tag present, 0 if not */ int ln_fast_result_has_tag(const ln_fast_result_t *r, const char *tag); /** * Get matched rule ID. * * @param r Result * @return Rule ID string or NULL if no match */ const char *ln_fast_result_get_rule_id(const ln_fast_result_t *r); /*============================================================================ * Snapshot API (for rsyslog zero-JSON hot path) *============================================================================*/ /** * Forward declare snapshot type. * Full definition in turbo_snapshot.h. */ typedef struct ln_fast_result_snapshot_s ln_fast_result_snapshot_t; /** * Create a snapshot of the current turbo parse result. * * Must be called after a successful ln_turbo_normalize_raw() and before * the next normalize call on the same context (which resets the arena). * * The snapshot is a self-contained deep copy: single allocation containing * the result struct + arena data, with all pointers rebased. 
* * @param ctx liblognorm context with a valid turbo result * @return Snapshot (caller must free with ln_fast_result_snapshot_free), * or NULL on failure */ ln_fast_result_snapshot_t *ln_turbo_snapshot_result(ln_ctx ctx); /*============================================================================ * Statistics *============================================================================*/ typedef struct { uint32_t n_instructions; /**< Instructions in compiled program */ uint32_t n_rules; /**< Number of rules */ uint32_t n_branches; /**< Branch instructions (FORK) */ uint32_t max_depth; /**< Max PDAG depth */ uint64_t messages_processed;/**< Messages normalized */ uint64_t total_bytes; /**< Total bytes processed */ uint64_t vm_time_ns; /**< Time spent in VM (nanoseconds) */ } ln_turbo_stats_t; /** * Get turbo VM statistics. */ int ln_turbo_get_stats(ln_ctx ctx, ln_turbo_stats_t *stats); /** * Dump compiled bytecode disassembly to a file stream. * Useful for verifying per-worker bytecode matches the standalone tool. * Only available when liblognorm is built with -DLN_VM_TRACE. * * @param ctx liblognorm context with compiled turbo bytecode * @param fp Output stream (e.g. stderr) * @param label Label to prefix the disassembly (e.g. 
"worker-turbo") */ void ln_turbo_disasm(ln_ctx ctx, FILE *fp, const char *label); #ifdef __cplusplus } #endif #else /* !ENABLE_TURBO && !LOGNORM_TURBO_SUPPORTED */ /* Stub definitions when turbo is disabled */ typedef void *ln_turbo_ctx_t; typedef void *ln_fast_result_t; typedef void *ln_fast_result_snapshot_t; #define ln_turbo_ctx_init() NULL #define ln_turbo_ctx_free(t) ((void)(t)) #define ln_turbo_compile(ctx) (-1) #define ln_turbo_is_available(ctx) (0) #define ln_turbo_normalize(ctx, str, len, json) (-1) #define ln_turbo_normalize_to_str(ctx, str, len, js, jl) (-1) #define ln_turbo_normalize_raw(ctx, str, len, r) (-1) #define ln_turbo_snapshot_result(ctx) ((void*)0) #define ln_fast_result_snapshot_get(snap) ((void*)0) #define ln_fast_result_snapshot_free(snap) ((void)(snap)) #define ln_turbo_disasm(ctx, fp, label) ((void)0) #endif /* ENABLE_TURBO || LOGNORM_TURBO_SUPPORTED */ #endif /* LIBLOGNORM_TURBO_H_INCLUDED */ liblognorm-2.1.0/src/turbo_arena.c000066400000000000000000000224511520037563000171170ustar00rootroot00000000000000/* * turbo_arena.c -- High-performance arena allocator * * Part of the TurboVM bytecode engine for high-performance log parsing. * * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
/**
 * @brief Tell whether @p x has exactly one bit set.
 *
 * Zero is not treated as a power of two.
 */
static inline bool
is_power_of_two(size_t x)
{
    if (x == 0) {
        return false;
    }
    /* Clearing the lowest set bit leaves zero only for single-bit values. */
    return (x & (x - 1)) == 0;
}

/**
 * @brief Round @p value up to the next multiple of @p alignment.
 *
 * @param value     Size or offset to round
 * @param alignment Boundary; must be a power of two (not checked here)
 * @return Smallest multiple of @p alignment that is >= @p value.
 *
 * @note The addition is plain unsigned arithmetic and wraps on overflow;
 *       a caller that can pass values near SIZE_MAX has to compare the
 *       result against the input itself.
 */
static inline size_t
align_up(size_t value, size_t alignment)
{
    const size_t mask = alignment - 1;

    return (value + mask) & ~mask;
}
/**
 * @brief Obtain @p size bytes from the system, aligned to @p alignment.
 *
 * Three back ends are selected at compile time:
 *  - Windows:            _aligned_malloc()
 *  - C11 (except Apple): aligned_alloc(); the size is first rounded up to a
 *                        multiple of the alignment, as that function expects
 *  - otherwise:          posix_memalign()
 *
 * @param size      Number of bytes to allocate
 * @param alignment Alignment boundary (power of two)
 * @return Aligned pointer, or NULL on failure. Release it with
 *         system_aligned_free(), never with a bare free() on Windows.
 */
static void *
system_aligned_alloc(size_t size, size_t alignment)
{
#if defined(_WIN32) || defined(_WIN64)
    return _aligned_malloc(size, alignment);
#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__APPLE__)
    /* C11 aligned_alloc - size must be multiple of alignment */
    const size_t mask = alignment - 1;
    const size_t rounded = (size + mask) & ~mask;

    return aligned_alloc(alignment, rounded);
#else
    /* POSIX posix_memalign */
    void *block = NULL;

    return posix_memalign(&block, alignment, size) == 0 ? block : NULL;
#endif
}

/**
 * @brief Release memory obtained from system_aligned_alloc().
 */
static void
system_aligned_free(void *ptr)
{
#if defined(_WIN32) || defined(_WIN64)
    _aligned_free(ptr);
#else
    free(ptr);
#endif
}

/**
 * @brief Report whether a + b would exceed SIZE_MAX.
 *
 * @return true if the sum does not fit in size_t, false otherwise.
 */
static inline bool
would_overflow_add(size_t a, size_t b)
{
    return b > SIZE_MAX - a;
}
return LN_ARENA_EINVAL; } if (capacity < LN_ARENA_MIN_CAPACITY) { return LN_ARENA_EINVAL; } memset(arena, 0, sizeof(*arena)); arena->base = (uint8_t *)buffer; arena->capacity = capacity; arena->used = 0; arena->peak = 0; arena->alloc_count = 0; arena->flags = LN_ARENA_FLAG_STATIC; return LN_ARENA_OK; } void ln_arena_destroy(ln_arena_t *arena) { if (!arena) { return; } /* Only free if we own the memory */ if ((arena->flags & LN_ARENA_FLAG_OWNED) && arena->base) { system_aligned_free(arena->base); } /* Zero the structure for safety */ memset(arena, 0, sizeof(*arena)); } /*============================================================================ * Allocation Functions *============================================================================*/ void * ln_arena_alloc(ln_arena_t *arena, size_t size) { return ln_arena_alloc_aligned(arena, size, LN_ARENA_DEFAULT_ALIGN); } void * ln_arena_alloc_aligned(ln_arena_t *arena, size_t size, size_t alignment) { size_t current; size_t aligned_offset; size_t new_used; if (!arena || !arena->base) { return NULL; } /* Validate alignment */ if (alignment == 0 || !is_power_of_two(alignment) || alignment > 4096) { return NULL; } /* Zero-size allocations return a valid unique pointer */ if (size == 0) { size = 1; } /* Calculate aligned offset */ current = arena->used; aligned_offset = align_up(current, alignment); /* Check for alignment overflow */ if (aligned_offset < current) { return NULL; /* Overflow in alignment calculation */ } /* Check for size overflow */ if (would_overflow_add(aligned_offset, size)) { return NULL; } new_used = aligned_offset + size; /* Check capacity */ if (new_used > arena->capacity) { return NULL; /* Arena exhausted */ } /* Commit allocation */ arena->used = new_used; arena->alloc_count++; /* Update peak usage */ if (new_used > arena->peak) { arena->peak = new_used; } return arena->base + aligned_offset; } void * ln_arena_calloc(ln_arena_t *arena, size_t size) { void *ptr = ln_arena_alloc(arena, size); if 
(ptr) { memset(ptr, 0, size); } return ptr; } char * ln_arena_strdup(ln_arena_t *arena, const char *str) { if (!str) { return NULL; } return ln_arena_strndup(arena, str, strlen(str)); } char * ln_arena_strndup(ln_arena_t *arena, const char *str, size_t len) { char *copy; if (!arena || !str) { return NULL; } /* Check for overflow when adding null terminator */ if (len == SIZE_MAX) { return NULL; } /* Allocate space for string + null terminator */ copy = (char *)ln_arena_alloc(arena, len + 1); if (!copy) { return NULL; } memcpy(copy, str, len); copy[len] = '\0'; return copy; } void * ln_arena_memdup(ln_arena_t *arena, const void *src, size_t size) { void *copy; if (!arena || !src || size == 0) { return NULL; } copy = ln_arena_alloc(arena, size); if (!copy) { return NULL; } memcpy(copy, src, size); return copy; } /*============================================================================ * Reset and Backtracking *============================================================================*/ void ln_arena_reset(ln_arena_t *arena) { if (!arena) { return; } arena->used = 0; arena->alloc_count = 0; /* Note: peak is intentionally preserved across resets */ } void ln_arena_save(const ln_arena_t *arena, ln_arena_mark_t *mark) { if (!arena || !mark) { return; } mark->used = arena->used; mark->alloc_count = arena->alloc_count; } void ln_arena_restore(ln_arena_t *arena, const ln_arena_mark_t *mark) { if (!arena || !mark) { return; } /* Sanity check: can't restore to a point beyond current usage */ if (mark->used > arena->used) { return; } arena->used = mark->used; arena->alloc_count = mark->alloc_count; } /*============================================================================ * Query Functions *============================================================================*/ void ln_arena_get_stats(const ln_arena_t *arena, ln_arena_stats_t *stats) { if (!stats) { return; } memset(stats, 0, sizeof(*stats)); if (!arena) { return; } stats->capacity = arena->capacity; 
stats->used = arena->used; stats->peak = arena->peak; stats->available = arena->capacity - arena->used; stats->alloc_count = arena->alloc_count; if (arena->capacity > 0) { stats->utilization = (double)arena->used / (double)arena->capacity; } } /*============================================================================ * Debug/Test Support *============================================================================*/ #ifdef LN_ARENA_DEBUG #include /** * @brief Dump arena state to stderr (debug builds only). */ void ln_arena_dump(const ln_arena_t *arena, const char *label) { if (!arena) { fprintf(stderr, "[ARENA] %s: NULL\n", label ? label : "arena"); return; } fprintf(stderr, "[ARENA] %s: base=%p capacity=%zu used=%zu peak=%zu " "alloc_count=%u flags=0x%x\n", label ? label : "arena", (void *)arena->base, arena->capacity, arena->used, arena->peak, arena->alloc_count, arena->flags); } #endif /* LN_ARENA_DEBUG */ liblognorm-2.1.0/src/turbo_arena.h000066400000000000000000000341401520037563000171220ustar00rootroot00000000000000/** * @file turbo_arena.h * @brief High-performance arena allocator *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. */ #ifndef LIBLOGNORM_TURBO_ARENA_H_INCLUDED #define LIBLOGNORM_TURBO_ARENA_H_INCLUDED #include #include #include #ifdef __cplusplus extern "C" { #endif /*============================================================================ * Configuration *============================================================================*/ /** * Default arena capacity (bytes). * Sized for typical log messages with 20-30 extracted fields. * 16KB fits in L1 cache on most modern CPUs. */ #define LN_ARENA_DEFAULT_CAPACITY 16384 /** * Minimum arena capacity (bytes). * Must be at least one cache line. */ #define LN_ARENA_MIN_CAPACITY 64 /** * Maximum arena capacity (bytes). * Prevents accidental huge allocations. 16MB should handle any message. 
*/ #define LN_ARENA_MAX_CAPACITY (16 * 1024 * 1024) /** * Default alignment for allocations (bytes). * 8-byte alignment is sufficient for most data types. */ #define LN_ARENA_DEFAULT_ALIGN 8 /** * Cache line size for alignment hints. */ #define LN_ARENA_CACHE_LINE 64 /*============================================================================ * Error Codes *============================================================================*/ #define LN_ARENA_OK 0 /**< Success */ #define LN_ARENA_EINVAL -1 /**< Invalid argument */ #define LN_ARENA_ENOMEM -2 /**< Out of memory (system) */ #define LN_ARENA_EOVERFLOW -3 /**< Arena capacity exceeded */ #define LN_ARENA_EALIGN -4 /**< Invalid alignment */ /*============================================================================ * Data Structures *============================================================================*/ /** * Arena memory region. * * Memory layout: * ┌────────────────────────────────────────────────────────────┠* │ Allocated region (used) │ Free region (available) │ * └────────────────────────────────────────────────────────────┘ * ^ ^ ^ * base base + used base + capacity * * Allocations grow upward from base. Reset returns used to zero. */ typedef struct ln_arena_s { uint8_t *base; /**< Base pointer of allocated region */ size_t capacity; /**< Total capacity in bytes */ size_t used; /**< Currently used bytes */ size_t peak; /**< Peak usage (high water mark) */ uint32_t alloc_count;/**< Number of allocations (for stats) */ uint32_t flags; /**< Arena flags */ } ln_arena_t; /** * Arena flags. */ #define LN_ARENA_FLAG_OWNED 0x0001 /**< Arena owns the memory (should free) */ #define LN_ARENA_FLAG_STATIC 0x0002 /**< Arena uses static/external buffer */ /** * Arena mark for save/restore during backtracking. * * Usage pattern: * ln_arena_mark_t mark; * ln_arena_save(arena, &mark); * // ... attempt parse, allocate fields ... 
* if (parse_failed) { * ln_arena_restore(arena, &mark); // rollback allocations * } */ typedef struct ln_arena_mark_s { size_t used; /**< Saved used offset */ uint32_t alloc_count;/**< Saved allocation count */ } ln_arena_mark_t; /** * Arena statistics. */ typedef struct ln_arena_stats_s { size_t capacity; /**< Total capacity */ size_t used; /**< Current usage */ size_t peak; /**< Peak usage */ size_t available; /**< Remaining capacity */ uint32_t alloc_count; /**< Total allocations since last reset */ uint32_t reset_count; /**< Number of resets (if tracked externally) */ double utilization; /**< Current utilization (0.0 - 1.0) */ } ln_arena_stats_t; /*============================================================================ * Lifecycle Functions *============================================================================*/ /** * @brief Initialize an arena with default capacity. * * Allocates LN_ARENA_DEFAULT_CAPACITY bytes of memory for the arena. * The arena takes ownership of the allocated memory. * * @param[out] arena Arena to initialize (must not be NULL) * @return LN_ARENA_OK on success, negative error code on failure * * @note Call ln_arena_destroy() to free resources. * * Example: * @code * ln_arena_t arena; * if (ln_arena_init(&arena) != LN_ARENA_OK) { * // handle error * } * // ... use arena ... * ln_arena_destroy(&arena); * @endcode */ int ln_arena_init(ln_arena_t *arena); /** * @brief Initialize an arena with specified capacity. * * @param[out] arena Arena to initialize (must not be NULL) * @param[in] capacity Desired capacity in bytes * @return LN_ARENA_OK on success, negative error code on failure * * @note Capacity is clamped to [LN_ARENA_MIN_CAPACITY, LN_ARENA_MAX_CAPACITY] */ int ln_arena_init_sized(ln_arena_t *arena, size_t capacity); /** * @brief Initialize an arena using an external buffer. * * The arena does NOT take ownership of the buffer. The caller must ensure * the buffer remains valid for the lifetime of the arena. 
* * @param[out] arena Arena to initialize (must not be NULL) * @param[in] buffer External buffer (must not be NULL) * @param[in] capacity Buffer size in bytes * @return LN_ARENA_OK on success, negative error code on failure * * @note ln_arena_destroy() will NOT free the external buffer. * * Example: * @code * uint8_t stack_buffer[4096]; * ln_arena_t arena; * ln_arena_init_static(&arena, stack_buffer, sizeof(stack_buffer)); * // ... use arena ... * ln_arena_destroy(&arena); // safe, does not free stack_buffer * @endcode */ int ln_arena_init_static(ln_arena_t *arena, void *buffer, size_t capacity); /** * @brief Destroy an arena and free its resources. * * If the arena owns its memory (allocated via ln_arena_init or * ln_arena_init_sized), the memory is freed. Static arenas are * simply zeroed. * * @param[in,out] arena Arena to destroy (may be NULL, no-op if so) * * @note Safe to call multiple times on the same arena. * @note After destroy, arena is zeroed and safe to re-init. */ void ln_arena_destroy(ln_arena_t *arena); /*============================================================================ * Allocation Functions *============================================================================*/ /** * @brief Allocate memory from the arena. * * Returns a pointer to at least `size` bytes of memory, aligned to * LN_ARENA_DEFAULT_ALIGN (8 bytes). The memory is NOT initialized. * * @param[in] arena Arena to allocate from (must not be NULL) * @param[in] size Number of bytes to allocate * @return Pointer to allocated memory, or NULL if arena is exhausted * * @note Returned pointer is valid until arena is reset or destroyed. * @note For aligned allocations, use ln_arena_alloc_aligned(). */ void *ln_arena_alloc(ln_arena_t *arena, size_t size); /** * @brief Allocate aligned memory from the arena. * * Returns a pointer to at least `size` bytes of memory, aligned to * the specified alignment boundary. 
* * @param[in] arena Arena to allocate from (must not be NULL) * @param[in] size Number of bytes to allocate * @param[in] alignment Alignment boundary (must be power of 2, max 4096) * @return Pointer to allocated memory, or NULL on failure * * @note Use for SIMD data requiring 16, 32, or 64-byte alignment. * * Example: * @code * // Allocate 64-byte aligned buffer for SIMD operations * void *simd_buf = ln_arena_alloc_aligned(arena, 256, 64); * @endcode */ void *ln_arena_alloc_aligned(ln_arena_t *arena, size_t size, size_t alignment); /** * @brief Allocate and zero-initialize memory from the arena. * * Equivalent to ln_arena_alloc() followed by memset to zero. * * @param[in] arena Arena to allocate from * @param[in] size Number of bytes to allocate * @return Pointer to zero-initialized memory, or NULL on failure */ void *ln_arena_calloc(ln_arena_t *arena, size_t size); /** * @brief Duplicate a string into the arena. * * Allocates strlen(str)+1 bytes and copies the string including * the null terminator. * * @param[in] arena Arena to allocate from * @param[in] str String to duplicate (must not be NULL) * @return Pointer to duplicated string, or NULL on failure */ char *ln_arena_strdup(ln_arena_t *arena, const char *str); /** * @brief Duplicate a string with known length into the arena. * * Allocates len+1 bytes, copies len bytes, and null-terminates. * More efficient than ln_arena_strdup() when length is known. * * @param[in] arena Arena to allocate from * @param[in] str String to duplicate (must not be NULL) * @param[in] len Number of characters to copy (excluding null terminator) * @return Pointer to duplicated string, or NULL on failure * * @note The source string does not need to be null-terminated. */ char *ln_arena_strndup(ln_arena_t *arena, const char *str, size_t len); /** * @brief Copy memory into the arena. * * Allocates `size` bytes and copies data from `src`. 
* * @param[in] arena Arena to allocate from * @param[in] src Source data to copy * @param[in] size Number of bytes to copy * @return Pointer to copied data, or NULL on failure */ void *ln_arena_memdup(ln_arena_t *arena, const void *src, size_t size); /*============================================================================ * Reset and Backtracking *============================================================================*/ /** * @brief Reset the arena for reuse. * * Returns the arena to its initial empty state. All allocations become * invalid. The underlying memory is retained for reuse. * * This is the primary mechanism for per-message arena reuse: * @code * for (each message) { * ln_arena_reset(&arena); * // ... parse message, allocate fields ... * } * @endcode * * @param[in,out] arena Arena to reset * * @note Peak usage statistic is preserved across resets. * @note This is O(1) - does not touch the memory. */ void ln_arena_reset(ln_arena_t *arena); /** * @brief Save current arena position for later restore. * * Creates a checkpoint that can be restored with ln_arena_restore(). * Used for backtracking during parsing when a parse path fails. * * @param[in] arena Arena to save state from * @param[out] mark Mark structure to save state into * * @note Marks can be nested (save multiple marks, restore in reverse order). */ void ln_arena_save(const ln_arena_t *arena, ln_arena_mark_t *mark); /** * @brief Restore arena to a previously saved position. * * All allocations made after the mark was saved become invalid. * The arena's used pointer is reset to the marked position. * * @param[in,out] arena Arena to restore * @param[in] mark Mark to restore to * * @note The mark must have been created from the same arena. * @note Restoring invalidates any marks created after this one. 
*/ void ln_arena_restore(ln_arena_t *arena, const ln_arena_mark_t *mark); /*============================================================================ * Query Functions *============================================================================*/ /** * @brief Get remaining capacity in the arena. * * @param[in] arena Arena to query * @return Number of bytes available for allocation */ static inline size_t ln_arena_available(const ln_arena_t *arena) { return arena ? arena->capacity - arena->used : 0; } /** * @brief Get current usage of the arena. * * @param[in] arena Arena to query * @return Number of bytes currently allocated */ static inline size_t ln_arena_used(const ln_arena_t *arena) { return arena ? arena->used : 0; } /** * @brief Get peak usage of the arena. * * Returns the maximum `used` value seen since arena creation. * Useful for tuning arena capacity. * * @param[in] arena Arena to query * @return Peak number of bytes allocated */ static inline size_t ln_arena_peak(const ln_arena_t *arena) { return arena ? arena->peak : 0; } /** * @brief Get total capacity of the arena. * * @param[in] arena Arena to query * @return Total capacity in bytes */ static inline size_t ln_arena_capacity(const ln_arena_t *arena) { return arena ? arena->capacity : 0; } /** * @brief Check if arena has enough space for an allocation. * * @param[in] arena Arena to check * @param[in] size Desired allocation size * @return true if allocation would succeed, false otherwise * * @note Does not account for alignment padding. */ static inline bool ln_arena_has_space(const ln_arena_t *arena, size_t size) { return arena && (arena->capacity - arena->used) >= size; } /** * @brief Get detailed arena statistics. 
* * @param[in] arena Arena to query * @param[out] stats Statistics structure to fill */ void ln_arena_get_stats(const ln_arena_t *arena, ln_arena_stats_t *stats); /*============================================================================ * Utility Macros *============================================================================*/ /** * @brief Allocate a typed object from the arena. * * Example: * @code * my_struct_t *obj = LN_ARENA_NEW(arena, my_struct_t); * @endcode */ #define LN_ARENA_NEW(arena, type) \ ((type *)ln_arena_alloc((arena), sizeof(type))) /** * @brief Allocate an array of typed objects from the arena. * * Example: * @code * int *array = LN_ARENA_ARRAY(arena, int, 100); * @endcode */ #define LN_ARENA_ARRAY(arena, type, count) \ ((type *)ln_arena_alloc((arena), sizeof(type) * (count))) /** * @brief Allocate an aligned typed object from the arena. * * Example: * @code * __m128i *vec = LN_ARENA_NEW_ALIGNED(arena, __m128i, 16); * @endcode */ #define LN_ARENA_NEW_ALIGNED(arena, type, align) \ ((type *)ln_arena_alloc_aligned((arena), sizeof(type), (align))) #ifdef __cplusplus } #endif #endif /* LIBLOGNORM_TURBO_ARENA_H_INCLUDED */ liblognorm-2.1.0/src/turbo_json.h000066400000000000000000000010421520037563000170000ustar00rootroot00000000000000/** * @file turbo_json.h * @brief JSON serialization for turbo results *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. 
*/ #ifndef LIBLOGNORM_TURBO_JSON_H_INCLUDED #define LIBLOGNORM_TURBO_JSON_H_INCLUDED #include "config.h" #ifdef ENABLE_TURBO #include "turbo_result_fast.h" #include #ifdef __cplusplus extern "C" { #endif #ifdef __cplusplus } #endif #endif /* ENABLE_TURBO */ #endif /* LIBLOGNORM_TURBO_JSON_H_INCLUDED */ liblognorm-2.1.0/src/turbo_json_fast.c000066400000000000000000000022671520037563000200220ustar00rootroot00000000000000/* * turbo_json_fast.c -- Fast JSON serialization declarations * * Part of the TurboVM bytecode engine for high-performance log parsing. * * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #ifdef ENABLE_TURBO /* See turbo_json_impl.c */ #endif liblognorm-2.1.0/src/turbo_json_fast.h000066400000000000000000000007611520037563000200240ustar00rootroot00000000000000/** * @file turbo_json_fast.h * @brief Fast JSON serialization declarations *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. 
*/ #ifndef LIBLOGNORM_TURBO_JSON_FAST_H_INCLUDED #define LIBLOGNORM_TURBO_JSON_FAST_H_INCLUDED #include "turbo_result_fast.h" #include #ifdef __cplusplus extern "C" { #endif #ifdef __cplusplus } #endif #endif /* LIBLOGNORM_TURBO_JSON_FAST_H_INCLUDED */ liblognorm-2.1.0/src/turbo_json_impl.c000066400000000000000000000427541520037563000200330ustar00rootroot00000000000000/* * turbo_json_impl.c -- Ultra-fast JSON serialization with nested object support * * Part of the TurboVM bytecode engine for high-performance log parsing. * * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. 
/* Two-character decimal renderings of 00..99, indexed by value * 2. */
static const char digit_pairs[201] =
    "00010203040506070809"
    "10111213141516171819"
    "20212223242526272829"
    "30313233343536373839"
    "40414243444546474849"
    "50515253545556575859"
    "60616263646566676869"
    "70717273747576777879"
    "80818283848586878889"
    "90919293949596979899";

/**
 * @brief Render a signed 64-bit integer as decimal text.
 *
 * Digits are produced two at a time, right to left, into a scratch buffer
 * and then copied to @p buf in one go.
 *
 * @param val Value to format
 * @param buf Destination; needs room for up to 20 characters
 *            ('-' plus 19 digits). No NUL terminator is written.
 * @return Number of characters stored in @p buf.
 */
static inline int
fast_i64_to_str(int64_t val, char *buf)
{
    char scratch[21];
    char *const tail = scratch + 20;
    char *cur = tail;
    const int negative = (val < 0);
    /* Two's-complement negate in unsigned space so INT64_MIN is safe. */
    uint64_t mag = negative ? (~(uint64_t)val + 1u) : (uint64_t)val;
    int count;

    *cur = '\0';

    for (; mag >= 100; mag /= 100) {
        const unsigned pair = (unsigned)(mag % 100) * 2;

        *--cur = digit_pairs[pair + 1];
        *--cur = digit_pairs[pair];
    }

    if (mag < 10) {
        *--cur = (char)('0' + (int)mag);
    } else {
        const unsigned pair = (unsigned)mag * 2;

        *--cur = digit_pairs[pair + 1];
        *--cur = digit_pairs[pair];
    }

    if (negative) {
        *--cur = '-';
    }

    count = (int)(tail - cur);
    memcpy(buf, cur, (size_t)count);
    return count;
}
/* Lookup table: 1 if the byte must be escaped inside a JSON string */
static const uint8_t needs_escape[256] = {
    /* 0x00-0x1F: control chars need escaping */
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
    /* 0x20-0x7F: printable, except " and \ */
    0,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,0,  /* " at 0x22 */
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,  /* \ at 0x5C */
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    /* 0x80-0xFF: UTF-8 lead/continuation bytes, pass through */
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
};

/**
 * @brief Locate the first byte of @p s that needs a JSON escape.
 *
 * A byte needs escaping when it is a control character (< 0x20), a double
 * quote or a backslash. Bytes >= 0x80 never do, so UTF-8 sequences pass
 * through untouched. All code paths (NEON, SSE2, scalar) must agree with
 * the needs_escape table.
 *
 * @param s   Input bytes (need not be NUL-terminated)
 * @param len Number of bytes to scan
 * @return Index of the first byte requiring an escape, or @p len if the
 *         whole range is clean.
 */
static inline size_t
find_escape_needed(const char *s, size_t len)
{
    size_t i = 0;

#if defined(__ARM_NEON) && defined(__aarch64__)
    /*
     * NEON, 16 bytes per step. vmaxvq_u8 is an AArch64-only intrinsic,
     * hence the extra __aarch64__ guard; 32-bit ARM uses the scalar loop.
     */
    const uint8x16_t v_space = vdupq_n_u8(0x20);
    const uint8x16_t v_quote = vdupq_n_u8('"');
    const uint8x16_t v_bslash = vdupq_n_u8('\\');

    for (; i + 16 <= len; i += 16) {
        const uint8x16_t v = vld1q_u8((const uint8_t *)(s + i));
        /* vcltq_u8 is an unsigned compare, so 0x80-0xFF are not flagged */
        uint8x16_t hit = vcltq_u8(v, v_space);

        hit = vorrq_u8(hit, vceqq_u8(v, v_quote));
        hit = vorrq_u8(hit, vceqq_u8(v, v_bslash));
        if (vmaxvq_u8(hit) != 0) {
            break; /* the scalar loop below pinpoints the byte */
        }
    }
#elif defined(__SSE2__)
    /* SSE2, 16 bytes per step */
    const __m128i v_ctrl_max = _mm_set1_epi8(0x1F);
    const __m128i v_quote = _mm_set1_epi8('"');
    const __m128i v_bslash = _mm_set1_epi8('\\');

    for (; i + 16 <= len; i += 16) {
        /*
         * _mm_loadu_si128 is the unaligned load; the detour through
         * const void * only keeps -Wcast-align quiet.
         */
        const __m128i v = _mm_loadu_si128((const __m128i *)(const void *)(s + i));
        /*
         * Control-character test must be UNSIGNED. _mm_cmplt_epi8 compares
         * signed bytes and would treat 0x80-0xFF as negative, i.e. as
         * "< 0x20", flagging every non-ASCII UTF-8 byte. SSE2 has no
         * unsigned compare, so use: v <= 0x1F  <=>  min_epu8(v, 0x1F) == v.
         */
        __m128i hit = _mm_cmpeq_epi8(_mm_min_epu8(v, v_ctrl_max), v);
        int mask;

        hit = _mm_or_si128(hit, _mm_cmpeq_epi8(v, v_quote));
        hit = _mm_or_si128(hit, _mm_cmpeq_epi8(v, v_bslash));
        mask = _mm_movemask_epi8(hit);
        if (mask != 0) {
            /* lowest set bit == first offending byte in this block */
            return i + (size_t)__builtin_ctz((unsigned)mask);
        }
    }
#endif

    /* Scalar tail; also the complete scan when no SIMD path is built. */
    for (; i < len; i++) {
        if (needs_escape[(uint8_t)s[i]]) {
            return i;
        }
    }
    return len;
}
/**
 * Maximum nesting depth for ECS-style dotted field names.
 * ECS rarely exceeds 3 levels (e.g., user.group.name).
 * 8 levels provides generous headroom with minimal stack cost.
 */
#define LN_JSON_MAX_DEPTH 8
_Static_assert(LN_JSON_MAX_DEPTH <= 8, "level_has_entry bitmask requires LN_JSON_MAX_DEPTH <= 8");

/**
 * One component of a dotted field name.
 * It is a (pointer, length) view into the original name string; nothing is
 * copied and the component is not NUL-terminated.
 */
typedef struct {
    const char *start;  /**< Pointer into field name string */
    uint16_t len;       /**< Length of this path component */
} ln_json_path_comp_t;

/**
 * @brief Split a dotted field name into path components.
 *
 *   "user.group.name" -> ["user", "group", "name"]
 *   "source.ip"       -> ["source", "ip"]
 *   "message"         -> ["message"]
 *
 * Edge cases: a leading dot or two adjacent dots yield an empty component;
 * a single trailing dot yields no extra component; components beyond
 * @p max_comps are dropped.
 *
 * @param name      Field name (need not be NUL-terminated)
 * @param name_len  Length of @p name in bytes
 * @param comps     Output array with room for @p max_comps entries
 * @param max_comps Capacity of @p comps
 * @return Number of components stored (1 = no dots, >1 = nested,
 *         0 for an empty name).
 */
static inline int
parse_path(const char *name, uint16_t name_len,
           ln_json_path_comp_t *comps, int max_comps)
{
    const char *cur = name;
    size_t remaining = name_len;
    int n = 0;

    while (remaining > 0 && n < max_comps) {
        const char *dot = memchr(cur, '.', remaining);
        const size_t comp_len = dot ? (size_t)(dot - cur) : remaining;

        comps[n].start = cur;
        comps[n].len = (uint16_t)comp_len;
        n++;

        if (!dot) {
            /* Last component: stop here instead of stepping past the end
             * of the name, which would form an out-of-range pointer. */
            break;
        }
        cur = dot + 1; /* skip the dot */
        remaining -= comp_len + 1;
    }

    return n;
}

/**
 * @brief Compare two path components byte-wise.
 *
 * @return Non-zero if both have the same length and content, 0 otherwise.
 */
static inline int
path_comp_eq(const ln_json_path_comp_t *a, const ln_json_path_comp_t *b)
{
    if (a->len != b->len) {
        return 0;
    }
    return memcmp(a->start, b->start, a->len) == 0;
}
* Sorting indices (uint8_t) avoids moving 64-byte field structs. */ static inline void sort_field_indices(const ln_fast_result_t *r, uint8_t *idx, uint8_t n) { /* Initialize identity mapping */ for (uint8_t i = 0; i < n; i++) idx[i] = i; /* Insertion sort by field name (strcmp order) */ for (uint8_t i = 1; i < n; i++) { uint8_t key = idx[i]; const char *key_name = r->fields[key].name; int j = (int)i - 1; while (j >= 0 && strcmp(r->fields[idx[j]].name, key_name) > 0) { idx[j + 1] = idx[j]; j--; } idx[j + 1] = key; } } /*============================================================================ * Main Serialization *============================================================================*/ /** * @brief Write a single field value. */ static inline int write_field_value(const ln_fast_field_t *f, char *out, size_t outlen) { char *p = out; char *end = out + outlen; int n; switch (f->type) { case LN_FTYPE_STRING: n = write_escaped_string(f->v.str.ptr, f->v.str.len, p, end - p); if (n < 0) return -1; return n; case LN_FTYPE_STRING_INLINE: { size_t len = strlen(f->v.inl); n = write_escaped_string(f->v.inl, len, p, end - p); if (n < 0) return -1; return n; } case LN_FTYPE_INT: if (p + 21 >= end) return -1; return fast_i64_to_str(f->v.i, p); case LN_FTYPE_DOUBLE: if (p + 32 >= end) return -1; n = snprintf(p, end - p, "%.2f", f->v.d); if (n < 0 || n >= (int)(end - p)) return -1; return n; case LN_FTYPE_BOOL: if (f->v.b) { if (p + 4 >= end) return -1; memcpy(p, "true", 4); return 4; } else { if (p + 5 >= end) return -1; memcpy(p, "false", 5); return 5; } case LN_FTYPE_NULL: default: if (p + 4 >= end) return -1; memcpy(p, "null", 4); return 4; } } size_t ln_fast_json_estimate(const ln_fast_result_t *r) { size_t est = 2; /* {} */ for (uint8_t i = 0; i < r->n_fields; i++) { const ln_fast_field_t *f = &r->fields[i]; if (!f->name) continue; /* Each dot in name generates an extra object: "key":{ ... 
} * Worst case: each component needs "name":{ = name_len + 4 * Plus closing braces. Generous estimate: name_len * 2 + 8 */ est += f->name_len * 2 + 8; switch (f->type) { case LN_FTYPE_STRING: est += f->v.str.len * 2 + 2; /* Worst case escaping */ break; case LN_FTYPE_STRING_INLINE: est += LN_FAST_INLINE_SIZE * 2 + 2; break; case LN_FTYPE_INT: est += 21; break; case LN_FTYPE_DOUBLE: est += 32; break; default: est += 6; } est += 1; /* comma */ } /* Tags array */ if (r->n_tags > 0) { est += 20; /* "event":{"tags":[ or "tags":[ + closing */ for (uint8_t i = 0; i < r->n_tags; i++) { if (r->tags[i].tag) est += strlen(r->tags[i].tag) + 3; } } return est + 64; /* Safety margin */ } /** * @brief Serialize fast result to nested JSON with multi-depth support. * * Algorithm: * 1. Sort field indices by name (insertion sort, O(N²) optimal for N≤64) * 2. Walk sorted fields with a path component stack * 3. At each field, compare its path to the currently-open path: * - Find the divergence point * - Close objects from current depth down to divergence * - Open objects from divergence up to new depth - 1 * - Emit the leaf key:value * * Example with fields [source.ip, source.port, user.group.name, user.name]: * - source.ip: open "source", emit "ip" * - source.port: same prefix, emit "port" * - user.group.name: close "source", open "user", open "group", emit "name" * - user.name: close "group", emit "name" * → {"source":{"ip":"...","port":443},"user":{"group":{"name":"..."},"name":"..."}} * * Tags are emitted under "event.tags" as a JSON array, nested under "event". */ int ln_fast_to_json(const ln_fast_result_t *r, char *buf, size_t buflen, size_t *outlen) { char *p = buf; char *end = buf + buflen; int n; uint8_t sorted[LN_FAST_MAX_FIELDS]; uint8_t n_valid = 0; ln_json_path_comp_t open_path[LN_JSON_MAX_DEPTH]; int open_depth = 0; /* Number of open nested objects */ uint8_t fi; /* * Per-level comma tracking. 
* When we close a level and return to a parent, we need to know * whether a comma is needed at that parent level. We use a bitmask: * bit i is set if level i has already emitted at least one entry. */ uint8_t level_has_entry = 0; /* bitmask, bit 0 = root level */ uint8_t si; if (buflen < 3) return -1; /* Sort field indices by name for proper nesting grouping */ for (fi = 0; fi < r->n_fields; fi++) { if (r->fields[fi].name && r->fields[fi].name_len > 0) sorted[n_valid++] = fi; } if (n_valid > 1) sort_field_indices(r, sorted, n_valid); *p++ = '{'; #define NEED_COMMA_AT(lvl) (level_has_entry & (1u << (lvl))) #define SET_HAS_ENTRY(lvl) (level_has_entry |= (1u << (lvl))) #define CLEAR_ENTRY_FROM(lvl) (level_has_entry &= (uint8_t)((1u << (lvl)) - 1)) for (si = 0; si < n_valid; si++) { const ln_fast_field_t *f = &r->fields[sorted[si]]; const ln_json_path_comp_t *leaf; ln_json_path_comp_t comps[LN_JSON_MAX_DEPTH]; int n_comps, leaf_idx, new_depth, common, min_depth, d; /* Parse this field's path: "user.group.name" → [user, group, name] */ n_comps = parse_path(f->name, f->name_len, comps, LN_JSON_MAX_DEPTH); leaf_idx = n_comps - 1; /* Last component is the leaf key */ new_depth = n_comps - 1; /* Object nesting depth (0 = top-level) */ /* Find divergence point between open_path and this field's path. * We only compare object-level components (indices 0..new_depth-1). */ common = 0; min_depth = open_depth < new_depth ? open_depth : new_depth; while (common < min_depth && path_comp_eq(&open_path[common], &comps[common])) { common++; } /* Close objects from open_depth down to common */ for (d = open_depth - 1; d >= common; d--) { if (p >= end) return -1; *p++ = '}'; } /* After closing, we're back at depth=common. * Clear comma state for levels we just closed. */ CLEAR_ENTRY_FROM(common + 1); open_depth = common; /* Open objects from common up to new_depth */ for (d = common; d < new_depth; d++) { /* Comma at current level? 
*/ if (NEED_COMMA_AT(d)) { if (p >= end) return -1; *p++ = ','; } SET_HAS_ENTRY(d); /* Write object key: "component":{ */ if ((size_t)(end - p) < (size_t)comps[d].len + 4) return -1; *p++ = '"'; memcpy(p, comps[d].start, comps[d].len); p += comps[d].len; *p++ = '"'; *p++ = ':'; *p++ = '{'; open_path[d] = comps[d]; } open_depth = new_depth; /* Emit comma at leaf level if needed */ if (NEED_COMMA_AT(open_depth)) { if (p >= end) return -1; *p++ = ','; } SET_HAS_ENTRY(open_depth); /* Write leaf key */ leaf = &comps[leaf_idx]; if ((size_t)(end - p) < (size_t)leaf->len + 3) return -1; *p++ = '"'; memcpy(p, leaf->start, leaf->len); p += leaf->len; *p++ = '"'; *p++ = ':'; /* Write value */ n = write_field_value(f, p, end - p); if (n < 0) return -1; p += n; } /* Close all remaining open objects */ { int d; for (d = open_depth - 1; d >= 0; d--) { if (p >= end) return -1; *p++ = '}'; } } open_depth = 0; /* Tags array — emit as "tags":[...] at root level (ECS standard) */ if (r->n_tags > 0) { int tag_first = 1; uint8_t ti; size_t tlen; if (NEED_COMMA_AT(0)) { if (p >= end) return -1; *p++ = ','; } if ((size_t)(end - p) < 9) return -1; memcpy(p, "\"tags\":[", 8); p += 8; for (ti = 0; ti < r->n_tags; ti++) { if (!r->tags[ti].tag) continue; if (!tag_first) { if (p >= end) return -1; *p++ = ','; } tag_first = 0; tlen = strlen(r->tags[ti].tag); n = write_escaped_string(r->tags[ti].tag, tlen, p, end - p); if (n < 0) return -1; p += n; } if (p >= end) return -1; *p++ = ']'; } if (p >= end) return -1; *p++ = '}'; /* Null-terminate */ if (p >= end) return -1; *p = '\0'; if (outlen) *outlen = p - buf; return 0; #undef NEED_COMMA_AT #undef SET_HAS_ENTRY #undef CLEAR_ENTRY_FROM } int ln_fast_to_json_alloc(const ln_fast_result_t *r, char **json_str, size_t *json_len) { size_t est = ln_fast_json_estimate(r); char *buf; size_t len; buf = malloc(est); if (!buf) return -1; if (ln_fast_to_json(r, buf, est, &len) != 0) { free(buf); return -1; } *json_str = buf; if (json_len) *json_len = len; 
/**
 * @file turbo_opcode.h
 * @brief VM instruction set for TurboVM
 *//*
 * Copyright 2024-2026 by Advens and Jeremie Jourdin.
 * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH.
 *
 * Released under ASL 2.0.
 */
#ifndef LIBLOGNORM_TURBO_OPCODE_H_INCLUDED
#define LIBLOGNORM_TURBO_OPCODE_H_INCLUDED

#include <stdint.h>
#include <stddef.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

/*============================================================================
 * Constants
 *============================================================================*/

/** Fixed instruction size (64 bytes) */
#define LN_INSTR_SIZE       64

/** Max inline data (literal or field name) */
#define LN_INSTR_MAX_INLINE 60

/*============================================================================
 * Opcodes
 *============================================================================*/

typedef enum {
    /*=== Control (0x00-0x0F) ===*/
    OP_HALT         = 0x00,  /**< Stop, no match */
    OP_MATCH        = 0x01,  /**< Rule matched, payload=rule_id */
    OP_JUMP         = 0x02,  /**< Unconditional jump, offset in payload */
    OP_FORK         = 0x03,  /**< Try path, alt on fail, offset=alt */
    OP_FAIL         = 0x04,  /**< Force backtrack */
    OP_CALL         = 0x05,  /**< Call subroutine */
    OP_RET          = 0x06,  /**< Return from subroutine */

    /*=== Literals (0x10-0x1F) ===*/
    OP_LITERAL      = 0x10,  /**< Match literal (inline), len in header */
    OP_LITERAL_EXT  = 0x11,  /**< Match literal (external ptr) */
    OP_LITERAL_CI   = 0x12,  /**< Match literal case-insensitive */
    OP_CHAR         = 0x13,  /**< Match single char */
    OP_ANY          = 0x14,  /**< Match any char (advance 1) */
    OP_CHARSET      = 0x15,  /**< Match char from set (external bitmap) */

    /*=== Fields (0x20-0x3F) ===*/
    OP_FIELD_WORD   = 0x20,  /**< Extract word (whitespace-delimited) */
    OP_FIELD_INT    = 0x21,  /**< Extract signed integer */
    OP_FIELD_UINT   = 0x22,  /**< Extract unsigned integer */
    OP_FIELD_FLOAT  = 0x23,  /**< Extract floating point */
    OP_FIELD_IPV4   = 0x24,  /**< Extract IPv4 address */
    OP_FIELD_IPV6   = 0x25,  /**< Extract IPv6 address */
    OP_FIELD_HEX    = 0x26,  /**< Extract hex number */
    OP_FIELD_QUOTED = 0x27,  /**< Extract quoted string */
    OP_FIELD_CHAR_TO= 0x28,  /**< Extract until delimiter char */
    OP_FIELD_STR_TO = 0x29,  /**< Extract until delimiter string */
    OP_FIELD_REST   = 0x2A,  /**< Extract rest of line */
    OP_FIELD_JSON   = 0x2B,  /**< Extract JSON value */
    OP_FIELD_MAC    = 0x2C,  /**< Extract MAC address */
    OP_FIELD_DATE   = 0x2D,  /**< Extract date/timestamp */
    OP_FIELD_REGEX  = 0x2E,  /**< Extract via regex (external) */
    OP_FIELD_NAME_VALUE = 0x2F, /**< Parse name=value pairs (name-value-list) */

    /*=== Skipping (0x40-0x4F) ===*/
    OP_SKIP_SPACE   = 0x40,  /**< Skip whitespace (0+) */
    OP_SKIP_SPACE1  = 0x41,  /**< Skip whitespace (1+, fail if none) */
    OP_SKIP_N       = 0x42,  /**< Skip N bytes */
    OP_SKIP_TO      = 0x43,  /**< Skip to char (not including) */
    OP_SKIP_PAST    = 0x44,  /**< Skip past char (including) */
    OP_SKIP_LINE    = 0x45,  /**< Skip to end of line */

    /*=== Tags (0x50-0x57) ===*/
    OP_TAG          = 0x50,  /**< Add tag, payload=tag name */
    OP_RULE_ID      = 0x51,  /**< Set rule ID */
    OP_STATIC_FIELD = 0x52,  /**< Add static key=value field from annotation.
                              *   Layout: data.kv.key[30] + data.kv.val[30],
                              *   aux = key length. Value is null-terminated. */

    /*=== Field Context (0x58-0x5F) - for ".." substitution ===*/
    OP_CTX_PUSH     = 0x58,  /**< Push field name context for custom types */
    OP_CTX_POP      = 0x59,  /**< Pop field name context */
    OP_CTX_NEST     = 0x5A,  /**< Start nested object (for multi-field types) */
    OP_CTX_UNNEST   = 0x5B,  /**< End nested object */

    /*=== Assertions (0x60-0x6F) ===*/
    OP_ASSERT_CHAR  = 0x60,  /**< Assert next char equals (no consume) */
    OP_ASSERT_END   = 0x61,  /**< Assert at end of input */
    OP_ASSERT_START = 0x62,  /**< Assert at start of input */

    /*=== Special (0x70-0x7F) ===*/
    OP_SYSLOG_PRI   = 0x70,  /**< Parse syslog <PRI> field */
    OP_SYSLOG_TS    = 0x71,  /**< Parse syslog timestamp */
    OP_CEF_HDR      = 0x72,  /**< Parse CEF header */
    OP_V2_IPTABLES  = 0x73,  /**< Parse iptables name=value pairs */
    OP_CEE_SYSLOG   = 0x74,  /**< Parse CEE-syslog (@cee: + JSON) */
    OP_CHECKPOINT_LEA = 0x75, /**< Parse Checkpoint LEA name: value; */

    /*=== Debug (0xF0-0xFF) ===*/
    OP_NOP          = 0xF0,  /**< No operation */
    OP_DEBUG        = 0xFE,  /**< Debug breakpoint */
    OP_INVALID      = 0xFF,  /**< Invalid marker */
} ln_opcode_t;

/*============================================================================
 * Instruction Structure
 *============================================================================*/

/**
 * @brief VM instruction (64 bytes fixed).
 *
 * Layout:
 *   [0]     opcode (ln_opcode_t)
 *   [1]     flags
 *   [2-3]   aux (length, char, etc.)
 *   [4-63]  payload (inline data or structured)
 */
typedef struct {
    uint8_t op;          /**< Opcode */
    uint8_t flags;       /**< Flags */
    uint16_t aux;        /**< Auxiliary (length, etc.) */

    union {
        /* Raw bytes for inline data */
        char str[60];        /**< Inline string (literal, name, etc.) */
        uint8_t bytes[60];   /**< Raw byte access */

        /* Jump/fork target */
        struct {
            int32_t offset;  /**< Relative instruction offset */
            int32_t _pad[14];
        } jump;

        /* Field with delimiter */
        struct {
            char name[56];   /**< Field name */
            uint8_t delim;   /**< Delimiter char */
            uint8_t ass;     /**< Assignator char (for name-value-list) */
            uint8_t _pad[2];
        } char_to;

        /* Static key-value pair (for OP_STATIC_FIELD) */
        struct {
            char key[30];    /**< Field name, null-terminated */
            char val[30];    /**< Field value, null-terminated */
        } kv;
    } data;
} ln_instr_t;

_Static_assert(sizeof(ln_instr_t) == LN_INSTR_SIZE, "Instruction must be 64 bytes");

/*============================================================================
 * Instruction Flags
 *============================================================================*/

#define LN_INSTR_F_OPTIONAL  0x01  /**< Optional (don't fail if no match) */
#define LN_INSTR_F_GREEDY    0x02  /**< Greedy matching */
#define LN_INSTR_F_STORE     0x04  /**< Store extracted field */
#define LN_INSTR_F_CASE_INS  0x08  /**< Case-insensitive */

/*============================================================================
 * Instruction Builders
 *============================================================================*/

/**
 * @brief Copy a C string into a zero-initialized payload area.
 *
 * Copies at most @p cap - 1 bytes, so the destination (which the builders
 * zero beforehand) always ends in a NUL terminator. Longer input is
 * truncated. A NULL @p src leaves the payload empty.
 */
static inline void
ln_i_copy_str(char *dst, size_t cap, const char *src)
{
    if (!src)
        return;
    for (size_t j = 0; j + 1 < cap && src[j] != '\0'; j++)
        dst[j] = src[j];
}

/** Create HALT instruction */
static inline ln_instr_t ln_i_halt(void)
{
    ln_instr_t i = {0};
    i.op = OP_HALT;
    return i;
}

/** Create MATCH instruction; @p rule is truncated to 59 bytes + NUL */
static inline ln_instr_t ln_i_match(const char *rule)
{
    ln_instr_t i = {0};
    i.op = OP_MATCH;
    ln_i_copy_str(i.data.str, sizeof i.data.str, rule);
    return i;
}

/**
 * Create LITERAL instruction (inline).
 *
 * The literal is length-counted (aux), not NUL-terminated. At most
 * LN_INSTR_MAX_INLINE bytes fit; aux is set to the number of bytes
 * actually stored so it never describes data beyond the payload.
 * Longer literals belong in OP_LITERAL_EXT.
 */
static inline ln_instr_t ln_i_literal(const char *lit, uint16_t len)
{
    ln_instr_t i = {0};
    uint16_t stored = len;

    if (!lit)
        stored = 0;
    else if (stored > LN_INSTR_MAX_INLINE)
        stored = LN_INSTR_MAX_INLINE;

    i.op = OP_LITERAL;
    i.aux = stored;
    for (uint16_t j = 0; j < stored; j++)
        i.data.str[j] = lit[j];
    return i;
}

/** Create CHAR instruction */
static inline ln_instr_t ln_i_char(char c)
{
    ln_instr_t i = {0};
    i.op = OP_CHAR;
    i.data.str[0] = c;
    return i;
}

/** Create JUMP instruction */
static inline ln_instr_t ln_i_jump(int32_t offset)
{
    ln_instr_t i = {0};
    i.op = OP_JUMP;
    i.data.jump.offset = offset;
    return i;
}

/** Create FORK instruction */
static inline ln_instr_t ln_i_fork(int32_t alt_offset)
{
    ln_instr_t i = {0};
    i.op = OP_FORK;
    i.data.jump.offset = alt_offset;
    return i;
}

/** Create FAIL instruction */
static inline ln_instr_t ln_i_fail(void)
{
    ln_instr_t i = {0};
    i.op = OP_FAIL;
    return i;
}

/** Create field instruction with name; name is truncated to 59 bytes + NUL */
static inline ln_instr_t ln_i_field(ln_opcode_t op, const char *name)
{
    ln_instr_t i = {0};
    i.op = (uint8_t)op;
    i.flags = LN_INSTR_F_STORE;
    ln_i_copy_str(i.data.str, sizeof i.data.str, name);
    return i;
}

/** Create FIELD_CHAR_TO instruction; name is truncated to 55 bytes + NUL */
static inline ln_instr_t ln_i_field_char_to(const char *name, char delim)
{
    ln_instr_t i = {0};
    i.op = OP_FIELD_CHAR_TO;
    i.flags = LN_INSTR_F_STORE;
    i.data.char_to.delim = (uint8_t)delim;
    ln_i_copy_str(i.data.char_to.name, sizeof i.data.char_to.name, name);
    return i;
}

/** Create FIELD_NAME_VALUE instruction; name is truncated to 55 bytes + NUL */
static inline ln_instr_t ln_i_field_name_value(const char *name, char sep, char ass)
{
    ln_instr_t i = {0};
    i.op = OP_FIELD_NAME_VALUE;
    i.flags = LN_INSTR_F_STORE;
    i.data.char_to.delim = (uint8_t)sep;
    i.data.char_to.ass = (uint8_t)ass;
    ln_i_copy_str(i.data.char_to.name, sizeof i.data.char_to.name, name);
    return i;
}

/** Create SKIP_SPACE instruction */
static inline ln_instr_t ln_i_skip_space(void)
{
    ln_instr_t i = {0};
    i.op = OP_SKIP_SPACE;
    return i;
}

/** Create SKIP_N instruction */
static inline ln_instr_t ln_i_skip_n(uint16_t n)
{
    ln_instr_t i = {0};
    i.op = OP_SKIP_N;
    i.aux = n;
    return i;
}

/** Create TAG instruction; tag is truncated to 59 bytes + NUL */
static inline ln_instr_t ln_i_tag(const char *tag)
{
    ln_instr_t i = {0};
    i.op = OP_TAG;
    ln_i_copy_str(i.data.str, sizeof i.data.str, tag);
    return i;
}

/** Create CTX_PUSH instruction; name is truncated to 59 bytes + NUL */
static inline ln_instr_t ln_i_ctx_push(const char *name)
{
    ln_instr_t i = {0};
    i.op = OP_CTX_PUSH;
    ln_i_copy_str(i.data.str, sizeof i.data.str, name);
    return i;
}

/** Create CTX_POP instruction */
static inline ln_instr_t ln_i_ctx_pop(void)
{
    ln_instr_t i = {0};
    i.op = OP_CTX_POP;
    return i;
}

/** Create NOP instruction */
static inline ln_instr_t ln_i_nop(void)
{
    ln_instr_t i = {0};
    i.op = OP_NOP;
    return i;
}

/*============================================================================
 * Debug Helpers
 *============================================================================*/

/**
 * @brief Get opcode name.
 */
const char *ln_opcode_name(ln_opcode_t op);

/**
 * @brief Disassemble instruction to string.
 */
int ln_instr_disasm(const ln_instr_t *inst, char *buf, size_t len);

#ifdef __cplusplus
}
#endif

#endif /* LIBLOGNORM_TURBO_OPCODE_H_INCLUDED */
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #ifdef ENABLE_TURBO #include "turbo_result_fast.h" /* All functions are inline in turbo_result_fast.h */ #endif /* ENABLE_TURBO */ liblognorm-2.1.0/src/turbo_result.h000066400000000000000000000016051520037563000173520ustar00rootroot00000000000000/** * @file turbo_result.h * @brief Compatibility wrapper for result types *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. */ #ifndef LIBLOGNORM_TURBO_RESULT_H_INCLUDED #define LIBLOGNORM_TURBO_RESULT_H_INCLUDED #include "turbo_result_fast.h" /* For backward compatibility, alias the types */ typedef ln_fast_result_t ln_result_t; typedef ln_fast_field_t ln_field_t; /* Alias functions */ #define ln_result_init(r, a) ln_fast_result_init(r, a) #define ln_result_clear(r) ln_fast_result_clear(r) #define ln_result_add_string(r,n,v,l) ln_fast_add_string_static(r, n, strlen(n), v, l) #define ln_result_add_int(r,n,v) ln_fast_add_int_static(r, n, strlen(n), v) #define ln_result_add_tag(r,t) ln_fast_add_tag(r, t) #endif /* LIBLOGNORM_TURBO_RESULT_H_INCLUDED */ liblognorm-2.1.0/src/turbo_result_fast.c000066400000000000000000000023501520037563000203600ustar00rootroot00000000000000/* * turbo_result_fast.c -- Optimized result structure for maximum performance * * Part of the TurboVM bytecode engine for high-performance log parsing. * * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #ifdef ENABLE_TURBO /* All functions are static inline in turbo_result_fast.h */ #endif liblognorm-2.1.0/src/turbo_result_fast.h000066400000000000000000000222321520037563000203660ustar00rootroot00000000000000/** * @file turbo_result_fast.h * @brief Optimized result structure for maximum performance *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. 
/* Fixed-capacity, allocation-free container for parse results. */
#ifndef LIBLOGNORM_TURBO_RESULT_FAST_H_INCLUDED
#define LIBLOGNORM_TURBO_RESULT_FAST_H_INCLUDED

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

/*============================================================================
 * Configuration
 *============================================================================*/

/** Maximum fields per result */
#define LN_FAST_MAX_FIELDS     64

/** Maximum tags per result */
#define LN_FAST_MAX_TAGS       16

/** Inline string size (fits in cache line with field metadata) */
#define LN_FAST_INLINE_SIZE    48

/** Tag hash table size (power of 2) */
#define LN_FAST_TAG_HASH_SIZE  32

/*============================================================================
 * Field Types
 *============================================================================*/

typedef enum {
    LN_FTYPE_NULL = 0,
    LN_FTYPE_STRING,        /* External string (ptr + len) */
    LN_FTYPE_STRING_INLINE, /* Inline string (no alloc) */
    LN_FTYPE_INT,
    LN_FTYPE_DOUBLE,
    LN_FTYPE_BOOL,
} ln_ftype_t;

/*============================================================================
 * Field Structure (64 bytes - cache line aligned)
 *============================================================================*/

typedef struct {
    const char *name;       /* Field name (static or arena) - 8 bytes */
    uint16_t name_len;      /* Name length - 2 bytes */
    uint8_t type;           /* ln_ftype_t - 1 byte */
    uint8_t flags;          /* Flags - 1 byte */
    uint32_t _pad;          /* Padding - 4 bytes */

    union {                 /* 48 bytes */
        struct {
            const char *ptr;
            uint32_t len;
            uint32_t _pad;
        } str;                          /* External string - 16 bytes */
        char inl[LN_FAST_INLINE_SIZE];  /* Inline string - 48 bytes */
        int64_t i;                      /* Integer - 8 bytes */
        double d;                       /* Double - 8 bytes */
        bool b;                         /* Boolean - 1 byte */
    } v;
} ln_fast_field_t;

_Static_assert(sizeof(ln_fast_field_t) == 64, "Field must be 64 bytes");

/* Field flags */
#define LN_FFIELD_STATIC_NAME  0x01  /* Name is static (don't free) */
#define LN_FFIELD_STATIC_VAL   0x02  /* Value is static (don't free) */
#define LN_FFIELD_NESTED       0x04  /* Part of nested object (name has dots) */

/**
 * @brief Check if a field name contains a dot (indicating nested object).
 * Returns LN_FFIELD_NESTED if dotted, 0 otherwise (also for a NULL name).
 */
static inline uint8_t
ln_ffield_detect_nested(const char *name, uint16_t name_len)
{
    if (name && name_len > 0 && memchr(name, '.', name_len) != NULL)
        return LN_FFIELD_NESTED;
    return 0;
}

/*============================================================================
 * Tag Hash Entry
 *============================================================================*/

typedef struct {
    const char *tag;        /* Tag string (static) */
    uint32_t hash;          /* Pre-computed hash */
} ln_tag_entry_t;

/*============================================================================
 * Fast Result Structure
 *============================================================================*/

typedef struct ln_fast_result_s {
    /* Fields array */
    ln_fast_field_t fields[LN_FAST_MAX_FIELDS];
    uint8_t n_fields;

    /* Tags with hash-based dedup */
    ln_tag_entry_t tags[LN_FAST_MAX_TAGS];
    uint8_t tag_hash[LN_FAST_TAG_HASH_SIZE];  /* Quick lookup bitmap */
    uint8_t n_tags;

    /* Match info */
    const char *rule_id;
    uint8_t flags;

    /* Original message (for unparsed-data on partial match) */
    const char *original;
    uint32_t original_len;

    /* Arena for any overflow allocations */
    void *arena;
} ln_fast_result_t;

/* Result flags */
#define LN_FRESULT_MATCHED     0x01
#define LN_FRESULT_PARTIAL     0x02
#define LN_FRESULT_HAS_ORIG    0x04

/*============================================================================
 * Fast Hash Function (FNV-1a)
 *============================================================================*/

/** FNV-1a over a NUL-terminated string. */
static inline uint32_t
ln_fast_hash(const char *str)
{
    uint32_t h = 2166136261u;
    for (; *str; str++) {
        h ^= (uint8_t)*str;
        h *= 16777619u;
    }
    return h;
}

/** FNV-1a over exactly @p len bytes. */
static inline uint32_t
ln_fast_hash_n(const char *str, size_t len)
{
    uint32_t h = 2166136261u;
    for (size_t i = 0; i < len; i++) {
        h ^= (uint8_t)str[i];
        h *= 16777619u;
    }
    return h;
}

/*============================================================================
 * Inline Operations
 *============================================================================*/

/** Zero the whole result and remember the arena pointer. */
static inline void
ln_fast_result_init(ln_fast_result_t *r, void *arena)
{
    memset(r, 0, sizeof(*r));
    r->arena = arena;
}

/**
 * Reset counters, tag bitmap and match info for reuse.
 * Field and tag slots are not wiped; they are overwritten as they are
 * reused. The arena pointer is kept.
 */
static inline void
ln_fast_result_clear(ln_fast_result_t *r)
{
    r->n_fields = 0;
    r->n_tags = 0;
    memset(r->tag_hash, 0, sizeof(r->tag_hash));
    r->rule_id = NULL;
    r->flags = 0;
    r->original = NULL;
    r->original_len = 0;
}

/**
 * @brief Claim the next field slot and fill in its name metadata.
 *
 * @return The new slot, or NULL if the result already holds
 *         LN_FAST_MAX_FIELDS fields
 */
static inline ln_fast_field_t *
ln_fast_new_field(ln_fast_result_t *r, const char *name, uint16_t name_len,
                  uint8_t type, uint8_t extra_flags)
{
    ln_fast_field_t *f;

    if (r->n_fields >= LN_FAST_MAX_FIELDS)
        return NULL;

    f = &r->fields[r->n_fields++];
    f->name = name;
    f->name_len = name_len;
    f->type = type;
    f->flags = (uint8_t)(LN_FFIELD_STATIC_NAME | extra_flags |
                         ln_ffield_detect_nested(name, name_len));
    return f;
}

/**
 * @brief Add string field with static name (no copy of the name).
 *
 * FAST PATH: For known field names, pass static string literal.
 * Values shorter than LN_FAST_INLINE_SIZE are copied into the field and
 * NUL-terminated; longer values are referenced by pointer and must
 * outlive the result.
 *
 * @return 0 on success, -1 if the result is full or @p val is NULL with
 *         a non-zero length
 */
static inline int
ln_fast_add_string_static(ln_fast_result_t *r,
                          const char *name, uint16_t name_len,
                          const char *val, uint32_t val_len)
{
    ln_fast_field_t *f;
    const bool fits_inline = val_len < LN_FAST_INLINE_SIZE;

    if (!val && val_len > 0)
        return -1;

    f = ln_fast_new_field(r, name, name_len,
                          fits_inline ? LN_FTYPE_STRING_INLINE : LN_FTYPE_STRING,
                          LN_FFIELD_STATIC_VAL);
    if (!f)
        return -1;

    if (fits_inline) {
        /* memcpy() must not be handed NULL, even for zero bytes */
        if (val_len > 0)
            memcpy(f->v.inl, val, val_len);
        f->v.inl[val_len] = '\0';
    } else {
        f->v.str.ptr = val;
        f->v.str.len = val_len;
    }
    return 0;
}

/**
 * @brief Add integer field with static name.
 * @return 0 on success, -1 if the result is full
 */
static inline int
ln_fast_add_int_static(ln_fast_result_t *r,
                       const char *name, uint16_t name_len,
                       int64_t val)
{
    ln_fast_field_t *f = ln_fast_new_field(r, name, name_len, LN_FTYPE_INT, 0);

    if (!f)
        return -1;
    f->v.i = val;
    return 0;
}

/**
 * @brief Add double field with static name.
 * @return 0 on success, -1 if the result is full
 */
static inline int
ln_fast_add_double_static(ln_fast_result_t *r,
                          const char *name, uint16_t name_len,
                          double val)
{
    ln_fast_field_t *f = ln_fast_new_field(r, name, name_len, LN_FTYPE_DOUBLE, 0);

    if (!f)
        return -1;
    f->v.d = val;
    return 0;
}

/**
 * @brief Add tag, skipping duplicates.
 *
 * The tag pointer is stored, not copied, so the string must outlive the
 * result. The slot bitmap rules out most duplicates without a scan; when
 * the slot is already marked, existing tags are scanned and compared by
 * hash, then by pointer or content. Content comparison keeps this in
 * line with ln_fast_has_tag(), which also compares by content.
 *
 * The duplicate check runs before the capacity check, so re-adding an
 * existing tag succeeds even when the table is full.
 *
 * @return 0 if the tag is present afterwards, -1 on NULL arguments or if
 *         a new tag does not fit
 */
static inline int
ln_fast_add_tag(ln_fast_result_t *r, const char *tag)
{
    uint32_t h;
    uint8_t slot;

    if (!r || !tag)
        return -1;

    h = ln_fast_hash(tag);
    slot = (uint8_t)(h & (LN_FAST_TAG_HASH_SIZE - 1));

    if (r->tag_hash[slot]) {
        /* Slot already used: possible duplicate or collision */
        for (uint8_t i = 0; i < r->n_tags; i++) {
            const ln_tag_entry_t *e = &r->tags[i];
            if (e->hash == h && e->tag &&
                (e->tag == tag || strcmp(e->tag, tag) == 0)) {
                return 0;  /* Already present */
            }
        }
    }

    if (r->n_tags >= LN_FAST_MAX_TAGS)
        return -1;

    /* Add new tag */
    r->tag_hash[slot] = 1;
    r->tags[r->n_tags].tag = tag;
    r->tags[r->n_tags].hash = h;
    r->n_tags++;

    return 0;
}

/**
 * @brief Set rule ID (static string) and mark the result as matched.
 */
static inline void
ln_fast_set_rule_id(ln_fast_result_t *r, const char *rule_id)
{
    r->rule_id = rule_id;
    r->flags |= LN_FRESULT_MATCHED;
}

/**
 * @brief Set original message pointer (not copied).
 */
static inline void
ln_fast_set_original(ln_fast_result_t *r, const char *msg, uint32_t len)
{
    r->original = msg;
    r->original_len = len;
    r->flags |= LN_FRESULT_HAS_ORIG;
}

/**
 * @brief Check if result has a specific tag (compared by content).
 * @return 1 if present, 0 otherwise (also for NULL arguments)
 */
static inline int
ln_fast_has_tag(const ln_fast_result_t *r, const char *tag)
{
    if (!r || !tag)
        return 0;

    for (int i = 0; i < r->n_tags; i++) {
        if (r->tags[i].tag && strcmp(r->tags[i].tag, tag) == 0)
            return 1;
    }
    return 0;
}

/*============================================================================
 * JSON Serialization (Optimized)
 *============================================================================*/

/**
 * @brief Estimate JSON output size.
 */
size_t ln_fast_json_estimate(const ln_fast_result_t *r);

/**
 * @brief Serialize to JSON string.
 *
 * @param r      Result to serialize
 * @param buf    Output buffer
 * @param buflen Buffer size
 * @param outlen Receives actual length (may be NULL)
 * @return 0 on success, -1 if buffer too small
 *
 * This function creates nested objects from dotted field names:
 *   "timestamp_netscaler.day" -> {"timestamp_netscaler": {"day": ...}}
 */
int ln_fast_to_json(const ln_fast_result_t *r, char *buf, size_t buflen, size_t *outlen);

/**
 * @brief Allocating version.
 */
int ln_fast_to_json_alloc(const ln_fast_result_t *r, char **json_str, size_t *json_len);

#ifdef __cplusplus
}
#endif

#endif /* LIBLOGNORM_TURBO_RESULT_FAST_H_INCLUDED */
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "turbo_simd.h" #include /*============================================================================ * Backend Name *============================================================================*/ const char * ln_simd_backend_name(void) { #if defined(LN_SIMD_SSE42) return "sse42"; #elif defined(LN_SIMD_NEON) return "neon"; #else return "scalar"; #endif } /*============================================================================ * Character Set Helpers *============================================================================*/ /** * @brief Build a 256-byte character class lookup table. * * For fast "is char in set" checks. * * Cost: memset(256) + strlen(chars) stores = ~64 cycles on M1. * Called per-invocation in the scalar path. For the NEON fast path, * we use the nibble-parallel technique instead (32 bytes, ~10 cycles). 
*/ static void __attribute__((unused)) build_char_class(const char *chars, uint8_t table[256]) { memset(table, 0, 256); while (*chars) { table[(uint8_t)*chars++] = 1; } } /*============================================================================ * SSE4.2 Implementation *============================================================================*/ #if defined(LN_SIMD_SSE42) /* @brief Optimized SSE4.2 char set loader */ static inline __m128i ln_simd_load_chars(const char *chars) { uint8_t padded[16] = {0}; if (chars) { strncpy((char*)padded, chars, 15); } return _mm_loadu_si128((const __m128i *)(const void *)padded); } /** * @brief Find character using SSE4.2 PCMPISTRI. */ size_t ln_simd_find_char(const char *buf, size_t len, char c) { __m128i needle; size_t i; if (!buf || len == 0) return 0; /* Create a vector of the search character */ needle = _mm_set1_epi8(c); i = 0; /* Process 16 bytes at a time */ while (i + 16 <= len) { __m128i chunk = _mm_loadu_si128((const __m128i *)(const void *)(buf + i)); __m128i cmp = _mm_cmpeq_epi8(chunk, needle); int mask = _mm_movemask_epi8(cmp); if (mask != 0) { return i + __builtin_ctz(mask); } i += 16; } /* Handle remaining bytes */ while (i < len) { if (buf[i] == c) return i; i++; } return len; } /** * @brief Find any char from set using SSE4.2. */ size_t ln_simd_find_char_set(const char *buf, size_t len, const char *chars) { __m128i set; size_t i; if (!buf || len == 0 || !chars || !*chars) return len; set = ln_simd_load_chars(chars); i = 0; while (i + 16 <= len) { __m128i chunk = _mm_loadu_si128((const __m128i *)(const void *)(buf + i)); int index = _mm_cmpistri(set, chunk, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY); if (index < 16) return i + index; i += 16; } while (i < len) { if (strchr(chars, buf[i])) return i; i++; } return len; } /** * @brief Find char NOT in set using SSE4.2. 
*/ size_t ln_simd_find_not_char_set(const char *buf, size_t len, const char *chars) { uint8_t padded_set[16] = {0}; __m128i set; size_t i; if (!buf || len == 0 || !chars) return 0; strncpy((char*)padded_set, chars, 15); set = _mm_loadu_si128((const __m128i *)(const void *)padded_set); i = 0; while (i + 16 <= len) { __m128i chunk = _mm_loadu_si128((const __m128i *)(const void *)(buf + i)); /* Negative Polarity finds the first char that is NOT in the 'chars' set */ int index = _mm_cmpistri(set, chunk, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_NEGATIVE_POLARITY); if (index < 16) return i + index; i += 16; } while (i < len && strchr(chars, buf[i])) i++; return i; } /** * @brief Skip whitespace using SSE4.2. */ size_t ln_simd_skip_space(const char *buf, size_t len) { __m128i ranges; size_t i; if (!buf || len == 0) return 0; /* Ranges: 0x09-0x0D (TAB, LF, VT, FF, CR) and 0x20 (SPACE) */ ranges = _mm_setr_epi8(0x09, 0x0D, 0x20, 0x20, 0,0,0,0,0,0,0,0,0,0,0,0); i = 0; while (i + 16 <= len) { __m128i chunk = _mm_loadu_si128((const __m128i *)(const void *)(buf + i)); /* Find first character NOT in the whitespace range */ int index = _mm_cmpistri(ranges, chunk, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY); if (index < 16) return i + index; i += 16; } /* Handle remaining */ while (i < len && ln_is_space(buf[i])) { i++; } return i; } #elif defined(LN_SIMD_NEON) /*============================================================================ * NEON Implementation (ARM64) *============================================================================*/ size_t ln_simd_find_char(const char *buf, size_t len, char c) { uint8x16_t needle; size_t i; /* Pre-calculate bitmask components */ const uint8x16_t bit_weights = {1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128}; if (!buf || len == 0) return 0; needle = vdupq_n_u8((uint8_t)c); i = 0; while (i + 16 <= len) { uint8x16_t chunk = vld1q_u8((const uint8_t *)(buf + i)); uint8x16_t cmp = vceqq_u8(chunk, needle); 
/* Convert 128-bit comparison to 16-bit mask using bit weights */ uint8x16_t matched_bits = vandq_u8(cmp, bit_weights); uint8_t mask_lo = vaddv_u8(vget_low_u8(matched_bits)); uint8_t mask_hi = vaddv_u8(vget_high_u8(matched_bits)); uint16_t mask = (uint16_t)mask_lo | ((uint16_t)mask_hi << 8); if (mask != 0) { return i + __builtin_ctz(mask); // Instant index finding } i += 16; } while (i < len) { if (buf[i] == c) return i; i++; } return len; } /** * @brief Build NEON nibble lookup tables for character class matching. * * Technique: "Nibble-Parallel Lookup" (also used by Hyperscan, simdjson). * * For each character in the set, we encode its presence in two 16-entry * tables indexed by the low nibble and high nibble of the byte value. * * For a byte B, we set: * lo_nibbles[B & 0x0F] |= (1 << (B >> 4)) // bit for high nibble * hi_nibbles[B >> 4] |= (1 << (B & 0x0F)) // bit for low nibble (unused) * * Actually, the simpler correct approach for ≤8 distinct high nibbles: * lo_table[lo_nibble] = bitmask of which hi_nibbles have a char with this lo * hi_table[hi_nibble] = bitmask of which lo_nibbles have a char with this hi * * At runtime for 16 input bytes: * lo_bits = vqtbl1q_u8(lo_table, input & 0x0F) * hi_bits = vqtbl1q_u8(hi_table, input >> 4) * match = lo_bits & hi_bits // non-zero if char is in set * * This works because a byte is in the set iff both its low-nibble row * and high-nibble row agree on at least one bit position. * * Limitation: works perfectly when the character set spans ≤8 distinct * high nibbles (covers ASCII printable + control chars, which is all * we encounter in log parsing). For the general case we fall back to * the scalar table lookup. 
*/
static inline int
build_neon_nibble_tables(const char *chars, uint8_t lo_lut[16], uint8_t hi_lut[16])
{
    uint8_t group_bit[16] = {0};    /* high nibble -> its bit, 0 if unused */
    uint16_t seen_hi = 0;           /* bitmap of high nibbles in the set */
    int next_bit = 0;
    int nib;
    const char *cp;

    memset(lo_lut, 0, 16);
    memset(hi_lut, 0, 16);

    /* Pass 1: record which high nibbles occur at all */
    for (cp = chars; *cp; cp++) {
        seen_hi |= (1u << ((uint8_t)*cp >> 4));
    }

    /* One LUT entry has 8 bits, so at most 8 groups can be told apart */
    if (__builtin_popcount(seen_hi) > 8)
        return -1;

    /* Hand out bit positions to the used high nibbles, in ascending order */
    for (nib = 0; nib < 16; nib++) {
        if (seen_hi & (1u << nib)) {
            group_bit[nib] = (uint8_t)(1u << next_bit);
            next_bit++;
        }
    }

    /*
     * Pass 2: fill both tables. For an input byte (hi:lo),
     *   hi_lut[hi] is the single bit of hi's group (0 if hi is unused) and
     *   lo_lut[lo] has that bit set iff the set contains (hi:lo),
     * so (lo_lut[lo] & hi_lut[hi]) != 0 exactly for members of the set.
     */
    for (cp = chars; *cp; cp++) {
        const uint8_t byte = (uint8_t)*cp;
        const uint8_t bit = group_bit[byte >> 4];

        lo_lut[byte & 0x0F] |= bit;
        hi_lut[byte >> 4] |= bit;
    }

    return 0;
}

/**
 * @brief Find any char from set using NEON nibble-parallel lookup.
 *
 * Processes 16 bytes per iteration using vqtbl1q_u8 for parallel
 * character class testing. Each iteration:
 *   1. Load 16 input bytes
 *   2. Split each byte into (hi_nibble, lo_nibble) via vshrq/vandq
 *   3. Lookup both nibbles through 16-byte LUTs via vqtbl1q_u8
 *   4. AND results: non-zero lanes are matches
 *   5. Reduce to bitmask and find first set bit
 *
 * Author's estimate (not re-measured here):
 * Throughput: ~16 bytes per ~6 NEON instructions, about 2.5 bytes/cycle on M1.
 * Previous scalar: ~1 byte per ~3 instructions, about 0.3 bytes/cycle.
 * Speedup: ~8x for the inner loop.
*/
size_t
ln_simd_find_char_set(const char *buf, size_t len, const char *chars)
{
    uint8_t lo_tab[16], hi_tab[16];
    uint8_t member[256];
    size_t pos = 0;

    if (!buf || len == 0 || !chars || !*chars)
        return len;

    /* Vector path is usable only if the set fits the nibble encoding */
    if (build_neon_nibble_tables(chars, lo_tab, hi_tab) == 0) {
        /* Lane weights for turning a lane mask into a 16-bit bitmask */
        const uint8x16_t bit_weights = {
            1, 2, 4, 8, 16, 32, 64, 128,
            1, 2, 4, 8, 16, 32, 64, 128
        };
        const uint8x16_t lo_lut = vld1q_u8(lo_tab);
        const uint8x16_t hi_lut = vld1q_u8(hi_tab);
        const uint8x16_t nibble_mask = vdupq_n_u8(0x0F);

        for (; pos + 16 <= len; pos += 16) {
            const uint8x16_t chunk = vld1q_u8((const uint8_t *)(buf + pos));

            /* Look up the low and high nibble of every byte in parallel */
            const uint8x16_t lo_bits =
                vqtbl1q_u8(lo_lut, vandq_u8(chunk, nibble_mask));
            const uint8x16_t hi_bits =
                vqtbl1q_u8(hi_lut, vshrq_n_u8(chunk, 4));

            /* A lane is non-zero iff its byte belongs to the set */
            const uint8x16_t matches = vandq_u8(lo_bits, hi_bits);

            /* non-zero lane -> 0xFF -> its weight; sum halves into a mask */
            const uint8x16_t matched_bits =
                vandq_u8(vcgtq_u8(matches, vdupq_n_u8(0)), bit_weights);
            const uint8_t mask_lo = vaddv_u8(vget_low_u8(matched_bits));
            const uint8_t mask_hi = vaddv_u8(vget_high_u8(matched_bits));
            const uint16_t mask = (uint16_t)mask_lo | ((uint16_t)mask_hi << 8);

            if (mask != 0) {
                return pos + __builtin_ctz(mask);
            }
        }
    }

    /*
     * Scalar table scan: the < 16 byte tail after the vector loop, or the
     * whole buffer (pos is still 0) when the set has > 8 high nibble groups.
     */
    build_char_class(chars, member);
    for (; pos < len; pos++) {
        if (member[(uint8_t)buf[pos]])
            return pos;
    }
    return len;
}

/**
 * @brief Find first char NOT in set using NEON nibble-parallel lookup.
 *
 * Same nibble-parallel technique as find_char_set, but inverted:
 * we look for the first byte where the AND of lo_bits and hi_bits is ZERO.
*/ size_t ln_simd_find_not_char_set(const char *buf, size_t len, const char *chars) { uint8_t lo_lut_data[16], hi_lut_data[16]; int neon_ok; uint8_t table[256]; size_t i; if (!buf || len == 0) return 0; if (!chars || !*chars) return 0; /* Build NEON nibble lookup tables */ neon_ok = build_neon_nibble_tables(chars, lo_lut_data, hi_lut_data); if (neon_ok == 0) { /* NEON fast path */ uint8x16_t lo_lut = vld1q_u8(lo_lut_data); uint8x16_t hi_lut = vld1q_u8(hi_lut_data); uint8x16_t lo_mask = vdupq_n_u8(0x0F); const uint8x16_t bit_weights = { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 }; i = 0; while (i + 16 <= len) { uint8x16_t chunk = vld1q_u8((const uint8_t *)(buf + i)); uint8x16_t lo_nibbles = vandq_u8(chunk, lo_mask); uint8x16_t hi_nibbles = vshrq_n_u8(chunk, 4); uint8x16_t lo_bits = vqtbl1q_u8(lo_lut, lo_nibbles); uint8x16_t hi_bits = vqtbl1q_u8(hi_lut, hi_nibbles); /* AND → non-zero if char IS in set */ uint8x16_t in_set = vandq_u8(lo_bits, hi_bits); /* We want NOT in set → lanes where in_set == 0 */ uint8x16_t not_in_set = vceqq_u8(in_set, vdupq_n_u8(0)); uint8x16_t matched_bits = vandq_u8(not_in_set, bit_weights); uint8_t mask_lo = vaddv_u8(vget_low_u8(matched_bits)); uint8_t mask_hi = vaddv_u8(vget_high_u8(matched_bits)); uint16_t mask = (uint16_t)mask_lo | ((uint16_t)mask_hi << 8); if (mask != 0) { return i + __builtin_ctz(mask); } i += 16; } /* Scalar tail */ build_char_class(chars, table); while (i < len) { if (!table[(uint8_t)buf[i]]) return i; i++; } return len; } /* Fallback: scalar */ build_char_class(chars, table); for (i = 0; i < len; i++) { if (!table[(uint8_t)buf[i]]) return i; } return len; } size_t ln_simd_skip_space(const char *buf, size_t len) { uint8x16_t space; uint8x16_t tab_range_lo; uint8x16_t tab_range_hi; /* Pre-calculated weights to create a bitmask from vector */ const uint8x16_t bit_weights = {1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128}; size_t i = 0; if (!buf || len == 0) return 0; space = vdupq_n_u8(' '); 
tab_range_lo = vdupq_n_u8(0x09); tab_range_hi = vdupq_n_u8(0x0D); while (i + 16 <= len) { uint8x16_t chunk = vld1q_u8((const uint8_t *)(buf + i)); /* Check for ' ' OR (>= 0x09 AND <= 0x0D) */ uint8x16_t is_space = vceqq_u8(chunk, space); uint8x16_t in_tab_range = vandq_u8(vcgeq_u8(chunk, tab_range_lo), vcleq_u8(chunk, tab_range_hi)); uint8x16_t is_ws = vorrq_u8(is_space, in_tab_range); /* Create a 16-bit mask from the 128-bit vector */ uint8x16_t matched_bits = vandq_u8(is_ws, bit_weights); uint16_t mask = (uint16_t)vaddv_u8(vget_low_u8(matched_bits)) | ((uint16_t)vaddv_u8(vget_high_u8(matched_bits)) << 8); if (mask != 0xFFFF) { /* Find first zero bit (first non-whitespace) */ return i + __builtin_ctz(~mask); } i += 16; } while (i < len && ln_is_space(buf[i])) i++; return i; } #else /*============================================================================ * Scalar Fallback Implementation *============================================================================*/ /*---------------------------------------------------------------------------- * Precomputed Character Class Cache (Thread-Local) * * The VM repeatedly calls find_char_set / find_not_char_set with the * same character set strings (e.g., " \t\n\r" for word boundaries). * Instead of rebuilding the 256-byte table on every call, we cache * the last-used table per thread. * * This is a simple 1-entry cache: if the pointer matches, reuse. * Since instruction data pointers are stable for the program lifetime, * pointer comparison is sufficient (no strcmp needed). * * Cache hit rate in practice: >95% (word extraction dominates). 
*----------------------------------------------------------------------------*/ #if defined(__GNUC__) || defined(__clang__) #define THREAD_LOCAL __thread #elif defined(_MSC_VER) #define THREAD_LOCAL __declspec(thread) #else #define THREAD_LOCAL /* nothing — falls back to rebuild every time */ #endif static THREAD_LOCAL const char *s_cached_chars = NULL; static THREAD_LOCAL uint8_t s_cached_table[256]; /** * @brief Get or build a character class table, with 1-entry TLS cache. * * @param[in] chars Null-terminated character set string (pointer-stable) * @return Pointer to the 256-byte TLS-cached table */ static inline const uint8_t * get_char_class(const char *chars) { if (chars == s_cached_chars && s_cached_chars != NULL) { return s_cached_table; /* Cache hit — same pointer, same data */ } /* Cache miss: build and cache */ build_char_class(chars, s_cached_table); s_cached_chars = chars; return s_cached_table; } size_t ln_simd_find_char(const char *buf, size_t len, char c) { if (!buf) return 0; for (size_t i = 0; i < len; i++) { if (buf[i] == c) return i; } return len; } size_t ln_simd_find_char_set(const char *buf, size_t len, const char *chars) { const uint8_t *table; size_t i; if (!buf || !chars || !*chars) return len; table = get_char_class(chars); for (i = 0; i < len; i++) { if (table[(uint8_t)buf[i]]) return i; } return len; } size_t ln_simd_find_not_char_set(const char *buf, size_t len, const char *chars) { const uint8_t *table; size_t i; if (!buf || len == 0) return 0; if (!chars || !*chars) return 0; table = get_char_class(chars); for (i = 0; i < len; i++) { if (!table[(uint8_t)buf[i]]) return i; } return len; } size_t ln_simd_skip_space(const char *buf, size_t len) { size_t i; if (!buf) return 0; i = 0; while (i < len && ln_is_space(buf[i])) { i++; } return i; } #endif /* Architecture selection */ /*============================================================================ * Common Implementation (uses architecture-specific primitives) 
*============================================================================*/

/**
 * @brief Skip while characters match a set.
 * @return Number of leading bytes of buf that are contained in chars
 */
size_t
ln_simd_skip_chars(const char *buf, size_t len, const char *chars)
{
    return ln_simd_find_not_char_set(buf, len, chars);
}

/*============================================================================
 * Field Extraction
 *============================================================================*/

/**
 * @brief Extract a whitespace-delimited word starting at buf[0].
 *
 * STRICT COMPLIANCE: leading whitespace is NOT skipped; the VM does that
 * via OP_SKIP_SPACE. A word must contain at least one character, matching
 * the legacy parser (parser.c ln_v2_parseWord fails for a zero-length
 * word), so both an empty buffer and a buffer starting with whitespace
 * yield LN_SIMD_ENOTFOUND.
 *
 * @param[in]  buf   Buffer to parse
 * @param[in]  len   Buffer length
 * @param[out] span  Word; consumed == len of word (stops AT the whitespace)
 * @return LN_SIMD_OK, LN_SIMD_ENOTFOUND (no word), LN_SIMD_EINVAL (NULL arg)
 */
int
ln_simd_word(const char *buf, size_t len, ln_span_t *span)
{
    size_t word_len;

    if (!buf || !span)
        return LN_SIMD_EINVAL;

    if (len == 0 || ln_is_space(buf[0]))
        return LN_SIMD_ENOTFOUND;

    /*
     * End of word = next whitespace. find_char_set returns len when there
     * is none, so the word then extends to the end of the buffer.
     */
    word_len = ln_simd_find_char_set(buf, len, " \t\n\r");

    span->start = buf;
    span->len = word_len;
    span->consumed = word_len; /* Stop AT the space */
    return LN_SIMD_OK;
}

/**
 * @brief Extract characters up to (not including) a delimiter character.
 *
 * @param[out] span  Bytes before the delimiter, or the whole buffer when
 *                   the delimiter is missing
 * @return LN_SIMD_OK if the delimiter was found, LN_SIMD_ENOTFOUND if not,
 *         LN_SIMD_EINVAL on NULL arguments
 */
int
ln_simd_char_to(const char *buf, size_t len, char delim, ln_span_t *span)
{
    size_t pos;

    if (!buf || !span)
        return LN_SIMD_EINVAL;

    pos = ln_simd_find_char(buf, len, delim);

    span->start = buf;
    span->len = pos;

    if (pos >= len) {
        span->consumed = len;
        return LN_SIMD_ENOTFOUND;
    }

    /*
     * We must stop AT the delimiter so the VM's next instruction (OP_LITERAL)
     * can match and consume it. This aligns with parser.c behavior.
     */
    span->consumed = pos;
    return LN_SIMD_OK;
}

/**
 * @brief Extract characters up to (not including) a delimiter string.
 *
 * @param[in] delim      Delimiter bytes (need not be null-terminated)
 * @param[in] delim_len  Number of delimiter bytes, must be > 0
 * @return LN_SIMD_OK if found (span stops AT the delimiter),
 *         LN_SIMD_ENOTFOUND if not (span covers the whole buffer),
 *         LN_SIMD_EINVAL on NULL arguments or delim_len == 0
 */
int
ln_simd_string_to(const char *buf, size_t len, const char *delim,
                  size_t delim_len, ln_span_t *span)
{
    char first;
    size_t i = 0;

    if (!buf || !span || !delim)
        return LN_SIMD_EINVAL;
    if (delim_len == 0)
        return LN_SIMD_EINVAL;

    if (delim_len == 1) {
        return ln_simd_char_to(buf, len, delim[0], span);
    }

    /* Jump from candidate to candidate using the first delimiter byte */
    first = delim[0];
    while (i + delim_len <= len) {
        const size_t pos = ln_simd_find_char(buf + i, len - i, first);

        if (pos >= len - i)
            break; /* first byte does not occur any more */
        i += pos;

        if (i + delim_len <= len && memcmp(buf + i, delim, delim_len) == 0) {
            span->start = buf;
            span->len = i;
            span->consumed = i; /* Stop AT the delimiter */
            return LN_SIMD_OK;
        }
        i++;
    }

    span->start = buf;
    span->len = len;
    span->consumed = len;
    return LN_SIMD_ENOTFOUND;
}

/**
 * @brief Extract the content of a single- or double-quoted string.
 *
 * A backslash escapes the following byte (so an escaped quote does not
 * terminate the string). The span is NOT unescaped.
 *
 * @param[out] span  Content between the quotes; consumed includes both quotes
 * @return LN_SIMD_OK, LN_SIMD_EFORMAT (no opening/closing quote),
 *         LN_SIMD_EINVAL (NULL argument or len < 2)
 */
int
ln_simd_quoted(const char *buf, size_t len, ln_span_t *span)
{
    const size_t content_start = 1;
    bool escaped = false;
    char quote;
    size_t i;

    if (!buf || !span || len < 2)
        return LN_SIMD_EINVAL;

    quote = buf[0];
    if (quote != '"' && quote != '\'') {
        return LN_SIMD_EFORMAT;
    }

    for (i = content_start; i < len; i++) {
        const char c = buf[i];

        if (escaped) {
            escaped = false; /* this byte is taken literally */
        } else if (c == '\\') {
            escaped = true;
        } else if (c == quote) {
            /* Found closing quote */
            span->start = buf + content_start;
            span->len = i - content_start;
            span->consumed = i + 1;
            return LN_SIMD_OK;
        }
    }

    /* No closing quote found */
    return LN_SIMD_EFORMAT;
}

/**
 * @brief Extract the content between a matching open/close pair.
 *
 * Nested pairs of the same type are balanced. When open == close (e.g.
 * '|' ... '|') nesting is impossible, so the next occurrence closes the
 * span; the close test therefore comes first.
 *
 * @param[out] span  Content without the outer brackets; consumed includes
 *                   both brackets
 * @return LN_SIMD_OK, LN_SIMD_EFORMAT (buf does not start with open, or
 *         unbalanced), LN_SIMD_EINVAL (NULL argument or len < 2)
 */
int
ln_simd_bracketed(const char *buf, size_t len, char open, char close,
                  ln_span_t *span)
{
    int depth = 1;
    size_t i = 1;

    if (!buf || !span || len < 2)
        return LN_SIMD_EINVAL;

    if (buf[0] != open) {
        return LN_SIMD_EFORMAT;
    }

    while (i < len && depth > 0) {
        const char c = buf[i];

        if (c == close) {
            depth--;
        } else if (c == open) {
            depth++;
        }
        /* leave i ON the closing bracket once depth reaches 0 */
        if (depth > 0) {
            i++;
        }
    }

    if (depth != 0) {
        return LN_SIMD_EFORMAT;
    }

    span->start = buf + 1;
    span->len = i - 1;
    span->consumed = i + 1;
    return LN_SIMD_OK;
}
/*============================================================================ * Numeric Parsing *============================================================================*/ int ln_simd_number(const char *buf, size_t len, ln_number_t *result) { size_t i; uint64_t val; const uint64_t overflow_threshold = UINT64_MAX / 10; if (!buf || !result) return LN_SIMD_EINVAL; result->value = 0; result->consumed = 0; result->negative = false; result->overflow = false; if (len == 0) return LN_SIMD_EFORMAT; i = 0; /* Handle sign */ if (buf[i] == '-') { result->negative = true; i++; } else if (buf[i] == '+') { i++; } if (i >= len || !ln_is_digit(buf[i])) { return LN_SIMD_EFORMAT; } /* Parse digits */ val = 0; while (i < len && ln_is_digit(buf[i])) { uint8_t digit = buf[i] - '0'; /* Check for overflow */ if (val > overflow_threshold || (val == overflow_threshold && digit > UINT64_MAX % 10)) { result->overflow = true; /* Continue parsing to consume all digits */ } val = val * 10 + digit; i++; } result->consumed = i; if (result->negative) { if (val > (uint64_t)INT64_MAX + 1) { result->overflow = true; } result->value = -(int64_t)val; } else { if (val > (uint64_t)INT64_MAX) { result->overflow = true; } result->value = (int64_t)val; } return LN_SIMD_OK; } int ln_simd_unsigned(const char *buf, size_t len, ln_number_t *result) { size_t i; uint64_t val; const uint64_t overflow_threshold = UINT64_MAX / 10; if (!buf || !result) return LN_SIMD_EINVAL; result->value = 0; result->consumed = 0; result->negative = false; result->overflow = false; if (len == 0) return LN_SIMD_EFORMAT; i = 0; /* Optional plus sign */ if (buf[i] == '+') { i++; } else if (buf[i] == '-') { return LN_SIMD_EFORMAT; /* Negative not allowed */ } if (i >= len || !ln_is_digit(buf[i])) { return LN_SIMD_EFORMAT; } val = 0; while (i < len && ln_is_digit(buf[i])) { uint8_t digit = buf[i] - '0'; if (val > overflow_threshold || (val == overflow_threshold && digit > UINT64_MAX % 10)) { result->overflow = true; } val = val * 10 
+ digit; i++; } result->consumed = i; result->value = (int64_t)val; return LN_SIMD_OK; } int ln_simd_hex(const char *buf, size_t len, ln_number_t *result) { size_t i; uint64_t val; bool has_digit; if (!buf || !result) return LN_SIMD_EINVAL; result->value = 0; result->consumed = 0; result->negative = false; result->overflow = false; if (len == 0) return LN_SIMD_EFORMAT; i = 0; /* Skip optional 0x/0X prefix */ if (len >= 2 && buf[0] == '0' && (buf[1] == 'x' || buf[1] == 'X')) { i = 2; } if (i >= len) return LN_SIMD_EFORMAT; val = 0; has_digit = false; while (i < len) { char c = buf[i]; uint8_t digit; if (c >= '0' && c <= '9') { digit = c - '0'; } else if (c >= 'A' && c <= 'F') { digit = c - 'A' + 10; } else if (c >= 'a' && c <= 'f') { digit = c - 'a' + 10; } else { break; } /* Check for overflow (shift by 4 bits) */ if (val >> 60) { result->overflow = true; } val = (val << 4) | digit; has_digit = true; i++; } if (!has_digit) return LN_SIMD_EFORMAT; result->consumed = i; result->value = (int64_t)val; return LN_SIMD_OK; } /*============================================================================ * IPv4 Parsing *============================================================================*/ int ln_simd_ipv4(const char *buf, size_t len, ln_ipv4_t *result) { size_t i; int octet; if (!buf || !result) return LN_SIMD_EINVAL; result->addr = 0; result->consumed = 0; result->valid = false; memset(result->octets, 0, 4); if (len < 7) return LN_SIMD_EFORMAT; /* Minimum: "0.0.0.0" */ i = 0; for (octet = 0; octet < 4; octet++) { unsigned int val = 0; int digits = 0; /* Parse octet value */ if (i >= len || !ln_is_digit(buf[i])) { return LN_SIMD_EFORMAT; } while (i < len && ln_is_digit(buf[i])) { val = val * 10 + (buf[i] - '0'); digits++; i++; if (digits > 3 || val > 255) { return LN_SIMD_EFORMAT; } } result->octets[octet] = (uint8_t)val; /* Expect dot between octets (except after last) */ if (octet < 3) { if (i >= len || buf[i] != '.') { return LN_SIMD_EFORMAT; } i++; /* Skip dot 
*/ } } /* Build network-order address */ result->addr = ((uint32_t)result->octets[0] << 24) | ((uint32_t)result->octets[1] << 16) | ((uint32_t)result->octets[2] << 8) | ((uint32_t)result->octets[3]); result->consumed = i; result->valid = true; return LN_SIMD_OK; } int ln_simd_ipv4_port(const char *buf, size_t len, ln_ipv4_t *ip, uint16_t *port) { int r; size_t i; if (!buf || !ip || !port) return LN_SIMD_EINVAL; *port = 0; /* Parse IP first */ r = ln_simd_ipv4(buf, len, ip); if (r != LN_SIMD_OK) return r; /* Check for optional port */ i = ip->consumed; if (i < len && buf[i] == ':') { unsigned int port_val = 0; i++; /* Skip colon */ if (i >= len || !ln_is_digit(buf[i])) { return LN_SIMD_OK; /* No port after colon - still valid IP */ } while (i < len && ln_is_digit(buf[i])) { port_val = port_val * 10 + (buf[i] - '0'); if (port_val > 65535) { return LN_SIMD_EFORMAT; } i++; } *port = (uint16_t)port_val; ip->consumed = i; } return LN_SIMD_OK; } /*============================================================================ * Timestamp Parsing *============================================================================*/ /* Month name lookup */ static const char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; /** * @brief Parse RFC 3164 timestamp: "Jan 15 10:30:45" */ static int parse_rfc3164(const char *buf, size_t len, ln_span_t *span) { int month = -1; int m; size_t i; /* Minimum length: "Jan 1 0:0:0" = 12 */ if (len < 12) return LN_SIMD_EFORMAT; /* Check month */ for (m = 0; m < 12; m++) { if (memcmp(buf, months[m], 3) == 0) { month = m; break; } } if (month < 0) return LN_SIMD_EFORMAT; /* Skip month and space(s) */ i = 3; while (i < len && buf[i] == ' ') i++; /* Parse day (1 or 2 digits) */ if (!ln_is_digit(buf[i])) return LN_SIMD_EFORMAT; while (i < len && ln_is_digit(buf[i])) i++; /* Space before time */ if (i >= len || buf[i] != ' ') return LN_SIMD_EFORMAT; i++; /* Parse time HH:MM:SS */ /* Hours */ if (i + 2 > 
len || !ln_is_digit(buf[i])) return LN_SIMD_EFORMAT; i++; if (ln_is_digit(buf[i])) i++; if (i >= len || buf[i] != ':') return LN_SIMD_EFORMAT; i++; /* Minutes */ if (i + 2 > len || !ln_is_digit(buf[i]) || !ln_is_digit(buf[i+1])) return LN_SIMD_EFORMAT; i += 2; if (i >= len || buf[i] != ':') return LN_SIMD_EFORMAT; i++; /* Seconds */ if (i + 2 > len || !ln_is_digit(buf[i]) || !ln_is_digit(buf[i+1])) return LN_SIMD_EFORMAT; i += 2; span->start = buf; span->len = i; span->consumed = i; return LN_SIMD_OK; } /** * @brief Parse RFC 5424 / ISO 8601 timestamp: "2024-01-15T10:30:45.123Z" */ static int parse_rfc5424(const char *buf, size_t len, ln_span_t *span) { int j; size_t i; /* Minimum: "2024-01-15T10:30:45" = 19 */ if (len < 19) return LN_SIMD_EFORMAT; /* Quick validation of format */ if (buf[4] != '-' || buf[7] != '-' || (buf[10] != 'T' && buf[10] != ' ') || buf[13] != ':' || buf[16] != ':') { return LN_SIMD_EFORMAT; } /* Validate digits */ for (j = 0; j < 19; j++) { if (j == 4 || j == 7 || j == 10 || j == 13 || j == 16) continue; if (!ln_is_digit(buf[j])) return LN_SIMD_EFORMAT; } i = 19; /* Optional fractional seconds */ if (i < len && buf[i] == '.') { i++; while (i < len && ln_is_digit(buf[i])) i++; } /* Optional timezone */ if (i < len) { if (buf[i] == 'Z') { i++; } else if (buf[i] == '+' || buf[i] == '-') { i++; /* HH:MM */ if (i + 5 <= len && ln_is_digit(buf[i]) && ln_is_digit(buf[i+1]) && buf[i+2] == ':' && ln_is_digit(buf[i+3]) && ln_is_digit(buf[i+4])) { i += 5; } } } span->start = buf; span->len = i; span->consumed = i; return LN_SIMD_OK; } int ln_simd_timestamp(const char *buf, size_t len, ln_span_t *span, int64_t *epoch_ms) { if (!buf || !span) return LN_SIMD_EINVAL; if (epoch_ms) *epoch_ms = 0; /* Not implemented for now */ /* Try RFC 5424 first (starts with digit) */ if (len >= 4 && ln_is_digit(buf[0])) { if (parse_rfc5424(buf, len, span) == LN_SIMD_OK) { return LN_SIMD_OK; } } /* Try RFC 3164 (starts with month name) */ if (len >= 3 && buf[0] >= 'A' && 
buf[0] <= 'S') { if (parse_rfc3164(buf, len, span) == LN_SIMD_OK) { return LN_SIMD_OK; } } return LN_SIMD_EFORMAT; } /*============================================================================ * Utility Functions *============================================================================*/ size_t ln_simd_unescape(char *buf, size_t len) { size_t read; size_t write; if (!buf || len == 0) return 0; read = 0; write = 0; while (read < len) { if (buf[read] == '\\' && read + 1 < len) { char next = buf[read + 1]; char replacement; switch (next) { case '\\': replacement = '\\'; break; case '"': replacement = '"'; break; case '\'': replacement = '\''; break; case 'n': replacement = '\n'; break; case 'r': replacement = '\r'; break; case 't': replacement = '\t'; break; case '0': replacement = '\0'; break; case 'x': /* Hex escape \xHH */ if (read + 3 < len) { char h1 = buf[read + 2]; char h2 = buf[read + 3]; int v1 = -1, v2 = -1; if (h1 >= '0' && h1 <= '9') v1 = h1 - '0'; else if (h1 >= 'A' && h1 <= 'F') v1 = h1 - 'A' + 10; else if (h1 >= 'a' && h1 <= 'f') v1 = h1 - 'a' + 10; if (h2 >= '0' && h2 <= '9') v2 = h2 - '0'; else if (h2 >= 'A' && h2 <= 'F') v2 = h2 - 'A' + 10; else if (h2 >= 'a' && h2 <= 'f') v2 = h2 - 'a' + 10; if (v1 >= 0 && v2 >= 0) { buf[write++] = (char)((v1 << 4) | v2); read += 4; continue; } } /* Invalid hex - keep as-is */ buf[write++] = buf[read++]; continue; default: /* Unknown escape - keep backslash and char */ buf[write++] = buf[read++]; continue; } buf[write++] = replacement; read += 2; } else { buf[write++] = buf[read++]; } } return write; } liblognorm-2.1.0/src/turbo_simd.h000066400000000000000000000317111520037563000167710ustar00rootroot00000000000000/** * @file turbo_simd.h * @brief SIMD-accelerated parsing primitives *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. 
*/ #ifndef LIBLOGNORM_TURBO_SIMD_H_INCLUDED #define LIBLOGNORM_TURBO_SIMD_H_INCLUDED #include #include #include #ifdef __cplusplus extern "C" { #endif /*============================================================================ * Architecture Detection *============================================================================*/ #if defined(__x86_64__) || defined(_M_X64) #if defined(__SSE4_2__) || defined(__AVX2__) #define LN_SIMD_SSE42 1 #include /* SSE4.2 */ #include /* SSE2 */ #endif #elif defined(__aarch64__) || defined(_M_ARM64) #if defined(__ARM_NEON) || defined(__ARM_NEON__) #define LN_SIMD_NEON 1 #include #endif #endif /* SIMD register width */ #if defined(LN_SIMD_SSE42) || defined(LN_SIMD_NEON) #define LN_SIMD_WIDTH 16 #else #define LN_SIMD_WIDTH 1 /* Scalar fallback */ #endif /*============================================================================ * Return Codes *============================================================================*/ #define LN_SIMD_OK 0 /**< Success */ #define LN_SIMD_EINVAL -1 /**< Invalid argument */ #define LN_SIMD_ENOTFOUND -2 /**< Pattern not found */ #define LN_SIMD_EOVERFLOW -3 /**< Numeric overflow */ #define LN_SIMD_EFORMAT -4 /**< Invalid format */ /*============================================================================ * Parse Result Structure *============================================================================*/ /** * @brief Result of a parsing operation. * * Contains the extracted value and how many bytes were consumed. */ typedef struct { const char *start; /**< Start of matched region */ size_t len; /**< Length of matched region */ size_t consumed; /**< Total bytes consumed (including delimiters) */ } ln_span_t; /** * @brief Result of numeric parsing. */ typedef struct { int64_t value; /**< Parsed value */ size_t consumed; /**< Bytes consumed */ bool negative; /**< Was negative */ bool overflow; /**< Overflow occurred */ } ln_number_t; /** * @brief Result of IPv4 parsing. 
*/
typedef struct {
    uint32_t addr;      /**< Address as one integer, octets[0] in the top byte */
    uint8_t octets[4];  /**< Individual octets, in textual order */
    size_t consumed;    /**< Bytes consumed */
    bool valid;         /**< Was valid IPv4 */
} ln_ipv4_t;

/*============================================================================
 * Character Classification (Vectorized)
 *============================================================================*/

/**
 * @brief Whitespace test: true for SPACE, TAB, LF and CR only.
 */
static inline bool
ln_is_space(char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}

/**
 * @brief Decimal digit test: true for '0'..'9'.
 */
static inline bool
ln_is_digit(char c)
{
    return !(c < '0' || c > '9');
}

/**
 * @brief Alphanumeric test: true for ASCII digits and letters.
 */
static inline bool
ln_is_alnum(char c)
{
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'A' && c <= 'Z')
        return true;
    return c >= 'a' && c <= 'z';
}

/*============================================================================
 * Core SIMD Primitives
 *============================================================================*/

/**
 * @brief Find first occurrence of a character.
 *
 * SIMD-accelerated scan for a single character.
 *
 * @param[in] buf Buffer to search
 * @param[in] len Buffer length
 * @param[in] c Character to find
 * @return Offset of character, or len if not found
 */
size_t ln_simd_find_char(const char *buf, size_t len, char c);

/**
 * @brief Find first occurrence of any character from a set.
 *
 * Searches for the first character that matches any in the set.
 * Set is specified as a null-terminated string of characters.
 *
 * @param[in] buf Buffer to search
 * @param[in] len Buffer length
 * @param[in] chars Null-terminated set of characters to find
 * @return Offset of first match, or len if not found
 *
 * Example:
 * @code
 * // Find first whitespace or colon
 * size_t pos = ln_simd_find_char_set(buf, len, " \t:");
 * @endcode
 */
size_t ln_simd_find_char_set(const char *buf, size_t len, const char *chars);

/**
 * @brief Find first character NOT in a set.
* * Inverse of find_char_set - finds first character that doesn't match. * * @param[in] buf Buffer to search * @param[in] len Buffer length * @param[in] chars Characters to skip * @return Offset of first non-matching char, or len if all match */ size_t ln_simd_find_not_char_set(const char *buf, size_t len, const char *chars); /** * @brief Skip leading whitespace. * * @param[in] buf Buffer * @param[in] len Buffer length * @return Number of whitespace characters skipped */ size_t ln_simd_skip_space(const char *buf, size_t len); /** * @brief Skip while characters match a set. * * @param[in] buf Buffer * @param[in] len Buffer length * @param[in] chars Characters to skip * @return Number of characters skipped */ size_t ln_simd_skip_chars(const char *buf, size_t len, const char *chars); /*============================================================================ * Field Extraction Primitives *============================================================================*/ /** * @brief Extract a whitespace-delimited word. * * Extracts characters until whitespace or end of buffer. * Leading whitespace is skipped. * * @param[in] buf Buffer to parse * @param[in] len Buffer length * @param[out] span Result span (start points into buf) * @return LN_SIMD_OK on success, LN_SIMD_ENOTFOUND if empty * * Example: " hello world" -> span={start="hello", len=5, consumed=7} */ int ln_simd_word(const char *buf, size_t len, ln_span_t *span); /** * @brief Extract characters until a delimiter. * * Extracts all characters until the delimiter is found. * The delimiter is NOT included in the span but IS consumed. 
* * @param[in] buf Buffer to parse * @param[in] len Buffer length * @param[in] delim Delimiter character * @param[out] span Result span * @return LN_SIMD_OK on success, LN_SIMD_ENOTFOUND if delimiter not found * * Example: "field:value" with delim=':' -> span={start="field", len=5, consumed=6} */ int ln_simd_char_to(const char *buf, size_t len, char delim, ln_span_t *span); /** * @brief Extract characters until a delimiter string. * * Like char_to but matches a multi-character delimiter. * * @param[in] buf Buffer to parse * @param[in] len Buffer length * @param[in] delim Delimiter string * @param[in] delim_len Delimiter length * @param[out] span Result span * @return LN_SIMD_OK on success, LN_SIMD_ENOTFOUND if delimiter not found * * Example: "data]]more" with delim="]]" -> span={start="data", len=4, consumed=6} */ int ln_simd_string_to(const char *buf, size_t len, const char *delim, size_t delim_len, ln_span_t *span); /** * @brief Extract a quoted string. * * Extracts content between matching quotes, handling escapes. * Supports both single and double quotes. * Escape sequences: \\ \" \' \n \r \t * * @param[in] buf Buffer to parse (should start with quote) * @param[in] len Buffer length * @param[out] span Result span (content without quotes) * @return LN_SIMD_OK on success, LN_SIMD_EFORMAT if malformed * * Example: "\"hello\\nworld\"" -> span={start=, len=11, consumed=14} * * @note The span points into the original buffer. If escapes were present, * caller must unescape separately if needed. */ int ln_simd_quoted(const char *buf, size_t len, ln_span_t *span); /** * @brief Extract content between brackets/braces. * * Handles nested brackets of the same type. 
* * @param[in] buf Buffer (should start with opening bracket) * @param[in] len Buffer length * @param[in] open Opening bracket character * @param[in] close Closing bracket character * @param[out] span Result span (content without brackets) * @return LN_SIMD_OK on success, LN_SIMD_EFORMAT if unbalanced */ int ln_simd_bracketed(const char *buf, size_t len, char open, char close, ln_span_t *span); /*============================================================================ * Numeric Parsing *============================================================================*/ /** * @brief Parse an integer. * * Parses a decimal integer with optional sign. * Stops at first non-digit. * * @param[in] buf Buffer to parse * @param[in] len Buffer length * @param[out] result Parse result * @return LN_SIMD_OK on success, LN_SIMD_EFORMAT if no digits * * Example: "-12345abc" -> result={value=-12345, consumed=6} */ int ln_simd_number(const char *buf, size_t len, ln_number_t *result); /** * @brief Parse an unsigned integer. * * Like number() but rejects negative values. * * @param[in] buf Buffer to parse * @param[in] len Buffer length * @param[out] result Parse result (value is always positive) * @return LN_SIMD_OK on success */ int ln_simd_unsigned(const char *buf, size_t len, ln_number_t *result); /** * @brief Parse a hexadecimal integer. * * Parses hex with optional 0x/0X prefix. * * @param[in] buf Buffer to parse * @param[in] len Buffer length * @param[out] result Parse result * @return LN_SIMD_OK on success */ int ln_simd_hex(const char *buf, size_t len, ln_number_t *result); /*============================================================================ * Network Address Parsing *============================================================================*/ /** * @brief Parse an IPv4 address. * * Parses dotted-decimal notation (e.g., "192.168.1.1"). * Validates octet ranges (0-255). 
* * @param[in] buf Buffer to parse * @param[in] len Buffer length * @param[out] result Parse result * @return LN_SIMD_OK on success, LN_SIMD_EFORMAT if invalid * * Example: "192.168.1.1:8080" -> result={addr=0xC0A80101, consumed=11} */ int ln_simd_ipv4(const char *buf, size_t len, ln_ipv4_t *result); /** * @brief Parse an IPv4:port combination. * * @param[in] buf Buffer to parse * @param[in] len Buffer length * @param[out] ip IP result * @param[out] port Port number (0 if not present) * @return LN_SIMD_OK on success */ int ln_simd_ipv4_port(const char *buf, size_t len, ln_ipv4_t *ip, uint16_t *port); /*============================================================================ * Timestamp Parsing *============================================================================*/ /** * @brief Parse a syslog-style timestamp. * * Formats supported: * - RFC 3164: "Jan 15 10:30:45" * - RFC 5424: "2024-01-15T10:30:45.123Z" * * @param[in] buf Buffer to parse * @param[in] len Buffer length * @param[out] span Span of timestamp * @param[out] epoch_ms Unix epoch milliseconds (optional, may be NULL) * @return LN_SIMD_OK on success, LN_SIMD_EFORMAT if not recognized */ int ln_simd_timestamp(const char *buf, size_t len, ln_span_t *span, int64_t *epoch_ms); /*============================================================================ * Utility Functions *============================================================================*/ /** * @brief Unescape a string in-place. * * Processes escape sequences: \\ \" \' \n \r \t \xHH * * @param[in,out] buf Buffer to unescape * @param[in] len Buffer length * @return New length after unescaping */ size_t ln_simd_unescape(char *buf, size_t len); /** * @brief Compare two spans for equality. 
* * @param[in] a First span * @param[in] b Second span * @return true if equal, false otherwise */ static inline bool ln_span_eq(const ln_span_t *a, const ln_span_t *b) { if (a->len != b->len) return false; if (a->len == 0) return true; return __builtin_memcmp(a->start, b->start, a->len) == 0; } /** * @brief Compare span to a string literal. * * @param[in] span Span to compare * @param[in] str Null-terminated string * @return true if equal, false otherwise */ static inline bool ln_span_eq_str(const ln_span_t *span, const char *str) { size_t slen = __builtin_strlen(str); if (span->len != slen) return false; if (slen == 0) return true; return __builtin_memcmp(span->start, str, slen) == 0; } /*============================================================================ * Runtime Feature Detection *============================================================================*/ /** * @brief Get SIMD backend name. * * @return "sse42", "neon", or "scalar" */ const char *ln_simd_backend_name(void); /** * @brief Get SIMD register width in bytes. * * @return 16 for SSE/NEON, 1 for scalar */ static inline int ln_simd_width(void) { return LN_SIMD_WIDTH; } /** * @brief Check if SIMD is available. */ static inline bool ln_simd_available(void) { return LN_SIMD_WIDTH > 1; } #ifdef __cplusplus } #endif #endif /* LIBLOGNORM_TURBO_SIMD_H_INCLUDED */ liblognorm-2.1.0/src/turbo_snapshot.c000066400000000000000000000113441520037563000176670ustar00rootroot00000000000000/* * turbo_snapshot.c -- Deep-copy snapshot of turbo parse results * * Part of the TurboVM bytecode engine for high-performance log parsing. * * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #ifdef ENABLE_TURBO #include "turbo_snapshot.h" #include "turbo_result_fast.h" #include "turbo_arena.h" #include #include #include /** * @brief Check if a pointer falls within the arena region. */ static inline int ptr_in_arena(const void *ptr, const uint8_t *arena_base, size_t arena_used) { const uint8_t *p = (const uint8_t *)ptr; return (p >= arena_base && p < arena_base + arena_used); } /** * @brief Rebase a pointer from old arena to new arena copy. 
*/ static inline const char * rebase_ptr(const char *ptr, const uint8_t *old_base, const char *new_base) { ptrdiff_t offset = (const uint8_t *)ptr - old_base; return new_base + offset; } ln_fast_result_snapshot_t * ln_fast_result_snapshot_create(const ln_fast_result_t *src, const ln_arena_t *arena) { size_t arena_used = 0; const uint8_t *arena_base = NULL; size_t total; ln_fast_result_snapshot_t *snap; if (!src) return NULL; /* Determine arena copy size — may be 0 if no arena or no overflow strings */ if (arena && arena->base && arena->used > 0) { arena_used = arena->used; arena_base = arena->base; } /* Single allocation: header + arena data */ total = sizeof(ln_fast_result_snapshot_t) + arena_used; snap = malloc(total); if (!snap) return NULL; /* Copy the result struct */ memcpy(&snap->result, src, sizeof(ln_fast_result_t)); snap->arena_size = arena_used; /* Copy arena bytes */ if (arena_used > 0) { memcpy(snap->arena_data, arena_base, arena_used); } /* Detach from original arena — snapshot is self-contained */ snap->result.arena = NULL; /* Rebase all pointers that reference the arena region */ for (int i = 0; i < snap->result.n_fields; i++) { ln_fast_field_t *f = &snap->result.fields[i]; /* Rebase field name if it points into the arena */ if (f->name && arena_base && !(f->flags & LN_FFIELD_STATIC_NAME) && ptr_in_arena(f->name, arena_base, arena_used)) { f->name = rebase_ptr(f->name, arena_base, (const char *)snap->arena_data); } /* Rebase string value if it points into the arena */ if (f->type == LN_FTYPE_STRING && f->v.str.ptr && arena_base && ptr_in_arena(f->v.str.ptr, arena_base, arena_used)) { f->v.str.ptr = rebase_ptr(f->v.str.ptr, arena_base, (const char *)snap->arena_data); } /* LN_FTYPE_STRING_INLINE: data is inline in the struct, already copied */ /* LN_FTYPE_INT/DOUBLE/BOOL: no pointers to rebase */ } /* Rebase rule_id if it points into the arena (unlikely, usually static) */ if (snap->result.rule_id && arena_base && ptr_in_arena(snap->result.rule_id, 
arena_base, arena_used)) { snap->result.rule_id = rebase_ptr(snap->result.rule_id, arena_base, (const char *)snap->arena_data); } /* Note: original message pointer (result.original) typically points * into the input buffer, NOT the arena. We leave it as-is because * the input buffer outlives the snapshot in the rsyslog pipeline * (message string is on the smsg_t). If it pointed into the arena, * we'd rebase it too. */ if (snap->result.original && arena_base && ptr_in_arena(snap->result.original, arena_base, arena_used)) { snap->result.original = rebase_ptr(snap->result.original, arena_base, (const char *)snap->arena_data); } /* Tag strings are static (compile-time constants), no rebasing needed */ return snap; } const ln_fast_result_t * ln_fast_result_snapshot_get(const ln_fast_result_snapshot_t *snap) { if (!snap) return NULL; return &snap->result; } void ln_fast_result_snapshot_free(ln_fast_result_snapshot_t *snap) { /* Single allocation — single free */ free(snap); } #endif /* ENABLE_TURBO */ liblognorm-2.1.0/src/turbo_snapshot.h000066400000000000000000000053031520037563000176720ustar00rootroot00000000000000/** * @file turbo_snapshot.h * @brief Deep-copy snapshot of turbo parse results *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. */ #ifndef LIBLOGNORM_TURBO_SNAPSHOT_H_INCLUDED #define LIBLOGNORM_TURBO_SNAPSHOT_H_INCLUDED #ifdef HAVE_CONFIG_H #include "config.h" #endif #if defined(ENABLE_TURBO) || defined(LOGNORM_TURBO_SUPPORTED) #include #include "turbo_result_fast.h" #include "turbo_arena.h" #ifdef __cplusplus extern "C" { #endif /** * Self-contained snapshot of a turbo parse result. * * Memory layout: header struct followed by inline arena data copy. * All arena pointers in the result are rebased to point into arena_data[]. 
*/ typedef struct ln_fast_result_snapshot_s { ln_fast_result_t result; /**< Deep copy of result (pointers rebased) */ size_t arena_size;/**< Size of arena data copy */ char arena_data[];/**< Flexible array: arena bytes follow inline */ } ln_fast_result_snapshot_t; /** * @brief Create a snapshot from a turbo parse result. * * Performs a single malloc(sizeof(snapshot) + arena->used), copies the * result struct and arena bytes, then rebases all arena pointers. * * @param src Source result (from ln_turbo_normalize_raw) * @param arena Arena that backs the result's string data * @return Snapshot (caller owns), or NULL on allocation failure * * Cost: 1 malloc + ~4.4KB memcpy (result) + arena->used memcpy + field walk * Typically under 6KB total for a ~20-field log message. */ ln_fast_result_snapshot_t * ln_fast_result_snapshot_create(const ln_fast_result_t *src, const ln_arena_t *arena); /** * @brief Get the result from a snapshot. * * The returned pointer is valid for the lifetime of the snapshot. * It can be used with all ln_fast_result_* accessor functions. * * @param snap Snapshot to read from * @return Pointer to the result, or NULL if snap is NULL */ const ln_fast_result_t * ln_fast_result_snapshot_get(const ln_fast_result_snapshot_t *snap); /** * @brief Free a snapshot. * * Single free() — the snapshot is a single allocation. 
* * @param snap Snapshot to free (NULL-safe) */ void ln_fast_result_snapshot_free(ln_fast_result_snapshot_t *snap); #ifdef __cplusplus } #endif #else /* !ENABLE_TURBO && !LOGNORM_TURBO_SUPPORTED */ /* Stubs when turbo is disabled */ typedef void ln_fast_result_snapshot_t; #define ln_fast_result_snapshot_create(src, arena) ((void*)0) #define ln_fast_result_snapshot_get(snap) ((void*)0) #define ln_fast_result_snapshot_free(snap) ((void)(snap)) #endif /* ENABLE_TURBO || LOGNORM_TURBO_SUPPORTED */ #endif /* LIBLOGNORM_TURBO_SNAPSHOT_H_INCLUDED */ liblognorm-2.1.0/src/turbo_vm.c000066400000000000000000002163541520037563000164620ustar00rootroot00000000000000/* * turbo_vm.c -- Virtual machine for executing TurboVM bytecode * * Part of the TurboVM bytecode engine for high-performance log parsing. * * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. 
*/ #include "turbo_vm.h" #include "turbo_vm_opt.h" #include "turbo_simd.h" #include #include #include #include /*============================================================================ * Debug Tracing *============================================================================*/ #ifdef LN_VM_TRACE static bool g_trace_enabled = true; #define TRACE(...) do { if (g_trace_enabled) fprintf(stderr, __VA_ARGS__); } while(0) #else #define TRACE(...) ((void)0) #endif /*============================================================================ * Helper macros *============================================================================*/ #define myisdigit(c) ((c) >= '0' && (c) <= '9') /*============================================================================ * Opcode Names *============================================================================*/ const char * ln_opcode_name(ln_opcode_t op) { switch (op) { case OP_HALT: return "HALT"; case OP_MATCH: return "MATCH"; case OP_JUMP: return "JUMP"; case OP_FORK: return "FORK"; case OP_FAIL: return "FAIL"; case OP_CALL: return "CALL"; case OP_RET: return "RET"; case OP_LITERAL: return "LITERAL"; case OP_LITERAL_EXT: return "LITERAL_EXT"; case OP_LITERAL_CI: return "LITERAL_CI"; case OP_CHAR: return "CHAR"; case OP_ANY: return "ANY"; case OP_CHARSET: return "CHARSET"; case OP_FIELD_WORD: return "FIELD_WORD"; case OP_FIELD_INT: return "FIELD_INT"; case OP_FIELD_UINT: return "FIELD_UINT"; case OP_FIELD_FLOAT: return "FIELD_FLOAT"; case OP_FIELD_IPV4: return "FIELD_IPV4"; case OP_FIELD_IPV6: return "FIELD_IPV6"; case OP_FIELD_HEX: return "FIELD_HEX"; case OP_FIELD_QUOTED: return "FIELD_QUOTED"; case OP_FIELD_CHAR_TO: return "FIELD_CHAR_TO"; case OP_FIELD_STR_TO: return "FIELD_STR_TO"; case OP_FIELD_REST: return "FIELD_REST"; case OP_FIELD_JSON: return "FIELD_JSON"; case OP_FIELD_MAC: return "FIELD_MAC"; case OP_FIELD_DATE: return "FIELD_DATE"; case OP_FIELD_REGEX: return "FIELD_REGEX"; case OP_FIELD_NAME_VALUE: return 
"FIELD_NAME_VALUE"; case OP_SKIP_SPACE: return "SKIP_SPACE"; case OP_SKIP_SPACE1: return "SKIP_SPACE1"; case OP_SKIP_N: return "SKIP_N"; case OP_SKIP_TO: return "SKIP_TO"; case OP_SKIP_PAST: return "SKIP_PAST"; case OP_SKIP_LINE: return "SKIP_LINE"; case OP_TAG: return "TAG"; case OP_RULE_ID: return "RULE_ID"; case OP_STATIC_FIELD: return "STATIC_FIELD"; case OP_CTX_PUSH: return "CTX_PUSH"; case OP_CTX_POP: return "CTX_POP"; case OP_CTX_NEST: return "CTX_NEST"; case OP_CTX_UNNEST: return "CTX_UNNEST"; case OP_ASSERT_CHAR: return "ASSERT_CHAR"; case OP_ASSERT_END: return "ASSERT_END"; case OP_ASSERT_START: return "ASSERT_START"; case OP_SYSLOG_PRI: return "SYSLOG_PRI"; case OP_SYSLOG_TS: return "SYSLOG_TS"; case OP_CEF_HDR: return "CEF_HDR"; case OP_V2_IPTABLES: return "V2_IPTABLES"; case OP_CEE_SYSLOG: return "CEE_SYSLOG"; case OP_CHECKPOINT_LEA: return "CHECKPOINT_LEA"; case OP_NOP: return "NOP"; case OP_DEBUG: return "DEBUG"; case OP_INVALID: return "INVALID"; default: return "UNKNOWN"; } } /*============================================================================ * Disassembly *============================================================================*/ int ln_instr_disasm(const ln_instr_t *inst, char *buf, size_t len) { const char *name; int n; if (!inst || !buf || len == 0) return 0; name = ln_opcode_name(inst->op); n = 0; switch (inst->op) { case OP_LITERAL: case OP_LITERAL_CI: n = snprintf(buf, len, "%s \"%.*s\"", name, (int)inst->aux, inst->data.str); break; case OP_CHAR: if (isprint((unsigned char)inst->data.str[0])) { n = snprintf(buf, len, "%s '%c'", name, inst->data.str[0]); } else { n = snprintf(buf, len, "%s 0x%02x", name, (unsigned char)inst->data.str[0]); } break; case OP_JUMP: case OP_FORK: case OP_CALL: n = snprintf(buf, len, "%s %+d", name, inst->data.jump.offset); break; case OP_MATCH: case OP_TAG: case OP_RULE_ID: case OP_CTX_PUSH: case OP_CTX_NEST: case OP_FIELD_WORD: case OP_FIELD_INT: case OP_FIELD_UINT: case OP_FIELD_FLOAT: case 
OP_FIELD_IPV4: case OP_FIELD_IPV6: case OP_FIELD_HEX: case OP_FIELD_QUOTED: case OP_FIELD_REST: case OP_FIELD_JSON: case OP_FIELD_MAC: case OP_FIELD_DATE: case OP_V2_IPTABLES: case OP_CEE_SYSLOG: n = snprintf(buf, len, "%s \"%s\"", name, inst->data.str); break; case OP_STATIC_FIELD: n = snprintf(buf, len, "%s \"%s\"=\"%s\"", name, inst->data.kv.key, inst->data.kv.val); break; case OP_FIELD_CHAR_TO: case OP_FIELD_STR_TO: if (isprint(inst->data.char_to.delim)) { n = snprintf(buf, len, "%s \"%s\" delim='%c'", name, inst->data.char_to.name, inst->data.char_to.delim); } else { n = snprintf(buf, len, "%s \"%s\" delim=0x%02x", name, inst->data.char_to.name, inst->data.char_to.delim); } break; case OP_CHECKPOINT_LEA: if (inst->data.char_to.delim) { n = snprintf(buf, len, "%s \"%s\" term='%c'", name, inst->data.char_to.name, inst->data.char_to.delim); } else { n = snprintf(buf, len, "%s \"%s\"", name, inst->data.char_to.name); } break; case OP_FIELD_NAME_VALUE: n = snprintf(buf, len, "%s \"%s\" sep=%s ass=%s", name, inst->data.char_to.name, inst->data.char_to.delim ? (char[]){inst->data.char_to.delim, 0} : "ws", inst->data.char_to.ass ? (char[]){inst->data.char_to.ass, 0} : "'='"); break; case OP_SKIP_N: n = snprintf(buf, len, "%s %u", name, inst->aux); break; default: n = snprintf(buf, len, "%s", name); break; } return n; } void ln_program_disasm(const ln_program_t *prog, FILE *fp) { char buf[128]; uint32_t i; if (!prog || !fp) return; fprintf(fp, "=== Program: %s ===\n", prog->name ? 
prog->name : "(unnamed)"); fprintf(fp, "Instructions: %u\n\n", prog->code_len); for (i = 0; i < prog->code_len; i++) { ln_instr_disasm(&prog->code[i], buf, sizeof(buf)); fprintf(fp, "[%4u] %s\n", i, buf); } fprintf(fp, "\n"); } /*============================================================================ * VM Lifecycle *============================================================================*/ int ln_vm_init(ln_vm_t *vm, ln_arena_t *arena) { if (!vm || !arena) return LN_VM_ERROR; memset(vm, 0, sizeof(*vm)); vm->arena = arena; return LN_VM_OK; } void ln_vm_reset(ln_vm_t *vm) { if (!vm) return; vm->prog = NULL; vm->input = NULL; vm->input_end = NULL; vm->pc = 0; vm->ip = NULL; vm->fork_sp = 0; vm->call_sp = 0; vm->field_ctx_sp = 0; vm->result = NULL; vm->instr_count = 0; vm->backtrack_count = 0; vm->matched_rule = NULL; vm->error = NULL; } /*============================================================================ * Field Context Stack Operations (for ".." substitution) *============================================================================*/ static inline bool vm_push_field_ctx(ln_vm_t *vm, const char *name, bool is_nested) { ln_field_ctx_t *ctx; if (vm->field_ctx_sp >= LN_VM_MAX_FIELD_CTX) { vm->error = "field context stack overflow"; return false; } ctx = &vm->field_ctx[vm->field_ctx_sp++]; ctx->name = name; ctx->name_len = name ? (uint16_t)strlen(name) : 0; ctx->is_nested = is_nested ? 1 : 0; TRACE("[CTX] push \"%s\" (sp=%u, nested=%d)\n", name ? 
name : "(null)", vm->field_ctx_sp, is_nested); return true; } static inline bool vm_pop_field_ctx(ln_vm_t *vm) { if (vm->field_ctx_sp == 0) { vm->error = "field context stack underflow"; return false; } vm->field_ctx_sp--; TRACE("[CTX] pop (sp=%u)\n", vm->field_ctx_sp); return true; } static inline const char * vm_get_context_name(ln_vm_t *vm) { if (vm->field_ctx_sp == 0) return NULL; return vm->field_ctx[vm->field_ctx_sp - 1].name; } static inline uint16_t vm_get_context_name_len(ln_vm_t *vm) { if (vm->field_ctx_sp == 0) return 0; return vm->field_ctx[vm->field_ctx_sp - 1].name_len; } /** * @brief Resolve ".." field name to parent context name. */ static inline const char * __attribute__((unused)) vm_resolve_field_name(ln_vm_t *vm, const char *name, uint16_t *out_len) { if (name && name[0] == '.' && name[1] == '.' && (name[2] == '\0' || name[2] == '.')) { const char *ctx_name = vm_get_context_name(vm); if (ctx_name && ctx_name[0]) { *out_len = vm_get_context_name_len(vm); TRACE("[RESOLVE] \"%s\" -> \"%s\"\n", name, ctx_name); return ctx_name; } TRACE("[RESOLVE] \"%s\" -> NULL (no context)\n", name); *out_len = 0; return NULL; } *out_len = (uint16_t)strlen(name); return name; } /*============================================================================ * Fork Stack Operations *============================================================================*/ static inline bool vm_push_fork(ln_vm_t *vm, uint32_t alt_pc) { ln_fork_t *f; if (vm->fork_sp >= LN_VM_MAX_FORKS) { vm->error = "fork stack overflow"; return false; } f = &vm->forks[vm->fork_sp++]; f->pc = alt_pc; f->ip = vm->ip; f->n_fields = vm->result ? vm->result->n_fields : 0; f->n_tags = vm->result ? 
vm->result->n_tags : 0; f->call_sp = vm->call_sp; f->field_ctx_sp = vm->field_ctx_sp; return true; } static inline bool vm_pop_fork(ln_vm_t *vm) { ln_fork_t *f; if (vm->fork_sp == 0) return false; vm->backtrack_count++; f = &vm->forks[--vm->fork_sp]; vm->pc = f->pc; vm->ip = f->ip; vm->call_sp = f->call_sp; vm->field_ctx_sp = f->field_ctx_sp; if (vm->result) { vm->result->n_fields = f->n_fields; vm->result->n_tags = f->n_tags; } return true; } /*============================================================================ * Optimized Field Extraction Helpers *============================================================================*/ /** * @brief Build prefixed field name for nested context. * * When inside custom type context "foo" and field is "bar", * produces "foo.bar". For ".." produces just "foo". * Uses arena for allocation. */ static inline const char * vm_build_field_name(ln_vm_t *vm, const char *name, uint16_t *out_len) { const char *ctx_name; uint16_t ctx_len; size_t name_slen; uint16_t name_len; bool all_dots; uint16_t i; uint16_t total_len; char *prefixed; /* Handle ".." - resolve to context name only */ if (name && name[0] == '.' && name[1] == '.' && (name[2] == '\0' || name[2] == '.')) { ctx_name = vm_get_context_name(vm); /* Skip root context "." */ if (ctx_name && ctx_name[0] && !(ctx_name[0] == '.' && ctx_name[1] == '\0')) { *out_len = vm_get_context_name_len(vm); return ctx_name; } *out_len = 0; return NULL; } /* If no context, use field name as-is */ if (vm->field_ctx_sp == 0 || !name || !name[0]) { if (name) { size_t slen = strlen(name); if (slen > UINT16_MAX) { *out_len = 0; return NULL; } *out_len = (uint16_t)slen; } else { *out_len = 0; } return name; } /* Get context name */ ctx_name = vm_get_context_name(vm); ctx_len = vm_get_context_name_len(vm); /* Skip root context "." 
or empty - don't prefix */ if (!ctx_name || ctx_len == 0 || (ctx_len == 1 && ctx_name[0] == '.') || (ctx_len == 0 && ctx_name && ctx_name[0] == '\0')) { size_t slen = strlen(name); if (slen > UINT16_MAX) { *out_len = 0; return NULL; } *out_len = (uint16_t)slen; return name; } name_slen = strlen(name); if (name_slen > UINT16_MAX) { *out_len = 0; return NULL; } name_len = (uint16_t)name_slen; /* SAFETY: Skip if context name contains only dots */ all_dots = true; for (i = 0; i < ctx_len; i++) { if (ctx_name[i] != '.') { all_dots = false; break; } } if (all_dots) { *out_len = name_len; return name; } /* Guard against uint16_t overflow on total length */ if ((size_t)ctx_len + 1 + name_len > UINT16_MAX) { *out_len = name_len; return name; /* Fallback to unprefixed */ } total_len = ctx_len + 1 + name_len; /* "ctx.name" */ /* Allocate from arena */ prefixed = ln_arena_alloc(vm->arena, total_len + 1); if (!prefixed) { *out_len = name_len; return name; /* Fallback to unprefixed */ } memcpy(prefixed, ctx_name, ctx_len); prefixed[ctx_len] = '.'; memcpy(prefixed + ctx_len + 1, name, name_len); prefixed[total_len] = '\0'; *out_len = total_len; return prefixed; } /** * @brief Add string field using fast result. * * The name pointer comes from instruction data which is stable * for the lifetime of the program. No copying needed. */ static inline bool vm_add_string_field(ln_vm_t *vm, const char *name, const char *val, size_t len) { uint16_t name_len; const char *full_name; if (!vm->result) return true; /* Build prefixed name if inside context */ full_name = vm_build_field_name(vm, name, &name_len); if (!full_name || !full_name[0]) return true; /* Direct add - name pointer is stable (from instruction or arena) */ return ln_fast_add_string_static(vm->result, full_name, name_len, val, len) == 0; } /** * @brief Add integer field using fast result. 
*/
static inline bool
vm_add_int_field(ln_vm_t *vm, const char *name, int64_t val)
{
    uint16_t name_len;
    const char *full_name;

    /* Nothing to record into: treated as success */
    if (!vm->result)
        return true;

    full_name = vm_build_field_name(vm, name, &name_len);
    if (!full_name || !full_name[0])
        return true;

    return ln_fast_add_int_static(vm->result, full_name, name_len, val) == 0;
}

/*============================================================================
 * Inline Parser Implementations
 *============================================================================*/

/**
 * @brief Match a run of bytes up to the next space (' ') or end of buffer.
 *
 * @param[out] out_len Length of the run; written only on success
 * @return 0 on success, -1 if the run is empty
 */
static inline int
parse_word(const char *buf, size_t len, size_t *out_len)
{
    size_t i = 0;

    while (i < len && buf[i] != ' ')
        i++;
    if (i == 0)
        return -1;
    *out_len = i;
    return 0;
}

/**
 * @brief Parse a decimal integer with an optional leading '+' or '-'.
 *
 * Parsing stops at the first non-digit.
 *
 * @param[out] value   Parsed value; written only on success
 * @param[out] out_len Bytes consumed including the sign; written only on success
 * @return 0 on success, -1 if there are no digits or the value does not
 *         fit in int64_t
 */
static inline int
parse_number(const char *buf, size_t len, int64_t *value, size_t *out_len)
{
    size_t i = 0;
    uint64_t val = 0;
    int neg = 0;
    size_t start;

    if (len == 0)
        return -1;

    if (buf[0] == '-') {
        neg = 1;
        i++;
    } else if (buf[0] == '+') {
        i++;
    }

    start = i;
    while (i < len && myisdigit(buf[i])) {
        int digit = buf[i] - '0';
        if (val > (UINT64_MAX - (uint64_t)digit) / 10)
            return -1;  /* overflow */
        val = val * 10 + (uint64_t)digit;
        i++;
    }
    if (i == start)
        return -1;

    /* Range-check before converting to signed */
    if (neg) {
        if (val > (uint64_t)INT64_MAX + 1)
            return -1;  /* underflow: value more negative than INT64_MIN */
        /* A magnitude of 2^63 is INT64_MIN itself; -(int64_t)val would
         * negate INT64_MIN, which is signed overflow. */
        *value = (val == (uint64_t)INT64_MAX + 1) ? INT64_MIN : -(int64_t)val;
    } else {
        if (val > (uint64_t)INT64_MAX)
            return -1;  /* overflow: value exceeds INT64_MAX */
        *value = (int64_t)val;
    }

    *out_len = i;
    return 0;
}

/**
 * @brief Match the text of a decimal number with at most one '.'.
 *
 * An optional leading '-' is accepted; '+' is not. The value itself is not
 * computed. Scanning stops at a second '.' or at any other byte.
 *
 * @param[out] out_len Length of the matched text; written only on success
 * @return 0 on success, -1 if the matched text contains no digit
 */
static inline int
parse_float(const char *buf, size_t len, size_t *out_len)
{
    size_t i = 0;
    int seen_point = 0;
    int digits = 0;

    if (len == 0)
        return -1;

    if (buf[0] == '-')
        i++;

    for (; i < len; i++) {
        if (buf[i] == '.') {
            if (seen_point)
                break;
            seen_point = 1;
        } else if (myisdigit(buf[i])) {
            digits++;
        } else {
            break;
        }
    }

    if (digits == 0)
        return -1;
    *out_len = i;
    return 0;
}

/**
 * @brief Parse a hexadecimal number that carries a mandatory 0x/0X prefix.
 *
 * At most 16 hex digits are accepted; leading zeros count towards that
 * limit. The accumulated 64-bit value is stored through a cast to int64_t.
 *
 * @param[out] value   Parsed value; written only on success
 * @param[out] out_len Bytes consumed including the prefix; written only on success
 * @return 0 on success, -1 if the prefix or digits are missing or there
 *         are more than 16 digits
 */
static inline int
parse_hex(const char *buf, size_t len, int64_t *value, size_t *out_len)
{
    size_t i;
    uint64_t val;
    int ndigits;

    if (len < 3)
        return -1;
    if (buf[0] != '0' || (buf[1] != 'x' && buf[1] != 'X'))
        return -1;

    i = 2;
    val = 0;
    ndigits = 0;
    while (i < len && isxdigit((unsigned char)buf[i])) {
        char c;
        int digit;

        ndigits++;
        if (ndigits > 16)
            return -1;  /* overflow: >16 hex digits exceeds uint64_t */

        c = buf[i];
        if (c >= '0' && c <= '9')
            digit = c - '0';
        else if (c >= 'a' && c <= 'f')
            digit = c - 'a' + 10;
        else
            digit = c - 'A' + 10;

        val = val * 16 + (uint64_t)digit;
        i++;
    }

    if (i == 2)
        return -1;
    *value = (int64_t)val;
    *out_len = i;
    return 0;
}

/**
 * @brief Consume one IPv4 octet (1 to 3 decimal digits, value <= 255).
 *
 * @param[in,out] pos Offset into @p buf; advanced past the octet on success
 * @return 0 on success, -1 if no digit is present or the value exceeds 255
 */
static inline int
check_ipv4_byte(const char *buf, size_t len, size_t *pos)
{
    int val = 0;
    size_t i = *pos;

    if (i >= len || !myisdigit(buf[i]))
        return -1;
    val = buf[i++] - '0';

    if (i < len && myisdigit(buf[i])) {
        val = val * 10 + buf[i++] - '0';
        if (i < len && myisdigit(buf[i]))
            val = val * 10 + buf[i++] - '0';
    }

    if (val > 255)
        return -1;
    *pos = i;
    return 0;
}

/**
 * @brief Match a dotted-quad IPv4 address at the start of @p buf.
 *
 * @param[out] out_len Length of the address text; written only on success
 * @return 0 on success, -1 otherwise
 */
static inline int
parse_ipv4(const char *buf, size_t len, size_t *out_len)
{
    size_t i = 0;
    int octet;

    if (len < 7)  /* shortest form is "0.0.0.0" */
        return -1;

    for (octet = 0; octet < 4; octet++) {
        /* every octet after the first must be preceded by a dot */
        if (octet > 0 && (i >= len || buf[i++] != '.'))
            return -1;
        if (check_ipv4_byte(buf, len, &i) != 0)
            return -1;
    }

    *out_len = i;
    return 0;
}

/**
 * @brief Loosely match an IPv6-looking token.
 *
 * Accepts hex digits and ':' (a ':' is not accepted as the first byte);
 * a '.' at offset >= 2 switches to consuming digits and dots until the
 * first other byte. The address structure is not validated.
 *
 * @param[out] out_len Length of the token; written only on success
 * @return 0 on success, -1 if fewer than 2 bytes matched
 */
static inline int
parse_ipv6(const char *buf, size_t len, size_t *out_len)
{
    size_t i = 0;

    while (i < len) {
        if (isxdigit((unsigned char)buf[i])) {
            i++;
        } else if (buf[i] == ':' && i > 0) {
            i++;
        } else if (buf[i] == '.' && i >= 2) {
            while (i < len && (myisdigit(buf[i]) || buf[i] == '.'))
                i++;
            break;
        } else {
            break;
        }
    }

    if (i < 2)
        return -1;
    *out_len = i;
    return 0;
}

/**
 * @brief Match bytes up to (not including) @p delim or the end of the buffer.
 *
 * The delimiter does not have to be present.
 *
 * @param[out] out_len Number of bytes before the delimiter; written only on success
 * @return 0 on success, -1 if that number is zero
 */
static inline int
parse_char_to(const char *buf, size_t len, char delim, size_t *out_len)
{
    size_t i = 0;

    while (i < len && buf[i] != delim)
        i++;
    if (i == 0)
        return -1;
    *out_len = i;
    return 0;
}

/**
 * @brief Match a quoted string, or a plain word when no quote is present.
 *
 * A string opened by '"' or '\'' ends at the next unescaped occurrence of
 * the same quote; a backslash skips the byte that follows it. Anything
 * else is matched with parse_word().
 *
 * @param[out] start    Offset of the content within @p buf
 * @param[out] out_len  Length of the content (quotes excluded)
 * @param[out] consumed Bytes consumed (quotes included)
 * @return 0 on success, -1 on empty input, an unterminated quote, or an
 *         empty word; outputs are written only on success
 */
static inline int
parse_op_quoted(const char *buf, size_t len, size_t *start, size_t *out_len, size_t *consumed)
{
    char quote;
    size_t i;
    size_t wlen;

    if (len == 0)
        return -1;

    quote = buf[0];
    if (quote != '"' && quote != '\'') {
        /* Unquoted: fall back to a space-delimited word */
        if (parse_word(buf, len, &wlen) != 0)
            return -1;
        *start = 0;
        *out_len = wlen;
        *consumed = wlen;
        return 0;
    }

    i = 1;
    while (i < len) {
        if (buf[i] == '\\' && i + 1 < len) {
            i += 2;  /* skip the escaped byte */
        } else if (buf[i] == quote) {
            *start = 1;
            *out_len = i - 1;
            *consumed = i + 1;
            return 0;
        } else {
            i++;
        }
    }
    return -1;  /* closing quote not found */
}

/**
 * @brief Match a balanced JSON object or array at the start of @p buf.
 *
 * Only the bracket type that opens the value is counted; brackets inside
 * double-quoted strings are ignored and a backslash inside a string skips
 * the byte that follows it. The content is not otherwise validated.
 *
 * @param[out] out_len Length through the matching close bracket; written
 *                     only on success
 * @return 0 on success, -1 if @p buf does not start with '{' or '[' or
 *         the value is not closed
 */
static inline int
parse_json(const char *buf, size_t len, size_t *out_len)
{
    char open;
    char close;
    int depth = 0;
    bool in_string = false;
    size_t i;

    if (len == 0)
        return -1;

    open = buf[0];
    if (open == '{')
        close = '}';
    else if (open == '[')
        close = ']';
    else
        return -1;

    for (i = 0; i < len; i++) {
        char c = buf[i];

        if (in_string) {
            if (c == '\\' && i + 1 < len) {
                i++;
            } else if (c == '"') {
                in_string = false;
            }
            continue;
        }

        if (c == '"') {
            in_string = true;
        } else if (c == open) {
            depth++;
        } else if (c == close) {
            depth--;
            if (depth == 0) {
                *out_len = i + 1;
                return 0;
            }
        }
    }

    return -1;
}

/**
 * @brief Match a 48-bit MAC address: six hex pairs separated by ':' or '-'.
 *
 * The separator found after the first pair must be used throughout.
 *
 * @param[out] out_len Length of the address text (17); written only on success
 * @return 0 on success, -1 otherwise
 */
static inline int
parse_mac48(const char *buf, size_t len, size_t *out_len)
{
    char sep;
    size_t i;
    int octet;

    if (len < 17)  /* 6 * 2 hex digits + 5 separators */
        return -1;

    sep = 0;
    i = 0;
    for (octet = 0; octet < 6; octet++) {
        if (!isxdigit((unsigned char)buf[i]) || !isxdigit((unsigned char)buf[i + 1]))
            return -1;
        i += 2;

        if (octet < 5) {
            if (sep == 0) {
                /* the first separator fixes the style for the rest */
                if (buf[i] == ':' || buf[i] == '-')
                    sep = buf[i];
                else
                    return -1;
            } else if (buf[i] != sep) {
                return -1;
            }
            i++;
        }
    }

    *out_len = i;
    return 0;
}
/*============================================================================
 * Instruction Execution (legacy switch-based — kept for reference/fallback)
 *============================================================================
 *
 * NOTE: The primary execution path is now ln_vm_continue() below, which
 * uses computed goto dispatch (via turbo_vm_opt.h macros).
 * vm_exec_instr() is retained for:
 *   - Disassembly/debug tools that need per-instruction stepping
 *   - Compilers without computed goto support (MSVC fallback path)
 *
 * When LN_VM_COMPUTED_GOTO == 1, ln_vm_continue() bypasses this function
 * entirely and dispatches inline for maximum throughput.
 *
 * Executes the single instruction at vm->pc against the input at vm->ip.
 *
 * Return value:
 *    1  the instruction succeeded; vm->pc (and usually vm->ip) were advanced
 *    0  OP_MATCH was executed; vm->matched_rule is set
 *   -1  the instruction failed (also OP_HALT / OP_FAIL, a pc outside the
 *       program, call-stack errors and unknown opcodes). vm->error is set
 *       only for the error cases, not for ordinary match failures.
 *
 * Handlers that open a field context themselves (name-value, iptables,
 * Checkpoint LEA, CEF) fail with -1 when the context cannot be pushed, so
 * that their closing vm_pop_field_ctx() never removes a context they did
 * not push.
 *============================================================================*/
static int __attribute__((unused)) vm_exec_instr(ln_vm_t *vm) {
    const ln_instr_t *inst;
    size_t remaining;

    if (UNLIKELY(vm->pc >= vm->prog->code_len)) {
        vm->error = "PC out of bounds";
        return -1;
    }

    inst = &vm->prog->code[vm->pc];
    remaining = (size_t)(vm->input_end - vm->ip);
    vm->instr_count++;

    TRACE("[%u] %s ip=%zu rem=%zu ctx_sp=%u\n", vm->pc, ln_opcode_name(inst->op),
          (size_t)(vm->ip - vm->input), remaining, vm->field_ctx_sp);

    switch (inst->op) {
        /*=== Control ===*/
        case OP_HALT:
            return -1;

        case OP_MATCH:
            vm->matched_rule = inst->data.str;
            if (vm->result) {
                ln_fast_set_rule_id(vm->result, inst->data.str);
            }
            return 0;

        case OP_JUMP:
            vm->pc += inst->data.jump.offset;
            return 1;

        case OP_FORK: {
            /* Record the alternative branch, then fall into the first one. */
            uint32_t alt_pc = vm->pc + inst->data.jump.offset;
            if (!vm_push_fork(vm, alt_pc)) return -1;
            vm->pc++;
            return 1;
        }

        case OP_FAIL:
            return -1;

        case OP_CALL: {
            if (vm->call_sp >= LN_VM_MAX_CALLS) {
                vm->error = "call stack overflow";
                return -1;
            }
            vm->calls[vm->call_sp++] = vm->pc + 1;
            vm->pc += inst->data.jump.offset;
            return 1;
        }

        case OP_RET: {
            if (vm->call_sp == 0) {
                vm->error = "call stack underflow";
                return -1;
            }
            vm->pc = vm->calls[--vm->call_sp];
            return 1;
        }

        /*=== Field Context ===*/
        case OP_CTX_PUSH: {
            if (!vm_push_field_ctx(vm, inst->data.str, false)) return -1;
            vm->pc++;
            return 1;
        }

        case OP_CTX_POP: {
            if (!vm_pop_field_ctx(vm)) return -1;
            vm->pc++;
            return 1;
        }

        case OP_CTX_NEST: {
            if (!vm_push_field_ctx(vm, inst->data.str, true)) return -1;
            vm->pc++;
            return 1;
        }

        case OP_CTX_UNNEST: {
            if (!vm_pop_field_ctx(vm)) return -1;
            vm->pc++;
            return 1;
        }

        /*=== Literals ===*/
        case OP_LITERAL: {
            uint16_t len = inst->aux;
            if (remaining < len) return -1;
            if (memcmp(vm->ip, inst->data.str, len) != 0) return -1;
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_LITERAL_CI: {
            uint16_t len = inst->aux;
            if (remaining < len) return -1;
            for (uint16_t i = 0; i < len; i++) {
                if (tolower((unsigned char)vm->ip[i]) != tolower((unsigned char)inst->data.str[i])) {
                    return -1;
                }
            }
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_CHAR: {
            if (remaining < 1) return -1;
            if (*vm->ip != inst->data.str[0]) return -1;
            vm->ip++;
            vm->pc++;
            return 1;
        }

        case OP_ANY: {
            if (remaining < 1) return -1;
            vm->ip++;
            vm->pc++;
            return 1;
        }

        /*=== Fields ===*/
        case OP_FIELD_WORD: {
            const char *name = inst->data.str;
            ln_span_t span;
            if (ln_simd_word(vm->ip, remaining, &span) != LN_SIMD_OK) return -1;
            vm_add_string_field(vm, name, span.start, span.len);
            vm->ip += span.consumed;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_INT: {
            const char *name = inst->data.str;
            int64_t value;
            size_t len;
            if (parse_number(vm->ip, remaining, &value, &len) != 0) return -1;
            vm_add_int_field(vm, name, value);
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_UINT: {
            const char *name = inst->data.str;
            int64_t value;
            size_t len;
            if (parse_number(vm->ip, remaining, &value, &len) != 0) return -1;
            /* parse_number() succeeded, so at least one byte is readable. */
            if (vm->ip[0] == '-') return -1;
            vm_add_int_field(vm, name, value);
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_FLOAT: {
            const char *name = inst->data.str;
            size_t len;
            if (parse_float(vm->ip, remaining, &len) != 0) return -1;
            vm_add_string_field(vm, name, vm->ip, len);
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_IPV4: {
            const char *name = inst->data.str;
            size_t len;
            if (parse_ipv4(vm->ip, remaining, &len) != 0) return -1;
            vm_add_string_field(vm, name, vm->ip, len);
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_IPV6: {
            const char *name = inst->data.str;
            size_t len;
            if (parse_ipv6(vm->ip, remaining, &len) != 0) return -1;
            vm_add_string_field(vm, name, vm->ip, len);
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_HEX: {
            const char *name = inst->data.str;
            int64_t value;
            size_t len;
            if (parse_hex(vm->ip, remaining, &value, &len) != 0) return -1;
            vm_add_int_field(vm, name, value);
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_QUOTED: {
            const char *name = inst->data.str;
            size_t start, len, consumed;
            if (parse_op_quoted(vm->ip, remaining, &start, &len, &consumed) != 0) return -1;
            vm_add_string_field(vm, name, vm->ip + start, len);
            vm->ip += consumed;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_CHAR_TO: {
            const char *name = inst->data.char_to.name;
            char delim = (char)inst->data.char_to.delim;
            ln_span_t span;
            if (ln_simd_char_to(vm->ip, remaining, delim, &span) != LN_SIMD_OK) return -1;
            vm_add_string_field(vm, name, span.start, span.len);
            vm->ip += span.consumed;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_STR_TO: {
            const char *name = inst->data.char_to.name;
            char delim = (char)inst->data.char_to.delim;
            size_t len;
            if (parse_char_to(vm->ip, remaining, delim, &len) != 0) return -1;
            vm_add_string_field(vm, name, vm->ip, len);
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_REST: {
            /* Never fails: an empty remainder yields a zero-length field. */
            const char *name = inst->data.str;
            vm_add_string_field(vm, name, vm->ip, remaining);
            vm->ip += remaining;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_DATE: {
            const char *name = inst->data.str;
            ln_span_t span;
            int rc = ln_simd_timestamp(vm->ip, remaining, &span, NULL);
            if (rc != LN_SIMD_OK) return -1;
            vm_add_string_field(vm, name, span.start, span.len);
            vm->ip += span.consumed;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_JSON: {
            const char *name = inst->data.str;
            size_t len;
            if (parse_json(vm->ip, remaining, &len) != 0) return -1;
            vm_add_string_field(vm, name, vm->ip, len);
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        case OP_FIELD_MAC: {
            const char *name = inst->data.str;
            size_t len;
            if (parse_mac48(vm->ip, remaining, &len) != 0) return -1;
            vm_add_string_field(vm, name, vm->ip, len);
            vm->ip += len;
            vm->pc++;
            return 1;
        }

        /*=== Name-Value-List ===*/
        case OP_FIELD_NAME_VALUE: {
            /*
             * Parse name=value pairs from the remaining input.
             * Supports: name=value, name="value", name='value', name= (empty)
             * Separator between pairs: whitespace (default) or custom char.
             * Assignator between name/value: '=' (default) or custom char.
             *
             * Each extracted field is added with the context prefix from the
             * instruction's name field (e.g., "sns" → fields become "sns.key").
             *
             * Uses SIMD primitives for scanning where possible.
             */
            const char *ctx_name = inst->data.char_to.name;
            const char sep = (char)inst->data.char_to.delim; /* 0 = whitespace */
            const char ass = (char)inst->data.char_to.ass;   /* 0 = '=' */
            const char ass_char = ass ? ass : '=';
            const char *p;
            const char *end;
            int n_pairs;
            size_t consumed;

            /* Push the context so fields are nested under ctx_name. Fail if
             * the push is refused: continuing would store fields under the
             * wrong prefix and the pop below would remove a context this
             * handler did not push. */
            if (ctx_name[0]) {
                if (!vm_push_field_ctx(vm, ctx_name, false)) return -1;
            }

            p = vm->ip;
            end = vm->ip + remaining;
            n_pairs = 0;

            while (p < end) {
                /* --- Parse name --- */
                const char *name_start = p;
                size_t name_end_off;
                size_t name_len;
                const char *val_start;
                size_t val_len;
                char *arena_name;

                /* Scan for assignator char using SIMD find_char */
                name_end_off = ln_simd_find_char(p, (size_t)(end - p), ass_char);
                if (name_end_off >= (size_t)(end - p)) {
                    /* No more assignator found — done */
                    break;
                }

                /* Validate name: must be non-empty */
                name_len = name_end_off;
                if (name_len == 0) break;

                /* Validate name characters: alnum, '.', '_', '-' */
                if (ass == 0) {
                    /* Default mode: strict name validation */
                    int valid = 1;
                    size_t k;
                    for (k = 0; k < name_len; k++) {
                        unsigned char c = (unsigned char)p[k];
                        if (!(isalnum(c) || c == '.' || c == '_' || c == '-')) {
                            valid = 0;
                            break;
                        }
                    }
                    if (!valid) break;
                }
                /* else: custom assignator mode — name is anything before ass */

                p += name_len + 1; /* skip name + assignator */

                /* --- Parse value --- */
                if (p < end && (*p == '"' || *p == '\'')) {
                    /* Quoted value */
                    char quote = *p;
                    int backslashes = 0;
                    p++; /* skip opening quote */
                    val_start = p;

                    /* Scan for closing quote, handling backslash escapes:
                     * a quote only closes the value when preceded by an
                     * even number of consecutive backslashes. */
                    while (p < end) {
                        if (*p == quote && (backslashes % 2 == 0)) break;
                        if (*p == '\\')
                            backslashes++;
                        else
                            backslashes = 0;
                        p++;
                    }
                    val_len = (size_t)(p - val_start);
                    if (p < end && *p == quote) {
                        p++; /* skip closing quote */
                    } else {
                        /* Unterminated quote — stop parsing pairs */
                        break;
                    }
                } else {
                    /* Unquoted value: scan to separator */
                    val_start = p;
                    if (sep) {
                        /* Custom separator */
                        size_t off = ln_simd_find_char(p, (size_t)(end - p), sep);
                        val_len = (off < (size_t)(end - p)) ? off : (size_t)(end - p);
                    } else {
                        /* Whitespace separator — scan to first whitespace */
                        val_len = 0;
                        while (p + val_len < end && !isspace((unsigned char)p[val_len])) val_len++;
                    }
                    p += val_len;
                }

                /* --- Store the field --- */
                /* Arena-allocate name so it outlives this stack frame;
                 * names longer than 63 bytes are truncated. */
                if (name_len > 63) name_len = 63;
                arena_name = ln_arena_strndup(vm->arena, name_start, name_len);
                if (!arena_name) break;

                /* Add field directly (context prefix handled by vm_add_string_field) */
                vm_add_string_field(vm, arena_name, val_start, val_len);
                n_pairs++;

                /* --- Skip separator(s) --- */
                if (sep) {
                    while (p < end && *p == sep) p++;
                } else {
                    while (p < end && isspace((unsigned char)*p)) p++;
                }
            }

            /* Pop context (only reached when the push above succeeded) */
            if (ctx_name[0]) {
                vm_pop_field_ctx(vm);
            }

            /* Must have parsed at least one pair */
            if (n_pairs == 0) return -1;

            consumed = (size_t)(p - vm->ip);
            vm->ip += consumed;
            vm->pc++;
            return 1;
        }

        /*=== Skipping ===*/
        case OP_SKIP_SPACE: {
            vm->ip += ln_simd_skip_space(vm->ip, remaining);
            vm->pc++;
            return 1;
        }

        case OP_SKIP_SPACE1: {
            /* Like OP_SKIP_SPACE but requires at least one whitespace byte. */
            size_t skipped;
            if (remaining == 0 || !isspace((unsigned char)vm->ip[0])) return -1;
            skipped = 1;
            while (skipped < remaining && isspace((unsigned char)vm->ip[skipped])) skipped++;
            vm->ip += skipped;
            vm->pc++;
            return 1;
        }

        case OP_SKIP_N: {
            uint16_t n = inst->aux;
            if (remaining < n) return -1;
            vm->ip += n;
            vm->pc++;
            return 1;
        }

        case OP_SKIP_TO: {
            /* Advance to (not past) the next occurrence of the character. */
            char c = inst->data.str[0];
            size_t pos = 0;
            while (pos < remaining && vm->ip[pos] != c) pos++;
            if (pos >= remaining) return -1;
            vm->ip += pos;
            vm->pc++;
            return 1;
        }

        case OP_SKIP_PAST: {
            /* Advance past the next occurrence of the character. */
            char c = inst->data.str[0];
            size_t pos = 0;
            while (pos < remaining && vm->ip[pos] != c) pos++;
            if (pos >= remaining) return -1;
            vm->ip += pos + 1;
            vm->pc++;
            return 1;
        }

        case OP_SKIP_LINE: {
            /* Skip through the next '\n', or to end of input if none. */
            size_t pos = 0;
            while (pos < remaining && vm->ip[pos] != '\n') pos++;
            vm->ip += (pos < remaining) ? pos + 1 : remaining;
            vm->pc++;
            return 1;
        }

        /*=== Tags ===*/
        case OP_TAG: {
            if (vm->result && inst->data.str[0]) {
                ln_fast_add_tag(vm->result, inst->data.str);
            }
            vm->pc++;
            return 1;
        }

        case OP_RULE_ID: {
            if (vm->result && inst->data.str[0]) {
                ln_fast_set_rule_id(vm->result, inst->data.str);
            }
            vm->pc++;
            return 1;
        }

        case OP_STATIC_FIELD: {
            /* Add a pre-baked key=value field from compile-time annotation resolution.
             * key is in data.kv.key (null-terminated), key length in aux.
             * value is in data.kv.val (null-terminated).
             * Zero per-message cost beyond the ln_fast_add_string_static call.
             */
            if (vm->result && inst->data.kv.key[0]) {
                uint16_t klen = inst->aux;
                size_t vlen = strlen(inst->data.kv.val);
                ln_fast_add_string_static(vm->result, inst->data.kv.key, klen, inst->data.kv.val,
                                          (uint32_t)vlen);
            }
            vm->pc++;
            return 1;
        }

        /*=== Assertions ===*/
        case OP_ASSERT_CHAR: {
            /* Look-ahead only: the input position is not advanced. */
            if (remaining < 1) return -1;
            if (*vm->ip != inst->data.str[0]) return -1;
            vm->pc++;
            return 1;
        }

        case OP_ASSERT_END: {
            if (remaining > 0) return -1;
            vm->pc++;
            return 1;
        }

        case OP_ASSERT_START: {
            if (vm->ip != vm->input) return -1;
            vm->pc++;
            return 1;
        }

        /*=== Special ===*/
        case OP_SYSLOG_PRI: {
            /* "<NNN>" with 1-3 digits and NNN <= 191; emits the fields
             * "facility" (NNN / 8) and "severity" (NNN % 8). */
            const char *p;
            const char *end;
            uint32_t pri;
            int digits;
            uint8_t facility;
            uint8_t severity;

            if (remaining < 3) return -1;
            if (vm->ip[0] != '<') return -1;

            p = vm->ip + 1;
            end = vm->input_end;
            pri = 0;
            digits = 0;

            while (p < end && *p >= '0' && *p <= '9' && digits < 3) {
                pri = pri * 10 + (*p - '0');
                p++;
                digits++;
            }

            if (p >= end || *p != '>' || digits == 0) return -1;
            if (pri > 191) return -1;
            p++;

            facility = pri / 8;
            severity = pri % 8;

            vm_add_int_field(vm, "facility", facility);
            vm_add_int_field(vm, "severity", severity);

            vm->ip = p;
            vm->pc++;
            return 1;
        }

        /*=== iptables name=value ===*/
        case OP_V2_IPTABLES: {
            /* Space-separated NAME=value pairs and bare NAME flags up to
             * the end of input; flags are stored with an empty value. */
            const char *ctx_name = inst->data.str;
            const char *p = vm->ip;
            const char *end = vm->input_end;
            int n_pairs = 0;

            /* Fail if the context cannot be pushed, so the pop below never
             * removes a context this handler did not push. */
            if (ctx_name[0]) {
                if (!vm_push_field_ctx(vm, ctx_name, false)) return -1;
            }

            while (p < end) {
                const char *name_start;
                size_t name_len;
                size_t eq_off;
                const char *val_start;
                size_t val_len;
                char *arena_name;

                /* Skip leading spaces */
                while (p < end && *p == ' ') p++;
                if (p >= end) break;

                name_start = p;

                /* Scan for '=' or space (flag without value) */
                eq_off = ln_simd_find_char(p, (size_t)(end - p), '=');

                /* Check if a space comes before '=' (flag) */
                {
                    size_t sp_off;
                    sp_off = ln_simd_find_char(p, (size_t)(end - p), ' ');
                    if (sp_off < eq_off) {
                        /* Flag: name without '=' — store with empty value */
                        name_len = sp_off;
                        if (name_len == 0) break;
                        if (name_len > 63) name_len = 63;
                        arena_name = ln_arena_strndup(vm->arena, name_start, name_len);
                        if (!arena_name) break;
                        vm_add_string_field(vm, arena_name, "", 0);
                        n_pairs++;
                        p += sp_off;
                        continue;
                    }
                }

                if (eq_off >= (size_t)(end - p)) {
                    /* No '=' found — the rest of the input is one flag */
                    name_len = (size_t)(end - p);
                    if (name_len == 0) break;
                    if (name_len > 63) name_len = 63;
                    arena_name = ln_arena_strndup(vm->arena, name_start, name_len);
                    if (!arena_name) break;
                    vm_add_string_field(vm, arena_name, "", 0);
                    n_pairs++;
                    p = end;
                    break;
                }

                name_len = eq_off;
                if (name_len == 0) break;

                /* Validate name: alphanumeric + underscore */
                {
                    int valid = 1;
                    size_t k;
                    for (k = 0; k < name_len; k++) {
                        unsigned char c = (unsigned char)p[k];
                        if (!(isalnum(c) || c == '_')) {
                            valid = 0;
                            break;
                        }
                    }
                    if (!valid) break;
                }

                p += name_len + 1; /* skip name + '=' */

                /* Parse value: everything until space or end */
                val_start = p;
                {
                    size_t sp_off;
                    sp_off = ln_simd_find_char(p, (size_t)(end - p), ' ');
                    val_len = (sp_off < (size_t)(end - p)) ? sp_off : (size_t)(end - p);
                }
                p += val_len;

                /* Store field */
                if (name_len > 63) name_len = 63;
                arena_name = ln_arena_strndup(vm->arena, name_start, name_len);
                if (!arena_name) break;
                vm_add_string_field(vm, arena_name, val_start, val_len);
                n_pairs++;
            }

            if (ctx_name[0]) {
                vm_pop_field_ctx(vm);
            }

            /* Require minimum 2 pairs */
            if (n_pairs < 2) return -1;

            vm->ip = p;
            vm->pc++;
            return 1;
        }

        /*=== CEE-syslog ===*/
        case OP_CEE_SYSLOG: {
            const char *name = inst->data.str;
            const char *p = vm->ip;
            size_t rem = remaining;
            size_t json_len;

            /* Must start with "@cee:" (5 chars) and have at least one
             * more byte for the JSON body */
            if (rem < 6) return -1;
            if (memcmp(p, "@cee:", 5) != 0) return -1;
            p += 5;
            rem -= 5;

            /* Skip optional whitespace after ':' */
            while (rem > 0 && (*p == ' ' || *p == '\t')) {
                p++;
                rem--;
            }

            /* Must have '{' for JSON object */
            if (rem == 0 || *p != '{') return -1;

            /* Parse JSON body */
            if (parse_json(p, rem, &json_len) != 0) return -1;

            /* JSON must consume rest of input */
            if (json_len != rem) return -1;

            vm_add_string_field(vm, name, p, json_len);
            vm->ip = p + json_len;
            vm->pc++;
            return 1;
        }

        /*=== Checkpoint LEA ===*/
        case OP_CHECKPOINT_LEA: {
            /* "name: value;" pairs; parsing stops at end of input or at
             * the optional terminator character. */
            const char *ctx_name = inst->data.char_to.name;
            const char term = (char)inst->data.char_to.delim;
            const char *p = vm->ip;
            const char *end = vm->input_end;
            int n_pairs = 0;

            /* Fail if the context cannot be pushed, so the pop below never
             * removes a context this handler did not push. */
            if (ctx_name[0]) {
                if (!vm_push_field_ctx(vm, ctx_name, false)) return -1;
            }

            while (p < end) {
                const char *name_start;
                size_t name_len;
                size_t colon_off;
                const char *val_start;
                size_t val_len;
                size_t semi_off;
                char *arena_name;

                /* Skip leading spaces */
                while (p < end && *p == ' ') p++;
                if (p >= end) break;

                /* Check for early terminator */
                if (term && *p == term) break;

                name_start = p;

                /* Scan for ':' */
                colon_off = ln_simd_find_char(p, (size_t)(end - p), ':');
                if (colon_off >= (size_t)(end - p)) break;

                name_len = colon_off;
                if (name_len == 0) break;

                p += name_len + 1; /* skip name + ':' */

                /* Skip spaces after ':' */
                while (p < end && *p == ' ') p++;

                /* Scan for ';' */
                val_start = p;
                semi_off = ln_simd_find_char(p, (size_t)(end - p), ';');
                if (semi_off >= (size_t)(end - p)) {
                    /* No semicolon — take rest as value */
                    val_len = (size_t)(end - p);
                    p = end;
                } else {
                    val_len = semi_off;
                    p += val_len + 1; /* skip value + ';' */
                }

                /* Store field */
                if (name_len > 63) name_len = 63;
                arena_name = ln_arena_strndup(vm->arena, name_start, name_len);
                if (!arena_name) break;
                vm_add_string_field(vm, arena_name, val_start, val_len);
                n_pairs++;
            }

            if (ctx_name[0]) {
                vm_pop_field_ctx(vm);
            }

            /* Require minimum 1 pair */
            if (n_pairs < 1) return -1;

            vm->ip = p;
            vm->pc++;
            return 1;
        }

        /*=== CEF header ===*/
        case OP_CEF_HDR: {
            const char *name = inst->data.str;
            const char *p = vm->ip;
            const char *end = vm->input_end;
            size_t rem = remaining;
            int hdr_idx;

            /* Fixed header field names */
            static const char *hdr_names[6] = {
                "DeviceVendor", "DeviceProduct", "DeviceVersion", "SignatureID", "Name", "Severity"
            };

            /* Must start with "CEF:0|" (6 chars) */
            if (rem < 6) return -1;
            if (memcmp(p, "CEF:0|", 6) != 0) return -1;
            p += 6;

            /* Fail if the context cannot be pushed, so the pops below never
             * remove a context this handler did not push. */
            if (name[0]) {
                if (!vm_push_field_ctx(vm, name, false)) return -1;
            }

            /* Parse 6 pipe-delimited header fields with escape handling */
            for (hdr_idx = 0; hdr_idx < 6; hdr_idx++) {
                const char *field_start = p;
                size_t field_len;

                /* Scan for unescaped '|' or end of input */
                while (p < end) {
                    if (*p == '\\' && p + 1 < end) {
                        p += 2; /* skip escaped char */
                        continue;
                    }
                    if (*p == '|') break;
                    p++;
                }

                field_len = (size_t)(p - field_start);
                vm_add_string_field(vm, hdr_names[hdr_idx], field_start, field_len);

                if (hdr_idx < 5) {
                    /* Expect pipe separator between fields */
                    if (p >= end || *p != '|') {
                        if (name[0]) vm_pop_field_ctx(vm);
                        return -1;
                    }
                    p++; /* skip '|' */
                } else {
                    /* After last header field, skip '|' if present */
                    if (p < end && *p == '|') p++;
                }
            }

            /* Parse extensions: key=value pairs separated by spaces.
             * Push "Extensions" sub-context.
             */
            if (p < end) {
                const char *ext_ctx = "Extensions";

                if (!vm_push_field_ctx(vm, ext_ctx, false)) {
                    /* Undo our own outer context before failing. */
                    if (name[0]) vm_pop_field_ctx(vm);
                    return -1;
                }

                while (p < end) {
                    const char *key_start;
                    size_t eq_off;
                    size_t key_len;
                    const char *val_start;
                    size_t val_len;
                    const char *next_eq;
                    char *arena_key;

                    /* Skip spaces */
                    while (p < end && *p == ' ') p++;
                    if (p >= end) break;

                    key_start = p;

                    /* Find '=' */
                    eq_off = ln_simd_find_char(p, (size_t)(end - p), '=');
                    if (eq_off >= (size_t)(end - p)) break;

                    key_len = eq_off;
                    if (key_len == 0) break;

                    p += key_len + 1; /* skip key + '=' */

                    /* Value: greedy — take everything until next key=
                     * Locate the next '=' and walk backwards from it to
                     * the space that precedes the next key.
                     */
                    val_start = p;
                    next_eq = NULL;
                    if (p < end) {
                        size_t off = ln_simd_find_char(p, (size_t)(end - p), '=');
                        if (off < (size_t)(end - p)) next_eq = p + off;
                    }

                    if (next_eq && next_eq > val_start) {
                        /* Walk back from '=' to find space before key */
                        const char *kstart = next_eq;
                        while (kstart > val_start && *(kstart - 1) != ' ') kstart--;
                        if (kstart > val_start) {
                            val_len = (size_t)(kstart - val_start);
                            /* Trim trailing space */
                            while (val_len > 0 && val_start[val_len - 1] == ' ') val_len--;
                            p = kstart;
                        } else {
                            /* No space before the '=': it belongs to this value */
                            val_len = (size_t)(end - val_start);
                            p = end;
                        }
                    } else {
                        val_len = (size_t)(end - val_start);
                        p = end;
                    }

                    if (key_len > 63) key_len = 63;
                    arena_key = ln_arena_strndup(vm->arena, key_start, key_len);
                    if (!arena_key) break;
                    vm_add_string_field(vm, arena_key, val_start, val_len);
                }

                vm_pop_field_ctx(vm); /* pop Extensions */
            }

            if (name[0]) {
                vm_pop_field_ctx(vm);
            }

            vm->ip = p;
            vm->pc++;
            return 1;
        }

        /*=== Debug ===*/
        case OP_NOP:
            vm->pc++;
            return 1;

        case OP_DEBUG:
            fprintf(stderr, "[DEBUG] pc=%u ip=%zu ctx_sp=%u\n", vm->pc, (size_t)(vm->ip - vm->input),
                    vm->field_ctx_sp);
            vm->pc++;
            return 1;

        default: {
            snprintf(vm->error_buf, sizeof(vm->error_buf), "unknown opcode 0x%02x at pc=%u", inst->op,
                     vm->pc);
            vm->error = vm->error_buf;
            return -1;
        }
    }
}

/*============================================================================
 * Main Execution Loop
 *============================================================================
 *
 * Two implementations:
 *
 * 1. COMPUTED GOTO (GCC/Clang): Flat dispatch loop with direct jumps
 *    between opcode handlers. Each handler ends with NEXT() which
 *    increments pc and jumps directly to the next handler — no loop
 *    overhead, no switch bounds check. Failed handlers jump to
 *    backtrack: which pops the fork stack or returns NOMATCH.
 *
 * 2. SWITCH FALLBACK (MSVC): Traditional loop calling vm_exec_instr().
 *    Same semantics, ~20-30% slower dispatch.
*============================================================================*/ int ln_vm_exec(ln_vm_t *vm, const ln_program_t *prog, const char *input, size_t len, ln_fast_result_t *result) { size_t leading_ws; if (UNLIKELY(!vm || !prog || !prog->code || !input)) { return LN_VM_ERROR; } vm->prog = prog; vm->input = input; vm->input_end = input + len; vm->pc = 0; vm->ip = input; vm->fork_sp = 0; vm->call_sp = 0; vm->field_ctx_sp = 0; vm->result = result; vm->instr_count = 0; vm->backtrack_count = 0; vm->matched_rule = NULL; vm->error = NULL; /* Skip leading whitespace */ leading_ws = ln_simd_skip_space(vm->ip, (size_t)(vm->input_end - vm->ip)); vm->ip += leading_ws; return ln_vm_continue(vm); } #if LN_VM_COMPUTED_GOTO /* * Computed goto dispatch loop. * * All opcode handlers are labels within this single function. * The dispatch table maps opcode bytes → label addresses. * Each handler ends with NEXT() (success → advance pc and dispatch) * or BACKTRACK() (failure → pop fork stack or return NOMATCH). * * Local variables used by handlers: * inst - current instruction pointer * remaining - bytes remaining in input * pc - program counter (local copy, written back to vm on exit) * prog - program pointer (const, avoids vm-> indirection) * * The hot path is: DISPATCH → handler → NEXT → DISPATCH → ... 
* Each DISPATCH does: prefetch(code[pc+1]), goto *table[code[pc].op] */ HOT_FUNC int ln_vm_continue(ln_vm_t *vm) { const ln_program_t *prog; uint32_t pc; const char *ip; const char *input_end; const uint64_t MAX_INSTRUCTIONS = 100000000; DISPATCH_INIT(); if (UNLIKELY(!vm || !vm->prog)) { return LN_VM_ERROR; } /* Hoist frequently accessed fields into local variables */ prog = vm->prog; pc = vm->pc; ip = vm->ip; input_end = vm->input_end; /* Convenience macros for the handler bodies */ #define REMAINING() ((size_t)(input_end - ip)) #define INST() (&prog->code[pc]) #define WRITEBACK() do { vm->pc = pc; vm->ip = ip; } while(0) /* Start dispatch */ DISPATCH(); /*================================================================= * Control Flow Opcodes *=================================================================*/ CASE(halt) WRITEBACK(); BACKTRACK(); CASE(match) { const ln_instr_t *inst = INST(); vm->matched_rule = inst->data.str; if (vm->result) { ln_fast_set_rule_id(vm->result, inst->data.str); } WRITEBACK(); return LN_VM_OK; } CASE(jump) { const ln_instr_t *inst = INST(); vm->instr_count++; pc += inst->data.jump.offset; DISPATCH(); } CASE(fork) { const ln_instr_t *inst = INST(); uint32_t alt_pc; vm->instr_count++; alt_pc = pc + inst->data.jump.offset; vm->pc = pc; vm->ip = ip; /* push_fork reads vm state */ if (UNLIKELY(!vm_push_fork(vm, alt_pc))) { WRITEBACK(); BACKTRACK(); } pc++; DISPATCH(); } CASE(fail) WRITEBACK(); BACKTRACK(); CASE(call) { const ln_instr_t *inst; vm->instr_count++; if (UNLIKELY(vm->call_sp >= LN_VM_MAX_CALLS)) { vm->error = "call stack overflow"; WRITEBACK(); BACKTRACK(); } inst = INST(); vm->calls[vm->call_sp++] = pc + 1; pc += inst->data.jump.offset; DISPATCH(); } CASE(ret) { vm->instr_count++; if (UNLIKELY(vm->call_sp == 0)) { vm->error = "call stack underflow"; WRITEBACK(); BACKTRACK(); } pc = vm->calls[--vm->call_sp]; DISPATCH(); } /*================================================================= * Field Context Opcodes 
*=================================================================*/ CASE(ctx_push) { const ln_instr_t *inst = INST(); vm->instr_count++; if (UNLIKELY(!vm_push_field_ctx(vm, inst->data.str, false))) { WRITEBACK(); BACKTRACK(); } NEXT(); } CASE(ctx_pop) { vm->instr_count++; if (UNLIKELY(!vm_pop_field_ctx(vm))) { WRITEBACK(); BACKTRACK(); } pc++; DISPATCH(); } CASE(ctx_nest) { const ln_instr_t *inst = INST(); vm->instr_count++; if (UNLIKELY(!vm_push_field_ctx(vm, inst->data.str, true))) { WRITEBACK(); BACKTRACK(); } pc++; DISPATCH(); } CASE(ctx_unnest) { vm->instr_count++; if (UNLIKELY(!vm_pop_field_ctx(vm))) { WRITEBACK(); BACKTRACK(); } pc++; DISPATCH(); } /*================================================================= * Literal Matching Opcodes *=================================================================*/ CASE(literal) { const ln_instr_t *inst = INST(); uint16_t len; vm->instr_count++; len = inst->aux; if (UNLIKELY(REMAINING() < len)) { WRITEBACK(); BACKTRACK(); } if (UNLIKELY(memcmp(ip, inst->data.str, len) != 0)) { WRITEBACK(); BACKTRACK(); } ip += len; pc++; DISPATCH(); } CASE(literal_ext) { /* External literal — same as LITERAL but data is a pointer. * Currently unused in compiled programs, but reserved. 
*/ WRITEBACK(); BACKTRACK(); } CASE(literal_ci) { const ln_instr_t *inst = INST(); uint16_t len; vm->instr_count++; len = inst->aux; if (UNLIKELY(REMAINING() < len)) { WRITEBACK(); BACKTRACK(); } for (uint16_t i = 0; i < len; i++) { if (tolower((unsigned char)ip[i]) != tolower((unsigned char)inst->data.str[i])) { WRITEBACK(); BACKTRACK(); } } ip += len; pc++; DISPATCH(); } CASE(char) { vm->instr_count++; if (UNLIKELY(REMAINING() < 1)) { WRITEBACK(); BACKTRACK(); } if (UNLIKELY(*ip != INST()->data.str[0])) { WRITEBACK(); BACKTRACK(); } ip++; pc++; DISPATCH(); } CASE(any) { vm->instr_count++; if (UNLIKELY(REMAINING() < 1)) { WRITEBACK(); BACKTRACK(); } ip++; pc++; DISPATCH(); } CASE(charset) { /* External charset bitmap — reserved but not yet used */ WRITEBACK(); BACKTRACK(); } /*================================================================= * Field Extraction Opcodes *=================================================================*/ CASE(field_word) { const ln_instr_t *inst = INST(); ln_span_t span; vm->instr_count++; if (UNLIKELY(ln_simd_word(ip, REMAINING(), &span) != LN_SIMD_OK)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; /* vm_add reads vm->ip */ vm_add_string_field(vm, inst->data.str, span.start, span.len); ip += span.consumed; pc++; DISPATCH(); } CASE(field_int) { const ln_instr_t *inst = INST(); int64_t value; size_t len; vm->instr_count++; if (UNLIKELY(parse_number(ip, REMAINING(), &value, &len) != 0)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_int_field(vm, inst->data.str, value); ip += len; pc++; DISPATCH(); } CASE(field_uint) { const ln_instr_t *inst = INST(); int64_t value; size_t len; vm->instr_count++; if (UNLIKELY(parse_number(ip, REMAINING(), &value, &len) != 0)) { WRITEBACK(); BACKTRACK(); } if (UNLIKELY(ip[0] == '-')) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_int_field(vm, inst->data.str, value); ip += len; pc++; DISPATCH(); } CASE(field_float) { const ln_instr_t *inst = INST(); size_t len; vm->instr_count++; if 
(UNLIKELY(parse_float(ip, REMAINING(), &len) != 0)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, inst->data.str, ip, len); ip += len; pc++; DISPATCH(); } CASE(field_ipv4) { const ln_instr_t *inst = INST(); size_t len; vm->instr_count++; if (UNLIKELY(parse_ipv4(ip, REMAINING(), &len) != 0)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, inst->data.str, ip, len); ip += len; pc++; DISPATCH(); } CASE(field_ipv6) { const ln_instr_t *inst = INST(); size_t len; vm->instr_count++; if (UNLIKELY(parse_ipv6(ip, REMAINING(), &len) != 0)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, inst->data.str, ip, len); ip += len; pc++; DISPATCH(); } CASE(field_hex) { const ln_instr_t *inst = INST(); int64_t value; size_t len; vm->instr_count++; if (UNLIKELY(parse_hex(ip, REMAINING(), &value, &len) != 0)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_int_field(vm, inst->data.str, value); ip += len; pc++; DISPATCH(); } CASE(field_quoted) { const ln_instr_t *inst = INST(); size_t start, len, consumed; vm->instr_count++; if (UNLIKELY(parse_op_quoted(ip, REMAINING(), &start, &len, &consumed) != 0)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, inst->data.str, ip + start, len); ip += consumed; pc++; DISPATCH(); } CASE(field_char_to) { const ln_instr_t *inst = INST(); char delim; ln_span_t span; vm->instr_count++; delim = (char)inst->data.char_to.delim; if (UNLIKELY(ln_simd_char_to(ip, REMAINING(), delim, &span) != LN_SIMD_OK)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, inst->data.char_to.name, span.start, span.len); ip += span.consumed; pc++; DISPATCH(); } CASE(field_str_to) { const ln_instr_t *inst = INST(); char delim; size_t len; vm->instr_count++; delim = (char)inst->data.char_to.delim; if (UNLIKELY(parse_char_to(ip, REMAINING(), delim, &len) != 0)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, inst->data.char_to.name, ip, len); ip += len; pc++; DISPATCH(); } 
CASE(field_rest) { const ln_instr_t *inst = INST(); size_t rem; vm->instr_count++; rem = REMAINING(); vm->ip = ip; vm_add_string_field(vm, inst->data.str, ip, rem); ip += rem; pc++; DISPATCH(); } CASE(field_json) { const ln_instr_t *inst = INST(); size_t len; vm->instr_count++; if (UNLIKELY(parse_json(ip, REMAINING(), &len) != 0)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, inst->data.str, ip, len); ip += len; pc++; DISPATCH(); } CASE(field_mac) { const ln_instr_t *inst = INST(); size_t len; vm->instr_count++; if (UNLIKELY(parse_mac48(ip, REMAINING(), &len) != 0)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, inst->data.str, ip, len); ip += len; pc++; DISPATCH(); } CASE(field_date) { const ln_instr_t *inst = INST(); ln_span_t span; vm->instr_count++; if (UNLIKELY(ln_simd_timestamp(ip, REMAINING(), &span, NULL) != LN_SIMD_OK)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, inst->data.str, span.start, span.len); ip += span.consumed; pc++; DISPATCH(); } CASE(field_regex) { /* OP_FIELD_REGEX: handled via external regex engine. * For now, fall through to backtrack if encountered. */ WRITEBACK(); BACKTRACK(); } CASE(field_name_value) { /* * Parse name=value pairs. This handler modifies vm->ip directly * because it uses vm_add_string_field() and vm_push/pop_field_ctx() * which read vm state. */ const ln_instr_t *inst = INST(); const char *ctx_name; const char *p; const char *end; char sep; char ass; char ass_char; int n_pairs; vm->instr_count++; vm->ip = ip; vm->pc = pc; ctx_name = inst->data.char_to.name; sep = (char)inst->data.char_to.delim; ass = (char)inst->data.char_to.ass; ass_char = ass ? 
ass : '='; if (ctx_name[0]) { vm_push_field_ctx(vm, ctx_name, false); } p = ip; end = input_end; n_pairs = 0; while (p < end) { const char *name_start = p; size_t name_end_off; size_t name_len; const char *val_start; size_t val_len; char *arena_name; name_end_off = ln_simd_find_char(p, (size_t)(end - p), ass_char); if (name_end_off >= (size_t)(end - p)) break; name_len = name_end_off; if (name_len == 0) break; if (ass == 0) { int valid = 1; size_t k; for (k = 0; k < name_len; k++) { unsigned char c = (unsigned char)p[k]; if (!(isalnum(c) || c == '.' || c == '_' || c == '-')) { valid = 0; break; } } if (!valid) break; } p += name_len + 1; if (p < end && (*p == '"' || *p == '\'')) { char quote = *p; int backslashes = 0; p++; val_start = p; while (p < end) { if (*p == quote && (backslashes % 2 == 0)) break; if (*p == '\\') backslashes++; else backslashes = 0; p++; } val_len = (size_t)(p - val_start); if (p < end && *p == quote) { p++; } else { break; } } else { val_start = p; if (sep) { size_t off = ln_simd_find_char(p, (size_t)(end - p), sep); val_len = (off < (size_t)(end - p)) ? 
off : (size_t)(end - p); } else { val_len = 0; while (p + val_len < end && !isspace((unsigned char)p[val_len])) val_len++; } p += val_len; } /* Arena-allocate name so it outlives this stack frame */ if (name_len > 63) name_len = 63; arena_name = ln_arena_strndup(vm->arena, name_start, name_len); if (!arena_name) break; vm_add_string_field(vm, arena_name, val_start, val_len); n_pairs++; if (sep) { while (p < end && *p == sep) p++; } else { while (p < end && isspace((unsigned char)*p)) p++; } } if (ctx_name[0]) { vm_pop_field_ctx(vm); } if (UNLIKELY(n_pairs == 0)) { WRITEBACK(); BACKTRACK(); } ip = p; pc++; DISPATCH(); } /*================================================================= * Skipping Opcodes *=================================================================*/ CASE(skip_space) { vm->instr_count++; ip += ln_simd_skip_space(ip, REMAINING()); pc++; DISPATCH(); } CASE(skip_space1) { size_t rem; size_t skipped; vm->instr_count++; rem = REMAINING(); if (UNLIKELY(rem == 0 || !isspace((unsigned char)ip[0]))) { WRITEBACK(); BACKTRACK(); } skipped = 1; while (skipped < rem && isspace((unsigned char)ip[skipped])) skipped++; ip += skipped; pc++; DISPATCH(); } CASE(skip_n) { const ln_instr_t *inst = INST(); uint16_t n; vm->instr_count++; n = inst->aux; if (UNLIKELY(REMAINING() < n)) { WRITEBACK(); BACKTRACK(); } ip += n; pc++; DISPATCH(); } CASE(skip_to) { const ln_instr_t *inst = INST(); char c; size_t rem; size_t pos; vm->instr_count++; c = inst->data.str[0]; rem = REMAINING(); pos = ln_simd_find_char(ip, rem, c); if (UNLIKELY(pos >= rem)) { WRITEBACK(); BACKTRACK(); } ip += pos; pc++; DISPATCH(); } CASE(skip_past) { const ln_instr_t *inst = INST(); char c; size_t rem; size_t pos; vm->instr_count++; c = inst->data.str[0]; rem = REMAINING(); pos = ln_simd_find_char(ip, rem, c); if (UNLIKELY(pos >= rem)) { WRITEBACK(); BACKTRACK(); } ip += pos + 1; pc++; DISPATCH(); } CASE(skip_line) { size_t rem; size_t pos; vm->instr_count++; rem = REMAINING(); pos = 
ln_simd_find_char(ip, rem, '\n'); ip += (pos < rem) ? pos + 1 : rem; pc++; DISPATCH(); } /*================================================================= * Tag / Rule ID Opcodes *=================================================================*/ CASE(tag) { const ln_instr_t *inst = INST(); vm->instr_count++; if (vm->result && inst->data.str[0]) { ln_fast_add_tag(vm->result, inst->data.str); } pc++; DISPATCH(); } CASE(rule_id) { const ln_instr_t *inst = INST(); vm->instr_count++; if (vm->result && inst->data.str[0]) { ln_fast_set_rule_id(vm->result, inst->data.str); } pc++; DISPATCH(); } CASE(static_field) { const ln_instr_t *inst = INST(); vm->instr_count++; if (vm->result && inst->data.kv.key[0]) { uint16_t klen = inst->aux; size_t vlen = strlen(inst->data.kv.val); ln_fast_add_string_static(vm->result, inst->data.kv.key, klen, inst->data.kv.val, (uint32_t)vlen); } pc++; DISPATCH(); } /*================================================================= * Assertion Opcodes *=================================================================*/ CASE(assert_char) { vm->instr_count++; if (UNLIKELY(REMAINING() < 1 || *ip != INST()->data.str[0])) { WRITEBACK(); BACKTRACK(); } pc++; DISPATCH(); } CASE(assert_end) { vm->instr_count++; if (UNLIKELY(REMAINING() > 0)) { WRITEBACK(); BACKTRACK(); } pc++; DISPATCH(); } CASE(assert_start) { vm->instr_count++; if (UNLIKELY(ip != vm->input)) { WRITEBACK(); BACKTRACK(); } pc++; DISPATCH(); } /*================================================================= * Special Opcodes *=================================================================*/ CASE(syslog_pri) { size_t rem; const char *p; uint32_t pri; int digits; vm->instr_count++; rem = REMAINING(); if (UNLIKELY(rem < 3 || ip[0] != '<')) { WRITEBACK(); BACKTRACK(); } p = ip + 1; pri = 0; digits = 0; while (p < input_end && *p >= '0' && *p <= '9' && digits < 3) { pri = pri * 10 + (*p - '0'); p++; digits++; } if (UNLIKELY(p >= input_end || *p != '>' || digits == 0 || pri > 191)) { 
WRITEBACK(); BACKTRACK(); } p++; vm->ip = ip; vm_add_int_field(vm, "facility", pri / 8); vm_add_int_field(vm, "severity", pri % 8); ip = p; pc++; DISPATCH(); } CASE(syslog_ts) { /* OP_SYSLOG_TS: parse syslog timestamp — delegates to SIMD */ const ln_instr_t *inst = INST(); ln_span_t span; const char *name; vm->instr_count++; if (UNLIKELY(ln_simd_timestamp(ip, REMAINING(), &span, NULL) != LN_SIMD_OK)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; /* Store as "timestamp" field if no name specified */ name = inst->data.str[0] ? inst->data.str : "timestamp"; vm_add_string_field(vm, name, span.start, span.len); ip += span.consumed; pc++; DISPATCH(); } CASE(cef_hdr) { /* * Parse CEF header: CEF:0|vendor|product|version|sigID|name|severity|ext * 6 pipe-delimited fields with escape handling, then key=value extensions. */ const ln_instr_t *inst = INST(); const char *name; const char *p; int hdr_idx; static const char *cef_hdr_names[6] = { "DeviceVendor", "DeviceProduct", "DeviceVersion", "SignatureID", "Name", "Severity" }; vm->instr_count++; vm->ip = ip; vm->pc = pc; /* Must start with "CEF:0|" */ if (UNLIKELY(REMAINING() < 6 || memcmp(ip, "CEF:0|", 6) != 0)) { WRITEBACK(); BACKTRACK(); } name = inst->data.str; p = ip + 6; if (name[0]) { vm_push_field_ctx(vm, name, false); } /* Parse 6 pipe-delimited header fields */ for (hdr_idx = 0; hdr_idx < 6; hdr_idx++) { const char *field_start = p; size_t field_len; while (p < input_end) { if (*p == '\\' && p + 1 < input_end) { p += 2; continue; } if (*p == '|') break; p++; } field_len = (size_t)(p - field_start); vm_add_string_field(vm, cef_hdr_names[hdr_idx], field_start, field_len); if (hdr_idx < 5) { if (UNLIKELY(p >= input_end || *p != '|')) { if (name[0]) vm_pop_field_ctx(vm); WRITEBACK(); BACKTRACK(); } p++; } else { if (p < input_end && *p == '|') p++; } } /* Parse extensions: key=value pairs */ if (p < input_end) { const char *ext_ctx = "Extensions"; int n_ext = 0; vm_push_field_ctx(vm, ext_ctx, false); while (p < input_end) { 
const char *key_start; size_t eq_off; size_t key_len; const char *val_start; size_t val_len; const char *next_eq; const char *scan; char *arena_key; while (p < input_end && *p == ' ') p++; if (p >= input_end) break; key_start = p; eq_off = ln_simd_find_char(p, (size_t)(input_end - p), '='); if (eq_off >= (size_t)(input_end - p)) break; key_len = eq_off; if (key_len == 0) break; p += key_len + 1; val_start = p; next_eq = NULL; scan = p; while (scan < input_end) { size_t off = ln_simd_find_char(scan, (size_t)(input_end - scan), '='); if (off >= (size_t)(input_end - scan)) break; next_eq = scan + off; break; } if (next_eq && next_eq > val_start) { const char *kstart = next_eq; while (kstart > val_start && *(kstart - 1) != ' ') kstart--; if (kstart > val_start) { val_len = (size_t)(kstart - val_start); while (val_len > 0 && val_start[val_len - 1] == ' ') val_len--; p = kstart; } else { val_len = (size_t)(input_end - val_start); p = input_end; } } else { val_len = (size_t)(input_end - val_start); p = input_end; } if (key_len > 63) key_len = 63; arena_key = ln_arena_strndup(vm->arena, key_start, key_len); if (!arena_key) break; vm_add_string_field(vm, arena_key, val_start, val_len); n_ext++; } vm_pop_field_ctx(vm); (void)n_ext; } if (name[0]) { vm_pop_field_ctx(vm); } ip = p; pc++; DISPATCH(); } CASE(v2_iptables) { /* * Parse iptables-format name=value pairs. * Minimum 2 pairs required. Flags (names without '=') * stored as empty string values. 
*/ const ln_instr_t *inst = INST(); const char *ctx_name; const char *p; int n_pairs; vm->instr_count++; vm->ip = ip; vm->pc = pc; ctx_name = inst->data.str; if (ctx_name[0]) { vm_push_field_ctx(vm, ctx_name, false); } p = ip; n_pairs = 0; while (p < input_end) { const char *name_start; size_t eq_off; size_t sp_off; size_t name_len; const char *val_start; size_t val_len; char *arena_name; /* Skip leading spaces */ while (p < input_end && *p == ' ') p++; if (p >= input_end) break; name_start = p; /* Scan for '=' */ eq_off = ln_simd_find_char(p, (size_t)(input_end - p), '='); /* Check if space comes before '=' (flag) */ sp_off = ln_simd_find_char(p, (size_t)(input_end - p), ' '); if (sp_off < eq_off) { /* Flag: name without value */ name_len = sp_off; if (name_len == 0) break; if (name_len > 63) name_len = 63; arena_name = ln_arena_strndup(vm->arena, name_start, name_len); if (!arena_name) break; vm_add_string_field(vm, arena_name, "", 0); n_pairs++; p += sp_off; continue; } if (eq_off >= (size_t)(input_end - p)) { /* No '=' — rest is a flag */ name_len = (size_t)(input_end - p); if (name_len == 0) break; if (name_len > 63) name_len = 63; arena_name = ln_arena_strndup(vm->arena, name_start, name_len); if (!arena_name) break; vm_add_string_field(vm, arena_name, "", 0); n_pairs++; p = input_end; break; } name_len = eq_off; if (name_len == 0) break; /* Validate name: alphanumeric + underscore */ { int valid = 1; size_t k; for (k = 0; k < name_len; k++) { unsigned char c = (unsigned char)p[k]; if (!(isalnum(c) || c == '_')) { valid = 0; break; } } if (!valid) break; } p += name_len + 1; /* skip name + '=' */ /* Value: everything until space or end */ val_start = p; { size_t off; off = ln_simd_find_char(p, (size_t)(input_end - p), ' '); val_len = (off < (size_t)(input_end - p)) ? 
off : (size_t)(input_end - p); } p += val_len; if (name_len > 63) name_len = 63; arena_name = ln_arena_strndup(vm->arena, name_start, name_len); if (!arena_name) break; vm_add_string_field(vm, arena_name, val_start, val_len); n_pairs++; } if (ctx_name[0]) { vm_pop_field_ctx(vm); } if (UNLIKELY(n_pairs < 2)) { WRITEBACK(); BACKTRACK(); } ip = p; pc++; DISPATCH(); } CASE(cee_syslog) { /* * Parse CEE-syslog format: "@cee:" prefix + JSON object. * The JSON body is stored as the field value. */ const ln_instr_t *inst = INST(); const char *fname; const char *p; size_t rem; size_t json_len; vm->instr_count++; fname = inst->data.str; rem = REMAINING(); /* Must start with "@cee:" (5 chars) + at least '{' */ if (UNLIKELY(rem < 6 || memcmp(ip, "@cee:", 5) != 0)) { WRITEBACK(); BACKTRACK(); } p = ip + 5; rem -= 5; /* Skip optional whitespace after ':' */ while (rem > 0 && (*p == ' ' || *p == '\t')) { p++; rem--; } /* Must have '{' */ if (UNLIKELY(rem == 0 || *p != '{')) { WRITEBACK(); BACKTRACK(); } /* Parse JSON body */ if (UNLIKELY(parse_json(p, rem, &json_len) != 0)) { WRITEBACK(); BACKTRACK(); } /* JSON must consume rest of input */ if (UNLIKELY(json_len != rem)) { WRITEBACK(); BACKTRACK(); } vm->ip = ip; vm_add_string_field(vm, fname, p, json_len); ip = p + json_len; pc++; DISPATCH(); } CASE(checkpoint_lea) { /* * Parse Checkpoint LEA format: name: value; name: value; ... * Minimum 1 field required. 
*/ const ln_instr_t *inst = INST(); const char *ctx_name; char term; const char *p; int n_pairs; vm->instr_count++; vm->ip = ip; vm->pc = pc; ctx_name = inst->data.char_to.name; term = (char)inst->data.char_to.delim; if (ctx_name[0]) { vm_push_field_ctx(vm, ctx_name, false); } p = ip; n_pairs = 0; while (p < input_end) { const char *name_start; size_t colon_off; size_t name_len; const char *val_start; size_t val_len; size_t semi_off; char *arena_name; /* Skip leading spaces */ while (p < input_end && *p == ' ') p++; if (p >= input_end) break; /* Check for early terminator */ if (term && *p == term) break; name_start = p; /* Scan for ':' */ colon_off = ln_simd_find_char(p, (size_t)(input_end - p), ':'); if (colon_off >= (size_t)(input_end - p)) break; name_len = colon_off; if (name_len == 0) break; p += name_len + 1; /* skip name + ':' */ /* Skip spaces after ':' */ while (p < input_end && *p == ' ') p++; /* Scan for ';' */ val_start = p; semi_off = ln_simd_find_char(p, (size_t)(input_end - p), ';'); if (semi_off >= (size_t)(input_end - p)) { val_len = (size_t)(input_end - p); p = input_end; } else { val_len = semi_off; p += val_len + 1; /* skip value + ';' */ } if (name_len > 63) name_len = 63; arena_name = ln_arena_strndup(vm->arena, name_start, name_len); if (!arena_name) break; vm_add_string_field(vm, arena_name, val_start, val_len); n_pairs++; } if (ctx_name[0]) { vm_pop_field_ctx(vm); } if (UNLIKELY(n_pairs < 1)) { WRITEBACK(); BACKTRACK(); } ip = p; pc++; DISPATCH(); } /*================================================================= * Debug / NOP Opcodes *=================================================================*/ CASE(nop) { vm->instr_count++; pc++; DISPATCH(); } CASE(debug) { vm->instr_count++; WRITEBACK(); fprintf(stderr, "[DEBUG] pc=%u ip=%zu ctx_sp=%u\n", pc, (size_t)(ip - vm->input), vm->field_ctx_sp); pc++; DISPATCH(); } CASE(invalid) { snprintf(vm->error_buf, sizeof(vm->error_buf), "unknown opcode 0x%02x at pc=%u", prog->code[pc].op, pc); 
vm->error = vm->error_buf; WRITEBACK(); return LN_VM_ERROR; } /*================================================================= * Backtrack Handler *=================================================================*/ backtrack: vm->pc = pc; vm->ip = ip; if (UNLIKELY(vm->instr_count > MAX_INSTRUCTIONS)) { vm->error = "instruction limit exceeded"; return LN_VM_LIMIT; } if (!vm_pop_fork(vm)) { return LN_VM_NOMATCH; } /* Restore local variables from vm state (pop_fork wrote them) */ pc = vm->pc; ip = vm->ip; DISPATCH(); #undef REMAINING #undef INST #undef WRITEBACK } #else /* !LN_VM_COMPUTED_GOTO — switch-based fallback */ int ln_vm_continue(ln_vm_t *vm) { if (!vm || !vm->prog) { return LN_VM_ERROR; } const uint64_t MAX_INSTRUCTIONS = 100000000; for (;;) { if (UNLIKELY(vm->instr_count > MAX_INSTRUCTIONS)) { vm->error = "instruction limit exceeded"; return LN_VM_LIMIT; } int rc = vm_exec_instr(vm); if (rc == 0) { return LN_VM_OK; } if (rc < 0) { if (!vm_pop_fork(vm)) { return LN_VM_NOMATCH; } } } } #endif /* LN_VM_COMPUTED_GOTO */ /*============================================================================ * Debug *============================================================================*/ void ln_vm_dump(const ln_vm_t *vm, FILE *fp) { if (!vm || !fp) return; fprintf(fp, "=== VM State ===\n"); fprintf(fp, "Program: %s\n", vm->prog ? (vm->prog->name ? vm->prog->name : "(unnamed)") : "(none)"); fprintf(fp, "PC: %u\n", vm->pc); fprintf(fp, "IP offset: %zu\n", vm->ip ? (size_t)(vm->ip - vm->input) : 0); fprintf(fp, "Remaining: %zu\n", ln_vm_remaining(vm)); fprintf(fp, "Fork SP: %u\n", vm->fork_sp); fprintf(fp, "Call SP: %u\n", vm->call_sp); fprintf(fp, "Field Ctx: %u\n", vm->field_ctx_sp); fprintf(fp, "Instr cnt: %lu\n", (unsigned long)vm->instr_count); fprintf(fp, "Backtracks: %lu\n", (unsigned long)vm->backtrack_count); fprintf(fp, "Matched: %s\n", vm->matched_rule ? vm->matched_rule : "(none)"); fprintf(fp, "Error: %s\n", vm->error ? 
vm->error : "(none)"); if (vm->field_ctx_sp > 0) { fprintf(fp, "Field context stack:\n"); for (uint32_t i = 0; i < vm->field_ctx_sp; i++) { fprintf(fp, " [%u] \"%s\" (nested=%d)\n", i, vm->field_ctx[i].name ? vm->field_ctx[i].name : "(null)", vm->field_ctx[i].is_nested); } } if (vm->ip && vm->input_end > vm->ip) { size_t show = (size_t)(vm->input_end - vm->ip); if (show > 40) show = 40; fprintf(fp, "At input: \"%.*s%s\"\n", (int)show, vm->ip, show < (size_t)(vm->input_end - vm->ip) ? "..." : ""); } fprintf(fp, "\n"); } void ln_vm_set_trace(ln_vm_t *vm, bool enable) { (void)vm; (void)enable; #ifdef LN_VM_TRACE g_trace_enabled = enable; #endif } liblognorm-2.1.0/src/turbo_vm.h000066400000000000000000000141171520037563000164600ustar00rootroot00000000000000/** * @file turbo_vm.h * @brief Virtual machine for executing TurboVM bytecode *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. */ #ifndef LIBLOGNORM_TURBO_VM_H_INCLUDED #define LIBLOGNORM_TURBO_VM_H_INCLUDED #include "turbo_opcode.h" #include "turbo_result_fast.h" #include "turbo_arena.h" #include #include #include #ifdef __cplusplus extern "C" { #endif /*============================================================================ * Constants *============================================================================*/ /** Maximum fork stack depth (backtracking limit) */ #define LN_VM_MAX_FORKS 64 /** Maximum call stack depth (subroutine nesting) */ #define LN_VM_MAX_CALLS 16 /** Maximum field context depth (for ".." 
substitution) */ #define LN_VM_MAX_FIELD_CTX 16 /** VM return codes */ #define LN_VM_OK 0 /**< Match successful */ #define LN_VM_NOMATCH -1 /**< No rule matched */ #define LN_VM_ERROR -2 /**< Execution error */ #define LN_VM_LIMIT -3 /**< Resource limit exceeded */ /*============================================================================ * Program Structure *============================================================================*/ typedef struct { const ln_instr_t *code; /**< Instruction array */ uint32_t code_len; /**< Number of instructions */ const char *name; /**< Program name (optional) */ uint32_t flags; /**< Program flags */ } ln_program_t; /*============================================================================ * Field Context (for ".." substitution) *============================================================================*/ /** * @brief Field name context for custom type inheritance. * * When entering %net_src_ip:@netscaler_ip%, push "net_src_ip". * If the custom type has %..:ipv4%, resolve ".." to "net_src_ip". 
*/ typedef struct { const char *name; /**< Parent field name */ uint16_t name_len; /**< Name length */ uint8_t is_nested; /**< Create nested object for this context */ uint8_t _pad; } ln_field_ctx_t; /*============================================================================ * Fork State *============================================================================*/ typedef struct { uint32_t pc; /**< Saved program counter */ const char *ip; /**< Saved input position */ uint8_t n_fields; /**< Saved field count */ uint8_t n_tags; /**< Saved tag count */ uint8_t call_sp; /**< Saved call stack pointer */ uint8_t field_ctx_sp; /**< Saved field context stack pointer */ } ln_fork_t; /*============================================================================ * VM State *============================================================================*/ typedef struct { /* Program */ const ln_program_t *prog; /**< Current program */ /* Input */ const char *input; /**< Input buffer (log message) */ const char *input_end; /**< End of input */ /* Execution state */ uint32_t pc; /**< Program counter (instruction index) */ const char *ip; /**< Input pointer */ /* Backtracking */ ln_fork_t forks[LN_VM_MAX_FORKS]; uint32_t fork_sp; /**< Fork stack pointer */ /* Call stack (for subroutines) */ uint32_t calls[LN_VM_MAX_CALLS]; uint32_t call_sp; /**< Call stack pointer */ /* Field context stack (for ".." 
substitution) */ ln_field_ctx_t field_ctx[LN_VM_MAX_FIELD_CTX]; uint32_t field_ctx_sp; /**< Field context stack pointer */ /* Output - FAST RESULT */ ln_fast_result_t *result; /**< Parse result (optimized) */ ln_arena_t *arena; /**< Arena for overflow storage */ /* Statistics */ uint64_t instr_count; /**< Instructions executed */ uint64_t backtrack_count;/**< Backtrack operations */ /* Matched rule info */ const char *matched_rule; /**< Rule ID if matched */ /* Error info */ const char *error; /**< Error message if failed */ char error_buf[64]; /**< Per-instance error buffer (thread-safe) */ } ln_vm_t; /*============================================================================ * VM Lifecycle *============================================================================*/ int ln_vm_init(ln_vm_t *vm, ln_arena_t *arena); void ln_vm_reset(ln_vm_t *vm); /*============================================================================ * Execution *============================================================================*/ int ln_vm_exec(ln_vm_t *vm, const ln_program_t *prog, const char *input, size_t len, ln_fast_result_t *result); int ln_vm_continue(ln_vm_t *vm); /*============================================================================ * Debug *============================================================================*/ void ln_vm_dump(const ln_vm_t *vm, FILE *fp); void ln_program_disasm(const ln_program_t *prog, FILE *fp); void ln_vm_set_trace(ln_vm_t *vm, bool enable); /*============================================================================ * Inline Helpers *============================================================================*/ static inline ln_program_t ln_program_make(const ln_instr_t *code, uint32_t code_len, const char *name) { ln_program_t p = {0}; p.code = code; p.code_len = code_len; p.name = name; return p; } static inline bool ln_vm_matched(const ln_vm_t *vm) { return vm && vm->matched_rule != NULL; } static inline size_t 
ln_vm_remaining(const ln_vm_t *vm) { if (!vm || vm->ip >= vm->input_end) return 0; return (size_t)(vm->input_end - vm->ip); } static inline size_t ln_vm_consumed(const ln_vm_t *vm) { if (!vm || !vm->input) return 0; return (size_t)(vm->ip - vm->input); } static inline const char * ln_vm_get_field_context(const ln_vm_t *vm) { if (!vm || vm->field_ctx_sp == 0) return NULL; return vm->field_ctx[vm->field_ctx_sp - 1].name; } #ifdef __cplusplus } #endif #endif /* LIBLOGNORM_TURBO_VM_H_INCLUDED */ liblognorm-2.1.0/src/turbo_vm_opt.h000066400000000000000000000174631520037563000173510ustar00rootroot00000000000000/** * @file turbo_vm_opt.h * @brief Optimized VM dispatch using computed goto *//* * Copyright 2024-2026 by Advens and Jeremie Jourdin. * Copyright 2015-2026 by Rainer Gerhards and Adiscon GmbH. * * Released under ASL 2.0. */ #ifndef LIBLOGNORM_TURBO_VM_OPT_H_INCLUDED #define LIBLOGNORM_TURBO_VM_OPT_H_INCLUDED #include "turbo_opcode.h" /*============================================================================ * Computed Goto Dispatch (GCC/Clang Extension) *============================================================================ * * Traditional switch dispatch: * switch (op) { // 1. Load op * case OP_LITERAL: ... // 2. Bounds check * case OP_MATCH: ... // 3. Jump through compiler table * } // 4. Execute handler * // 5. Jump back to switch (break) * * Computed goto dispatch: * void *dispatch[] = { [OP_LITERAL] = &&op_literal, ... }; * goto *dispatch[op]; // 1. Load op * op_literal: ... NEXT(); // 2. Index into table (no bounds check) * op_match: ... NEXT(); // 3. Direct jump to handler * // 4. Execute handler * // 5. Direct jump to next handler * * The key win: steps 2+3 collapse into a single indexed load+jump, * and the CPU branch predictor sees a unique indirect branch site * per opcode handler (better prediction than a single switch site). 
*/ #if defined(__GNUC__) || defined(__clang__) #define LN_VM_COMPUTED_GOTO 1 #else #define LN_VM_COMPUTED_GOTO 0 #endif /* * OP_COUNT must cover the full opcode byte range (0x00-0xFF). * Sparse opcodes are fine — unused slots point to op_invalid. * 256 × 8 bytes = 2KB table, fits in L1d cache. */ #define OP_COUNT 256 #if LN_VM_COMPUTED_GOTO /* * DISPATCH_INIT: build the dispatch table as a local variable. * * We use procedural initialization instead of designated initializers * because Apple Clang requires labels to be visible before taking their * address in static initializers. A local array with procedural init * works correctly on both GCC and Clang. * * The compiler optimizes this into a single memcpy from rodata in * practice (verified with -O2 on both GCC 13 and Clang 17). * * Cost: ~2KB stack + memset + ~44 stores = negligible vs. per-message * VM execution time (50-500 instructions per message). */ #define DISPATCH_INIT() \ void *dispatch_table[OP_COUNT]; \ do { \ /* Fill all slots with op_invalid first */ \ for (int _i = 0; _i < OP_COUNT; _i++) \ dispatch_table[_i] = &&op_invalid; \ /* Control (0x00-0x06) */ \ dispatch_table[OP_HALT] = &&op_halt; \ dispatch_table[OP_MATCH] = &&op_match; \ dispatch_table[OP_JUMP] = &&op_jump; \ dispatch_table[OP_FORK] = &&op_fork; \ dispatch_table[OP_FAIL] = &&op_fail; \ dispatch_table[OP_CALL] = &&op_call; \ dispatch_table[OP_RET] = &&op_ret; \ /* Literals (0x10-0x15) */ \ dispatch_table[OP_LITERAL] = &&op_literal; \ dispatch_table[OP_LITERAL_EXT] = &&op_literal_ext; \ dispatch_table[OP_LITERAL_CI] = &&op_literal_ci; \ dispatch_table[OP_CHAR] = &&op_char; \ dispatch_table[OP_ANY] = &&op_any; \ dispatch_table[OP_CHARSET] = &&op_charset; \ /* Fields (0x20-0x2F) */ \ dispatch_table[OP_FIELD_WORD] = &&op_field_word; \ dispatch_table[OP_FIELD_INT] = &&op_field_int; \ dispatch_table[OP_FIELD_UINT] = &&op_field_uint; \ dispatch_table[OP_FIELD_FLOAT] = &&op_field_float; \ dispatch_table[OP_FIELD_IPV4] = &&op_field_ipv4; \ 
dispatch_table[OP_FIELD_IPV6] = &&op_field_ipv6; \ dispatch_table[OP_FIELD_HEX] = &&op_field_hex; \ dispatch_table[OP_FIELD_QUOTED] = &&op_field_quoted; \ dispatch_table[OP_FIELD_CHAR_TO] = &&op_field_char_to; \ dispatch_table[OP_FIELD_STR_TO] = &&op_field_str_to; \ dispatch_table[OP_FIELD_REST] = &&op_field_rest; \ dispatch_table[OP_FIELD_JSON] = &&op_field_json; \ dispatch_table[OP_FIELD_MAC] = &&op_field_mac; \ dispatch_table[OP_FIELD_DATE] = &&op_field_date; \ dispatch_table[OP_FIELD_REGEX] = &&op_field_regex; \ dispatch_table[OP_FIELD_NAME_VALUE]= &&op_field_name_value; \ /* Skipping (0x40-0x45) */ \ dispatch_table[OP_SKIP_SPACE] = &&op_skip_space; \ dispatch_table[OP_SKIP_SPACE1] = &&op_skip_space1; \ dispatch_table[OP_SKIP_N] = &&op_skip_n; \ dispatch_table[OP_SKIP_TO] = &&op_skip_to; \ dispatch_table[OP_SKIP_PAST] = &&op_skip_past; \ dispatch_table[OP_SKIP_LINE] = &&op_skip_line; \ /* Tags (0x50-0x52) */ \ dispatch_table[OP_TAG] = &&op_tag; \ dispatch_table[OP_RULE_ID] = &&op_rule_id; \ dispatch_table[OP_STATIC_FIELD] = &&op_static_field; \ /* Field Context (0x58-0x5B) */ \ dispatch_table[OP_CTX_PUSH] = &&op_ctx_push; \ dispatch_table[OP_CTX_POP] = &&op_ctx_pop; \ dispatch_table[OP_CTX_NEST] = &&op_ctx_nest; \ dispatch_table[OP_CTX_UNNEST] = &&op_ctx_unnest; \ /* Assertions (0x60-0x62) */ \ dispatch_table[OP_ASSERT_CHAR] = &&op_assert_char; \ dispatch_table[OP_ASSERT_END] = &&op_assert_end; \ dispatch_table[OP_ASSERT_START] = &&op_assert_start; \ /* Special (0x70-0x72) */ \ dispatch_table[OP_SYSLOG_PRI] = &&op_syslog_pri; \ dispatch_table[OP_SYSLOG_TS] = &&op_syslog_ts; \ dispatch_table[OP_CEF_HDR] = &&op_cef_hdr; \ dispatch_table[OP_V2_IPTABLES] = &&op_v2_iptables; \ dispatch_table[OP_CEE_SYSLOG] = &&op_cee_syslog; \ dispatch_table[OP_CHECKPOINT_LEA] = &&op_checkpoint_lea; \ /* Debug (0xF0-0xFF) */ \ dispatch_table[OP_NOP] = &&op_nop; \ dispatch_table[OP_DEBUG] = &&op_debug; \ dispatch_table[OP_INVALID] = &&op_invalid; \ } while (0) /* * DISPATCH: jump to 
the handler for the current instruction at pc. * The prefetch of the NEXT instruction's cache line overlaps with * the current handler's execution. */ #define DISPATCH() \ do { \ PREFETCH(&prog->code[pc + 1]); \ goto *dispatch_table[prog->code[pc].op]; \ } while (0) #define DISPATCH_NEXT() do { pc++; DISPATCH(); } while(0) #define CASE(op) op_##op: #define NEXT() DISPATCH_NEXT() #define BACKTRACK() goto backtrack #else /* !LN_VM_COMPUTED_GOTO (MSVC fallback) */ #define DISPATCH_INIT() /* nothing */ #define DISPATCH() continue #define DISPATCH_NEXT() do { pc++; continue; } while(0) #define CASE(op) case op: #define NEXT() break #define BACKTRACK() goto backtrack #endif /* LN_VM_COMPUTED_GOTO */ /*============================================================================ * Branch Prediction Hints *============================================================================*/ #if defined(__GNUC__) || defined(__clang__) #define LIKELY(x) __builtin_expect(!!(x), 1) #define UNLIKELY(x) __builtin_expect(!!(x), 0) #define PREFETCH(addr) __builtin_prefetch((addr), 0, 3) /* read, high locality */ #define PREFETCH_W(addr) __builtin_prefetch((addr), 1, 3) /* write, high locality */ #else #define LIKELY(x) (x) #define UNLIKELY(x) (x) #define PREFETCH(addr) /* nothing */ #define PREFETCH_W(addr) /* nothing */ #endif /*============================================================================ * Hot/Cold Function Attributes *============================================================================*/ #if defined(__GNUC__) || defined(__clang__) #define HOT_FUNC __attribute__((hot)) #define COLD_FUNC __attribute__((cold)) #define ALWAYS_INLINE __attribute__((always_inline)) inline #define NOINLINE __attribute__((noinline)) #else #define HOT_FUNC #define COLD_FUNC #define ALWAYS_INLINE inline #define NOINLINE #endif #endif /* LIBLOGNORM_TURBO_VM_OPT_H_INCLUDED */ 
liblognorm-2.1.0/src/v1_liblognorm.c000066400000000000000000000051061520037563000173660ustar00rootroot00000000000000/* This file implements the liblognorm API. * See header file for descriptions. * * liblognorm - a fast samples-based log normalization library * Copyright 2013-2015 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. 
*/ #include "config.h" #include #include #include "liblognorm.h" #include "v1_liblognorm.h" #include "v1_ptree.h" #include "lognorm.h" #include "annot.h" #include "v1_samp.h" #define ERR_ABORT {r = 1; goto done; } #define CHECK_CTX \ if(ctx->objID != LN_ObjID_CTX) { \ r = -1; \ goto done; \ } ln_ctx ln_v1_inherittedCtx(ln_ctx parent) { ln_ctx child = ln_initCtx(); if (child != NULL) { child->opts = parent->opts; child->dbgCB = parent->dbgCB; child->dbgCookie = parent->dbgCookie; child->version = parent->version; child->ptree = ln_newPTree(child, NULL); } return child; } int ln_v1_loadSample(ln_ctx ctx, const char *buf) { struct ln_v1_samp *samp; // Something bad happened - no new sample if ((samp = ln_v1_processSamp(ctx, buf, strlen(buf))) == NULL) { return 1; } ln_v1_sampFree(ctx, samp); return 0; } int ln_v1_loadSamples(ln_ctx ctx, const char *file) { int r = 0; FILE *repo; struct ln_v1_samp *samp; int isEof = 0; char *fn_to_free = NULL; CHECK_CTX; ctx->conf_file = fn_to_free = strdup(file); ctx->conf_ln_nbr = 0; if(file == NULL) ERR_ABORT; if((repo = fopen(file, "r")) == NULL) { ln_errprintf(ctx, errno, "cannot open file %s", file); ERR_ABORT; } while(!isEof) { if((samp = ln_v1_sampRead(ctx, repo, &isEof)) == NULL) { /* TODO: what exactly to do? */ } else { ln_v1_sampFree(ctx, samp); } } fclose(repo); ctx->conf_file = NULL; done: free((void*)fn_to_free); return r; } liblognorm-2.1.0/src/v1_liblognorm.h000066400000000000000000000131041520037563000173700ustar00rootroot00000000000000/** * @file liblognorm.h * @brief The public liblognorm API. * * Functions other than those defined here MUST not be called by * a liblognorm "user" application. * * This file is meant to be included by applications using liblognorm. * For lognorm library files themselves, include "lognorm.h". *//** * @mainpage * Liblognorm is an easy to use and fast samples-based log normalization * library. 
* * It can be passed a stream of arbitrary log messages, one at a time, and for * each message it will output well-defined name-value pairs and a set of * tags describing the message. * * For further details, see its initial announcement available at * https://rainer.gerhards.net/2010/10/introducing-liblognorm.html * * The public interface of this library is described in liblognorm.h. * * Liblognorm fully supports Unicode. Like most Linux tools, it operates * on UTF-8 natively, called "passive mode". This was decided because this * way we can keep the size of data structures small while still supporting * all of the world's languages (actually more than when we did UCS-2). * * At the technical level, we can handle UTF-8 multibyte sequences transparently. * Liblognorm needs to look at a few US-ASCII characters to do the * sample base parsing (things to indicate fields), so this is no * issue. Inside the parse tree, a multibyte sequence can simply be processed * as if it were a sequence of different characters that each make up their * own symbol. In fact, this even allows for somewhat greater parsing * speed. *//* * * liblognorm - a fast samples-based log normalization library * Copyright 2010-2013 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details.
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #ifndef V1_LIBLOGNORM_H_INCLUDED #define V1_LIBLOGNORM_H_INCLUDED #include "liblognorm.h" /** * Inherit control attributes from a library context. * * This does not copy the parse-tree, but does copy * behaviour-controlling attributes such as enableRegex. * * Just as with ln_initCtx, ln_exitCtx() must be called on a library * context that is no longer needed. * * @param[in] parent The library context to inherit from. * * @return new library context or NULL if an error occurred */ ln_ctx ln_v1_inherittedCtx(ln_ctx parent); /** * Loads a single sample stored in buffer buf into the context. * * @note * The sample object created while processing the buffer is released * by this function itself; the caller receives only the status code * and has nothing to free. * * @param[in] ctx current library context * @param[in] buf NUL-terminated cstr containing the contents of the sample * @return Returns zero on success, something else otherwise. */ int ln_v1_loadSample(ln_ctx ctx, const char *buf); /** * Load a (log) sample file. * * The file must contain log samples in syntactically correct format. Samples are added * to the set already loaded in the current context. If there is a sample with duplicate * semantics, this sample will be ignored. Most importantly, this can \b not be used * to change tag assignments for a given sample. * * @param[in] ctx The library context to load the samples into. * @param[in] file Name of file to be loaded. * * @return Returns zero on success, something else otherwise. */ int ln_v1_loadSamples(ln_ctx ctx, const char *file); /** * Normalize a message. * * This is the main library entry point. It is called with a message * to normalize and will return a normalized in-memory representation * of it. * * If an error occurs, the function returns -1.
In that case, an * in-memory event representation has been generated if event is * non-NULL. In that case, the event contains further error details in * normalized form. * * @note * This function works on byte-counted strings and as such is able to * process NUL bytes if they occur inside the message. On the other hand, * this means the correct message size, \b excluding the NUL byte, * must be provided. * * @param[in] ctx The library context to use. * @param[in] str The message string (see note above). * @param[in] strLen The length of the message in bytes. * @param[out] json_p A new event record or NULL if an error occurred. Must be * destructed if no longer needed. * * @return Returns zero on success, something else otherwise. */ int ln_v1_normalize(ln_ctx ctx, const char *str, size_t strLen, struct json_object **json_p); /** * Create a single sample. * * @param[in] ctx library context; marked as unused in this prototype */ struct ln_v1_samp* ln_v1_sampCreate(ln_ctx __attribute__((unused)) ctx); /* here we add some stuff from the compatibility layer. A separate include * would be cleaner, but would potentially require changes all over the * place. So doing it here is better. The respective replacement * functions should usually be found under ./compat -- rgerhards, 2015-05-20 */ #endif /* #ifndef V1_LIBLOGNORM_H_INCLUDED */ liblognorm-2.1.0/src/v1_parser.c000066400000000000000002475561520037563000165370ustar00rootroot00000000000000/* * liblognorm - a fast samples-based log normalization library * Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version.
/* some helpers */

/**
 * Parse an unsigned decimal number from the start of a byte-counted buffer.
 *
 * Digits are consumed until the first non-digit or until the buffer is
 * exhausted. On return *buf points to the first byte that was not consumed
 * and *lenBuf holds the number of bytes left.
 *
 * @param[in,out] buf     current read position
 * @param[in,out] lenBuf  number of bytes available at *buf
 * @return the parsed value; 0 if no digit was present. Values that do not
 *         fit into an int saturate at the largest int instead of
 *         overflowing; all digits are consumed nevertheless.
 */
static inline int
hParseInt(const unsigned char **buf, size_t *lenBuf)
{
	/* largest positive int, spelled so that no extra header is required */
	const int maxVal = (int)(~0u >> 1);
	const unsigned char *p = *buf;
	size_t len = *lenBuf;
	int i = 0;

	while(len > 0 && isdigit(*p)) {
		const int digit = *p - '0';
		/* i * 10 + digit would overflow (undefined for signed int) */
		if(i > (maxVal - digit) / 10)
			i = maxVal;
		else
			i = i * 10 + digit;
		++p;
		--len;
	}

	*buf = p;
	*lenBuf = len;
	return i;
}
*/ #define PARSER(ParserName) \ int ln_parse##ParserName(const char *const str, const size_t strLen, \ size_t *const offs, \ __attribute__((unused)) const ln_fieldList_t *node, \ size_t *parsed, \ __attribute__((unused)) struct json_object **value) \ { \ int r = LN_WRONGPARSER; \ __attribute__((unused)) es_str_t *ed = node->data; \ *parsed = 0; #define FAILParser \ goto parserdone; /* suppress warnings */ \ parserdone: \ r = 0; \ goto done; /* suppress warnings */ \ done: #define ENDFailParser \ return r; \ } /** * Utilities to allow constructors of complex parser's to * easily process field-declaration arguments. */ #define FIELD_ARG_SEPERATOR ":" #define MAX_FIELD_ARGS 10 struct pcons_args_s { int argc; char *argv[MAX_FIELD_ARGS]; }; typedef struct pcons_args_s pcons_args_t; static void free_pcons_args(pcons_args_t** dat_p) { pcons_args_t *dat = *dat_p; *dat_p = NULL; if (! dat) { return; } while((--(dat->argc)) >= 0) { if (dat->argv[dat->argc] != NULL) free(dat->argv[dat->argc]); } free(dat); } static pcons_args_t* pcons_args(es_str_t *args, int expected_argc) { pcons_args_t *dat = NULL; char* orig_str = NULL; if ((dat = malloc(sizeof(pcons_args_t))) == NULL) goto fail; dat->argc = 0; if (args != NULL) { orig_str = es_str2cstr(args, NULL); char *str = orig_str; while (dat->argc < MAX_FIELD_ARGS) { int i = dat->argc++; char *next = (dat->argc == expected_argc) ? 
NULL : strstr(str, FIELD_ARG_SEPERATOR); if (next == NULL) { if ((dat->argv[i] = strdup(str)) == NULL) goto fail; break; } else { if ((dat->argv[i] = strndup(str, next - str)) == NULL) goto fail; next++; } str = next; } } goto done; fail: if (dat != NULL) free_pcons_args(&dat); done: if (orig_str != NULL) free(orig_str); return dat; } static const char* pcons_arg(pcons_args_t *dat, int i, const char* dflt_val) { if (i >= dat->argc) return dflt_val; return dat->argv[i]; } static char* pcons_arg_copy(pcons_args_t *dat, int i, const char* dflt_val) { const char *str = pcons_arg(dat, i, dflt_val); return (str == NULL) ? NULL : strdup(str); } static void pcons_unescape_arg(pcons_args_t *dat, int i) { char *arg = (char*) pcons_arg(dat, i, NULL); es_str_t *str = NULL; if (arg != NULL) { str = es_newStrFromCStr(arg, strlen(arg)); if (str != NULL) { es_unescapeStr(str); free(arg); dat->argv[i] = es_str2cstr(str, NULL); es_deleteStr(str); } } } /** * Parse a TIMESTAMP as specified in RFC5424 (subset of RFC3339). */ PARSER(RFC5424Date) const unsigned char *pszTS; /* variables to temporarily hold time information while we parse */ __attribute__((unused)) int year; int month; int day; int hour; /* 24 hour clock */ int minute; int second; __attribute__((unused)) int secfrac; /* fractional seconds (must be 32 bit!) */ __attribute__((unused)) int secfracPrecision; int OffsetHour; /* UTC offset in hours */ int OffsetMinute; /* UTC offset in minutes */ size_t len; size_t orglen; /* end variables to temporarily hold time information while we parse */ pszTS = (unsigned char*) str + *offs; len = orglen = strLen - *offs; year = hParseInt(&pszTS, &len); /* We take the liberty to accept slightly malformed timestamps e.g. in * the format of 2003-9-1T1:0:0. 
*/ if(len == 0 || *pszTS++ != '-') goto done; --len; month = hParseInt(&pszTS, &len); if(month < 1 || month > 12) goto done; if(len == 0 || *pszTS++ != '-') goto done; --len; day = hParseInt(&pszTS, &len); if(day < 1 || day > 31) goto done; if(len == 0 || *pszTS++ != 'T') goto done; --len; hour = hParseInt(&pszTS, &len); if(hour < 0 || hour > 23) goto done; if(len == 0 || *pszTS++ != ':') goto done; --len; minute = hParseInt(&pszTS, &len); if(minute < 0 || minute > 59) goto done; if(len == 0 || *pszTS++ != ':') goto done; --len; second = hParseInt(&pszTS, &len); if(second < 0 || second > 60) goto done; /* Now let's see if we have secfrac */ if(len > 0 && *pszTS == '.') { --len; const unsigned char *pszStart = ++pszTS; secfrac = hParseInt(&pszTS, &len); secfracPrecision = (int) (pszTS - pszStart); } else { secfracPrecision = 0; secfrac = 0; } /* check the timezone */ if(len == 0) goto done; if(*pszTS == 'Z') { --len; pszTS++; /* eat Z */ } else if((*pszTS == '+') || (*pszTS == '-')) { --len; pszTS++; OffsetHour = hParseInt(&pszTS, &len); if(OffsetHour < 0 || OffsetHour > 23) goto done; if(len == 0 || *pszTS++ != ':') goto done; --len; OffsetMinute = hParseInt(&pszTS, &len); if(OffsetMinute < 0 || OffsetMinute > 59) goto done; } else { /* there MUST be TZ information */ goto done; } if(len > 0) { if(*pszTS != ' ') /* if it is not a space, it can not be a "good" time */ goto done; } /* we had success, so update parse pointer */ *parsed = orglen - len; r = 0; /* success */ done: return r; } /** * Parse a RFC3164 Date. */ PARSER(RFC3164Date) const unsigned char *p; size_t len, orglen; /* variables to temporarily hold time information while we parse */ __attribute__((unused)) int month; int day; #if 0 /* TODO: why does this still exist? 
*/ int year = 0; /* 0 means no year provided */ #endif int hour; /* 24 hour clock */ int minute; int second; p = (unsigned char*) str + *offs; orglen = len = strLen - *offs; /* If we look at the month (Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec), * we may see the following character sequences occur: * * J(an/u(n/l)), Feb, Ma(r/y), A(pr/ug), Sep, Oct, Nov, Dec * * We will use this for parsing, as it probably is the * fastest way to parse it. */ if(len < 3) goto done; switch(*p++) { case 'j': case 'J': if(*p == 'a' || *p == 'A') { ++p; if(*p == 'n' || *p == 'N') { ++p; month = 1; } else goto done; } else if(*p == 'u' || *p == 'U') { ++p; if(*p == 'n' || *p == 'N') { ++p; month = 6; } else if(*p == 'l' || *p == 'L') { ++p; month = 7; } else goto done; } else goto done; break; case 'f': case 'F': if(*p == 'e' || *p == 'E') { ++p; if(*p == 'b' || *p == 'B') { ++p; month = 2; } else goto done; } else goto done; break; case 'm': case 'M': if(*p == 'a' || *p == 'A') { ++p; if(*p == 'r' || *p == 'R') { ++p; month = 3; } else if(*p == 'y' || *p == 'Y') { ++p; month = 5; } else goto done; } else goto done; break; case 'a': case 'A': if(*p == 'p' || *p == 'P') { ++p; if(*p == 'r' || *p == 'R') { ++p; month = 4; } else goto done; } else if(*p == 'u' || *p == 'U') { ++p; if(*p == 'g' || *p == 'G') { ++p; month = 8; } else goto done; } else goto done; break; case 's': case 'S': if(*p == 'e' || *p == 'E') { ++p; if(*p == 'p' || *p == 'P') { ++p; month = 9; } else goto done; } else goto done; break; case 'o': case 'O': if(*p == 'c' || *p == 'C') { ++p; if(*p == 't' || *p == 'T') { ++p; month = 10; } else goto done; } else goto done; break; case 'n': case 'N': if(*p == 'o' || *p == 'O') { ++p; if(*p == 'v' || *p == 'V') { ++p; month = 11; } else goto done; } else goto done; break; case 'd': case 'D': if(*p == 'e' || *p == 'E') { ++p; if(*p == 'c' || *p == 'C') { ++p; month = 12; } else goto done; } else goto done; break; default: goto done; } len -= 3; /* done month 
*/ if(len == 0 || *p++ != ' ') goto done; --len; /* we accept a slightly malformed timestamp with one-digit days. */ if(*p == ' ') { --len; ++p; } day = hParseInt(&p, &len); if(day < 1 || day > 31) goto done; if(len == 0 || *p++ != ' ') goto done; --len; /* time part */ hour = hParseInt(&p, &len); if(hour > 1970 && hour < 2100) { /* if so, we assume this actually is a year. This is a format found * e.g. in Cisco devices. * year = hour; */ /* re-query the hour, this time it must be valid */ if(len == 0 || *p++ != ' ') goto done; --len; hour = hParseInt(&p, &len); } if(hour < 0 || hour > 23) goto done; if(len == 0 || *p++ != ':') goto done; --len; minute = hParseInt(&p, &len); if(minute < 0 || minute > 59) goto done; if(len == 0 || *p++ != ':') goto done; --len; second = hParseInt(&p, &len); if(second < 0 || second > 60) goto done; /* we provide support for an extra ":" after the date. While this is an * invalid format, it occurs frequently enough (e.g. with Cisco devices) * to permit it as a valid case. -- rgerhards, 2008-09-12 */ if(len > 0 && *p == ':') { ++p; /* just skip past it */ --len; } /* we had success, so update parse pointer */ *parsed = orglen - len; r = 0; /* success */ done: return r; } /** * Parse a Number. * Note that a number is an abstracted concept. We always represent it * as 64 bits (but may later change our mind if performance dictates so). */ PARSER(Number) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; for (i = *offs; i < strLen && isdigit(c[i]); i++); if (i == *offs) goto done; /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /** * Parse a Real-number in floating-pt form. */ PARSER(Float) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; int seen_point = 0; i = *offs; if (c[i] == '-') i++; for (; i < strLen; i++) { if (c[i] == '.') { if (seen_point != 0) break; seen_point = 1; } else if (! 
isdigit(c[i])) { break; } } if (i == *offs) goto done; /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /** * Parse a hex Number. * A hex number begins with 0x and contains only hex digits until the terminating * whitespace. Note that if a non-hex character is detected inside the number string, * this is NOT considered to be a number. */ PARSER(HexNumber) const char *c; size_t i = *offs; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; if(c[i] != '0' || c[i+1] != 'x') goto done; for (i += 2 ; i < strLen && isxdigit(c[i]); i++); if (i == *offs || !isspace(c[i])) goto done; /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /** * Parse a kernel timestamp. * This is a fixed format, see * https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/kernel/printk/printk.c?id=refs/tags/v4.0#n1011 * This is the code that generates it: * sprintf(buf, "[%5lu.%06lu] ", (unsigned long)ts, rem_nsec / 1000); * We accept up to 12 digits for ts, everything above that for sure is * no timestamp. */ #define LEN_KERNEL_TIMESTAMP 14 PARSER(KernelTimestamp) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; if(c[i] != '[' || i+LEN_KERNEL_TIMESTAMP > strLen || !isdigit(c[i+1]) || !isdigit(c[i+2]) || !isdigit(c[i+3]) || !isdigit(c[i+4]) || !isdigit(c[i+5]) ) goto done; i += 6; for(int j = 0 ; j < 7 && i < strLen && isdigit(c[i]) ; ) ++i, ++j; /* just scan */ if(i >= strLen || c[i] != '.') goto done; ++i; /* skip over '.' */ if( i+7 > strLen || !isdigit(c[i+0]) || !isdigit(c[i+1]) || !isdigit(c[i+2]) || !isdigit(c[i+3]) || !isdigit(c[i+4]) || !isdigit(c[i+5]) || c[i+6] != ']' ) goto done; i += 7; /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /** * Parse whitespace. * This parses all whitespace until the first non-whitespace character * is found. 
This is primarily a tool to skip to the next "word" if * the exact number of whitespace characters (and type of whitespace) * is not known. The current parsing position MUST be on a whitespace, * else the parser does not match. * This parser is also a forward-compatibility tool for the upcoming * slsa (simple log structure analyser) tool. */ PARSER(Whitespace) const char *c; size_t i = *offs; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; if(!isspace(c[i])) goto done; for (i++ ; i < strLen && isspace(c[i]); i++); /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /** * Parse a word. * A word is a SP-delimited entity. The parser always works, except if * the offset is position on a space upon entry. */ PARSER(Word) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; /* search end of word */ while(i < strLen && c[i] != ' ') i++; if(i == *offs) goto done; /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /** * Parse everything up to a specific string. * swisskid, 2015-01-21 */ PARSER(StringTo) const char *c; char *toFind = NULL; size_t i, j, k, m; int chkstr; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); assert(ed != NULL); k = es_strlen(ed) - 1; toFind = es_str2cstr(ed, NULL); c = str; i = *offs; chkstr = 0; /* Total hunt for letter */ while(chkstr == 0 && i < strLen ) { i++; if(c[i] == toFind[0]) { /* Found the first letter, now find the rest of the string */ j = 0; m = i; while(m < strLen && j < k ) { m++; j++; if(c[m] != toFind[j]) break; if (j == k) chkstr = 1; } } } if(i == *offs || i == strLen || c[i] != toFind[0]) goto done; /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: if(toFind != NULL) free(toFind); return r; } /** * Parse a alphabetic word. * A alpha word is composed of characters for which isalpha returns true. 
* The parser dones if there is no alpha character at all. */ PARSER(Alpha) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; /* search end of word */ while(i < strLen && isalpha(c[i])) i++; if(i == *offs) { goto done; } /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /** * Parse everything up to a specific character. * The character must be the only char inside extra data passed to the parser. * It is a program error if strlen(ed) != 1. It is considered a format error if * a) the to-be-parsed buffer is already positioned on the terminator character * b) there is no terminator until the end of the buffer * In those cases, the parsers declares itself as not being successful, in all * other cases a string is extracted. */ PARSER(CharTo) const char *c; unsigned char cTerm; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); assert(es_strlen(ed) == 1); cTerm = *(es_getBufAddr(ed)); c = str; i = *offs; /* search end of word */ while(i < strLen && c[i] != cTerm) i++; if(i == *offs || i == strLen || c[i] != cTerm) goto done; /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /** * Parse everything up to a specific character, or up to the end of string. * The character must be the only char inside extra data passed to the parser. * It is a program error if strlen(ed) != 1. * This parser always returns success. * By nature of the parser, it is required that end of string or the separator * follows this field in rule. 
*/ PARSER(CharSeparated) const char *c; unsigned char cTerm; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); assert(es_strlen(ed) == 1); cTerm = *(es_getBufAddr(ed)); c = str; i = *offs; /* search end of word */ while(i < strLen && c[i] != cTerm) i++; /* success, persist */ *parsed = i - *offs; r = 0; /* success */ return r; } /** * Parse yet-to-be-matched portion of string by re-applying * top-level rules again. */ #define DEFAULT_REMAINING_FIELD_NAME "tail" struct recursive_parser_data_s { ln_ctx ctx; char* remaining_field; int free_ctx; }; PARSER(Recursive) assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); struct recursive_parser_data_s* pData = (struct recursive_parser_data_s*) node->parser_data; if (pData != NULL) { int remaining_len = strLen - *offs; const char *remaining_str = str + *offs; json_object *unparsed = NULL; CHKN(*value = json_object_new_object()); ln_normalize(pData->ctx, remaining_str, remaining_len, value); if (json_object_object_get_ex(*value, UNPARSED_DATA_KEY, &unparsed)) { json_object_put(*value); *value = NULL; *parsed = 0; } else if (pData->remaining_field != NULL && json_object_object_get_ex(*value, pData->remaining_field, &unparsed)) { *parsed = strLen - *offs - json_object_get_string_len(unparsed); json_object_object_del(*value, pData->remaining_field); } else { *parsed = strLen - *offs; } } r = 0; /* success */ done: return r; } typedef ln_ctx (ctx_constructor)(ln_ctx, pcons_args_t*, const char*); static void* _recursive_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx, int no_of_args, int remaining_field_arg_idx, int free_ctx, ctx_constructor *fn) { int r = LN_BADCONFIG; char* name = NULL; struct recursive_parser_data_s *pData = NULL; pcons_args_t *args = NULL; CHKN(name = es_str2cstr(node->name, NULL)); CHKN(pData = calloc(1, sizeof(struct recursive_parser_data_s))); pData->free_ctx = free_ctx; pData->remaining_field = NULL; CHKN(args = pcons_args(node->raw_data, no_of_args)); 
CHKN(pData->ctx = fn(ctx, args, name)); CHKN(pData->remaining_field = pcons_arg_copy(args, remaining_field_arg_idx, DEFAULT_REMAINING_FIELD_NAME)); r = 0; done: if (r != 0) { if (name == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for recursive/descent field name"); else if (pData == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for parser-data for field: %s", name); else if (args == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for argument-parsing for field: %s", name); else if (pData->ctx == NULL) ln_dbgprintf(ctx, "recursive/descent normalizer context creation " "doneed for field: %s", name); else if (pData->remaining_field == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for remaining-field name for " "recursive/descent field: %s", name); recursive_parser_data_destructor((void**) &pData); } free(name); free_pcons_args(&args); return pData; } static ln_ctx identity_recursive_parse_ctx_constructor(ln_ctx parent_ctx, __attribute__((unused)) pcons_args_t* args, __attribute__((unused)) const char* field_name) { return parent_ctx; } void* recursive_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) { return _recursive_parser_data_constructor(node, ctx, 1, 0, 0, identity_recursive_parse_ctx_constructor); } static ln_ctx child_recursive_parse_ctx_constructor(ln_ctx parent_ctx, pcons_args_t* args, const char* field_name) { int r = LN_BADCONFIG; const char* rb = NULL; ln_ctx ctx = NULL; pcons_unescape_arg(args, 0); CHKN(rb = pcons_arg(args, 0, NULL)); CHKN(ctx = ln_v1_inherittedCtx(parent_ctx)); CHKR(ln_v1_loadSamples(ctx, rb)); done: if (r != 0) { if (rb == NULL) ln_dbgprintf(parent_ctx, "file-name for descent rulebase not provided for field: %s", field_name); else if (ctx == NULL) ln_dbgprintf(parent_ctx, "couldn't allocate memory to create descent-field normalizer " "context for field: %s", field_name); else ln_dbgprintf(parent_ctx, "couldn't load samples into descent context for field: %s", field_name); if (ctx != NULL) ln_exitCtx(ctx); ctx 
= NULL; } return ctx; } void* descent_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) { return _recursive_parser_data_constructor(node, ctx, 2, 1, 1, child_recursive_parse_ctx_constructor); } void recursive_parser_data_destructor(void** dataPtr) { if (*dataPtr != NULL) { struct recursive_parser_data_s *pData = (struct recursive_parser_data_s*) *dataPtr; if (pData->free_ctx && pData->ctx != NULL) { ln_exitCtx(pData->ctx); pData->ctx = NULL; } if (pData->remaining_field != NULL) free(pData->remaining_field); free(pData); *dataPtr = NULL; } }; /** * Parse string tokenized by given char-sequence * The sequence may appear 0 or more times, but zero times means 1 token. * NOTE: its not 0 tokens, but 1 token. * * The token found is parsed according to the field-type provided after * tokenizer char-seq. */ #define DEFAULT_MATCHED_FIELD_NAME "default" struct tokenized_parser_data_s { es_str_t *tok_str; ln_ctx ctx; char *remaining_field; int use_default_field; int free_ctx; }; typedef struct tokenized_parser_data_s tokenized_parser_data_t; PARSER(Tokenized) assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); tokenized_parser_data_t *pData = (tokenized_parser_data_t*) node->parser_data; if (pData != NULL ) { json_object *json_p = NULL; if (pData->use_default_field) CHKN(json_p = json_object_new_object()); json_object *matches = NULL; CHKN(matches = json_object_new_array()); int remaining_len = strLen - *offs; const char *remaining_str = str + *offs; json_object *remaining = NULL; json_object *match = NULL; while (remaining_len > 0) { if (! pData->use_default_field) { json_object_put(json_p); json_p = json_object_new_object(); } /*TODO: handle null condition gracefully*/ ln_normalize(pData->ctx, remaining_str, remaining_len, &json_p); if (remaining) json_object_put(remaining); if (pData->use_default_field && json_object_object_get_ex(json_p, DEFAULT_MATCHED_FIELD_NAME, &match)) { json_object_array_add(matches, json_object_get(match)); } else if (! 
(pData->use_default_field || json_object_object_get_ex(json_p, UNPARSED_DATA_KEY, &match))) { json_object_array_add(matches, json_object_get(json_p)); } else { if (json_object_array_length(matches) > 0) { remaining_len += es_strlen(pData->tok_str); break; } else { json_object_put(json_p); json_object_put(matches); FAIL(LN_WRONGPARSER); } } if (json_object_object_get_ex(json_p, pData->remaining_field, &remaining)) { remaining_len = json_object_get_string_len(remaining); if (remaining_len > 0) { remaining_str = json_object_get_string(json_object_get(remaining)); json_object_object_del(json_p, pData->remaining_field); if (es_strbufcmp(pData->tok_str, (const unsigned char *)remaining_str, es_strlen(pData->tok_str))) { json_object_put(remaining); break; } else { remaining_str += es_strlen(pData->tok_str); remaining_len -= es_strlen(pData->tok_str); } } } else { remaining_len = 0; break; } if (pData->use_default_field) json_object_object_del(json_p, DEFAULT_MATCHED_FIELD_NAME); } json_object_put(json_p); /* success, persist */ *parsed = (strLen - *offs) - remaining_len; *value = matches; } else { FAIL(LN_BADPARSERSTATE); } r = 0; /* success */ done: return r; } void tokenized_parser_data_destructor(void** dataPtr) { tokenized_parser_data_t *data = (tokenized_parser_data_t*) *dataPtr; if (data->tok_str != NULL) es_deleteStr(data->tok_str); if (data->free_ctx && (data->ctx != NULL)) ln_exitCtx(data->ctx); if (data->remaining_field != NULL) free(data->remaining_field); free(data); *dataPtr = NULL; } static void load_generated_parser_samples(ln_ctx ctx, const char* const field_descr, const int field_descr_len, const char* const suffix, const int length) { static const char* const RULE_PREFIX = "rule=:%"DEFAULT_MATCHED_FIELD_NAME":";/*TODO: extract nice constants*/ static const int RULE_PREFIX_LEN = 15; char *sample_str = NULL; es_str_t *field_decl = es_newStrFromCStr(RULE_PREFIX, RULE_PREFIX_LEN); if (! 
field_decl) goto free; if (es_addBuf(&field_decl, field_descr, field_descr_len) || es_addBuf(&field_decl, "%", 1) || es_addBuf(&field_decl, suffix, length)) { ln_dbgprintf(ctx, "couldn't prepare field for tokenized field-picking: '%s'", field_descr); goto free; } sample_str = es_str2cstr(field_decl, NULL); if (! sample_str) { ln_dbgprintf(ctx, "couldn't prepare sample-string for: '%s'", field_descr); goto free; } ln_v1_loadSample(ctx, sample_str); free: if (sample_str) free(sample_str); if (field_decl) es_deleteStr(field_decl); } static ln_ctx generate_context_with_field_as_prefix(ln_ctx parent, const char* field_descr, int field_descr_len) { int r = LN_BADCONFIG; const char* remaining_field = "%"DEFAULT_REMAINING_FIELD_NAME":rest%"; ln_ctx ctx = NULL; CHKN(ctx = ln_v1_inherittedCtx(parent)); load_generated_parser_samples(ctx, field_descr, field_descr_len, remaining_field, strlen(remaining_field)); load_generated_parser_samples(ctx, field_descr, field_descr_len, "", 0); r = 0; done: if (r != 0) { ln_exitCtx(ctx); ctx = NULL; } return ctx; } static ln_fieldList_t* parse_tokenized_content_field(ln_ctx ctx, const char* field_descr, size_t field_descr_len) { es_str_t* tmp = NULL; es_str_t* descr = NULL; ln_fieldList_t *node = NULL; int r = 0; CHKN(tmp = es_newStr(80)); CHKN(descr = es_newStr(80)); const char* field_prefix = "%" DEFAULT_MATCHED_FIELD_NAME ":"; CHKR(es_addBuf(&descr, field_prefix, strlen(field_prefix))); CHKR(es_addBuf(&descr, field_descr, field_descr_len)); CHKR(es_addChar(&descr, '%')); es_size_t offset = 0; CHKN(node = ln_v1_parseFieldDescr(ctx, descr, &offset, &tmp, &r)); if (offset != es_strlen(descr)) FAIL(LN_BADPARSERSTATE); done: if (r != 0) { if (node != NULL) ln_deletePTreeNode(node); node = NULL; } if (descr != NULL) es_deleteStr(descr); if (tmp != NULL) es_deleteStr(tmp); return node; } void* tokenized_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) { int r = LN_BADCONFIG; char* name = es_str2cstr(node->name, NULL); pcons_args_t 
*args = NULL; tokenized_parser_data_t *pData = NULL; const char *field_descr = NULL; ln_fieldList_t* field = NULL; const char *tok = NULL; CHKN(args = pcons_args(node->raw_data, 2)); CHKN(pData = calloc(1, sizeof(tokenized_parser_data_t))); pcons_unescape_arg(args, 0); CHKN(tok = pcons_arg(args, 0, NULL)); CHKN(pData->tok_str = es_newStrFromCStr(tok, strlen(tok))); es_unescapeStr(pData->tok_str); CHKN(field_descr = pcons_arg(args, 1, NULL)); const int field_descr_len = strlen(field_descr); pData->free_ctx = 1; CHKN(field = parse_tokenized_content_field(ctx, field_descr, field_descr_len)); if (field->parser == ln_parseRecursive) { pData->use_default_field = 0; struct recursive_parser_data_s *dat = (struct recursive_parser_data_s*) field->parser_data; if (dat != NULL) { CHKN(pData->remaining_field = strdup(dat->remaining_field)); pData->free_ctx = dat->free_ctx; pData->ctx = dat->ctx; dat->free_ctx = 0; } } else { pData->use_default_field = 1; CHKN(pData->ctx = generate_context_with_field_as_prefix(ctx, field_descr, field_descr_len)); } if (pData->remaining_field == NULL) CHKN(pData->remaining_field = strdup(DEFAULT_REMAINING_FIELD_NAME)); r = 0; done: if (r != 0) { if (name == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for tokenized-field name"); else if (args == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for argument-parsing for field: %s", name); else if (pData == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for parser-data for field: %s", name); else if (tok == NULL) ln_dbgprintf(ctx, "token-separator not provided for field: %s", name); else if (pData->tok_str == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for token-separator " "for field: %s", name); else if (field_descr == NULL) ln_dbgprintf(ctx, "field-type not provided for field: %s", name); else if (field == NULL) ln_dbgprintf(ctx, "couldn't resolve single-token field-type for tokenized field: %s", name); else if (pData->ctx == NULL) ln_dbgprintf(ctx, "couldn't initialize 
normalizer-context for field: %s", name); else if (pData->remaining_field == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for " "remaining-field-name for field: %s", name); if (pData) tokenized_parser_data_destructor((void**) &pData); } if (name != NULL) free(name); if (field != NULL) ln_deletePTreeNode(field); if (args) free_pcons_args(&args); return pData; } #ifdef FEATURE_REGEXP /** * Parse string matched by provided posix extended regex. * * Please note that using regex field in most cases will be * significantly slower than other field-types. */ struct regex_parser_data_s { pcre2_code *re; int consume_group; int return_group; int max_groups; }; PARSER(Regex) assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); PCRE2_SIZE *ovector; pcre2_match_data *match_data = NULL; struct regex_parser_data_s *pData = (struct regex_parser_data_s*) node->parser_data; if (pData != NULL) { match_data = pcre2_match_data_create_from_pattern(pData->re, NULL); if (match_data == NULL) FAIL(LN_NOMEM); int result = pcre2_match( pData->re, /* the compiled pattern */ (PCRE2_SPTR)str, /* the subject string */ (PCRE2_SIZE)strLen, /* the length of the subject */ (PCRE2_SIZE)*offs, /* start at offset 0 in the subject */ 0, /* default options */ match_data, /* block for storing the result */ NULL); /* use default match context */ if (result == 0) result = pData->max_groups; if (result > pData->consume_group) { ovector = pcre2_get_ovector_pointer(match_data); printf("Match succeeded at offset %d\n", (int)ovector[0]); /* please check 'man 3 pcre2api' for cryptic '2 * n' and '2 * n + 1' magic * in a nutshell, within the ovector, the first in each pair of values is set to the * offset of the first code unit of a substring, and the second is set to the * offset of the first code unit after the end of a substring. 
*/ if (ovector[2 * pData->consume_group] == *offs) { *parsed = ovector[2 * pData->consume_group + 1] - ovector[2 * pData->consume_group]; if (pData->consume_group != pData->return_group) { char* val = NULL; if((val = strndup(str + ovector[2 * pData->return_group], ovector[2 * pData->return_group + 1] - ovector[2 * pData->return_group])) == NULL) { FAIL(LN_NOMEM); } *value = json_object_new_string(val); free(val); if (*value == NULL) { FAIL(LN_NOMEM); } } } } } r = 0; /* success */ done: pcre2_match_data_free(match_data); return r; } static const char* regex_parser_configure_consume_and_return_group(pcons_args_t* args, struct regex_parser_data_s *pData) { const char* consume_group_parse_error = "couldn't parse consume-group number"; const char* return_group_parse_error = "couldn't parse return-group number"; char* tmp = NULL; const char* consume_grp_str = NULL; const char* return_grp_str = NULL; if ((consume_grp_str = pcons_arg(args, 1, "0")) == NULL || strlen(consume_grp_str) == 0) return consume_group_parse_error; if ((return_grp_str = pcons_arg(args, 2, consume_grp_str)) == NULL || strlen(return_grp_str) == 0) return return_group_parse_error; errno = 0; pData->consume_group = strtol(consume_grp_str, &tmp, 10); if (errno != 0 || strlen(tmp) != 0) return consume_group_parse_error; pData->return_group = strtol(return_grp_str, &tmp, 10); if (errno != 0 || strlen(tmp) != 0) return return_group_parse_error; return NULL; } void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) { int r = LN_BADCONFIG; char* exp = NULL; const char* grp_parse_err = NULL; pcons_args_t* args = NULL; char* name = NULL; struct regex_parser_data_s *pData = NULL; const char *unescaped_exp = NULL; PCRE2_SIZE erroffset = 0; int errcode = 0; CHKN(name = es_str2cstr(node->name, NULL)); if (! 
ctx->opts & LN_CTXOPT_ALLOW_REGEX) FAIL(LN_BADCONFIG); CHKN(pData = malloc(sizeof(struct regex_parser_data_s))); pData->re = NULL; CHKN(args = pcons_args(node->raw_data, 3)); pData->consume_group = pData->return_group = 0; CHKN(unescaped_exp = pcons_arg(args, 0, NULL)); pcons_unescape_arg(args, 0); CHKN(exp = pcons_arg_copy(args, 0, NULL)); if ((grp_parse_err = regex_parser_configure_consume_and_return_group(args, pData)) != NULL) FAIL(LN_BADCONFIG); CHKN(pData->re = pcre2_compile((PCRE2_SPTR)exp, PCRE2_ZERO_TERMINATED, 0, &errcode, &erroffset, NULL)); pData->max_groups = ((pData->consume_group > pData->return_group) ? pData->consume_group : pData->return_group) + 1; r = 0; done: if (r != 0) { if (name == NULL) ln_dbgprintf(ctx, "couldn't allocate memory regex-field name"); else if (! ctx->opts & LN_CTXOPT_ALLOW_REGEX) ln_dbgprintf(ctx, "regex support is not enabled for: '%s' " "(please check lognorm context initialization)", name); else if (pData == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for parser-data for field: %s", name); else if (args == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for argument-parsing for field: %s", name); else if (unescaped_exp == NULL) ln_dbgprintf(ctx, "regular-expression missing for field: '%s'", name); else if (exp == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for regex-string for field: '%s'", name); else if (grp_parse_err != NULL) ln_dbgprintf(ctx, "%s for: '%s'", grp_parse_err, name); else if (pData->re == NULL) { PCRE2_UCHAR errbuffer[256]; pcre2_get_error_message(errcode, errbuffer, sizeof(errbuffer)); ln_dbgprintf(ctx, "couldn't compile regex(encountered error '%s' at char '%d' in pattern) " "for regex-matched field: '%s'", errbuffer, (int)erroffset, name); } regex_parser_data_destructor((void**)&pData); } if (exp != NULL) free(exp); if (args != NULL) free_pcons_args(&args); if (name != NULL) free(name); return pData; } void regex_parser_data_destructor(void** dataPtr) { if ((*dataPtr) != NULL) { struct 
regex_parser_data_s *pData = (struct regex_parser_data_s*) *dataPtr; if (pData->re != NULL) pcre2_code_free(pData->re); free(pData); *dataPtr = NULL; } } #endif /** * Parse yet-to-be-matched portion of string by re-applying * top-level rules again. */ typedef enum interpret_type { /* If you change this, be sure to update json_type_to_name() too */ it_b10int, it_b16int, it_floating_pt, it_boolean } interpret_type; struct interpret_parser_data_s { ln_ctx ctx; enum interpret_type intrprt; }; static json_object* interpret_as_int(json_object *value, int base) { if (json_object_is_type(value, json_type_string)) { return json_object_new_int64(strtol(json_object_get_string(value), NULL, base)); } else if (json_object_is_type(value, json_type_int)) { return value; } else { return NULL; } } static json_object* interpret_as_double(json_object *value) { double val = json_object_get_double(value); return json_object_new_double(val); } static json_object* interpret_as_boolean(json_object *value) { json_bool val; if (json_object_is_type(value, json_type_string)) { const char* str = json_object_get_string(value); val = (strcasecmp(str, "false") == 0 || strcasecmp(str, "no") == 0) ? 
0 : 1; } else { val = json_object_get_boolean(value); } return json_object_new_boolean(val); } static int reinterpret_value(json_object **value, enum interpret_type to_type) { switch(to_type) { case it_b10int: *value = interpret_as_int(*value, 10); break; case it_b16int: *value = interpret_as_int(*value, 16); break; case it_floating_pt: *value = interpret_as_double(*value); break; case it_boolean: *value = interpret_as_boolean(*value); break; default: return 0; } return 1; } PARSER(Interpret) assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); json_object *unparsed = NULL; json_object *parsed_raw = NULL; struct interpret_parser_data_s* pData = (struct interpret_parser_data_s*) node->parser_data; if (pData != NULL) { int remaining_len = strLen - *offs; const char *remaining_str = str + *offs; CHKN(parsed_raw = json_object_new_object()); ln_normalize(pData->ctx, remaining_str, remaining_len, &parsed_raw); if (json_object_object_get_ex(parsed_raw, UNPARSED_DATA_KEY, NULL)) { *parsed = 0; } else { json_object_object_get_ex(parsed_raw, DEFAULT_MATCHED_FIELD_NAME, value); json_object_object_get_ex(parsed_raw, DEFAULT_REMAINING_FIELD_NAME, &unparsed); if (reinterpret_value(value, pData->intrprt)) { *parsed = strLen - *offs - json_object_get_string_len(unparsed); } } json_object_put(parsed_raw); } r = 0; /* success */ done: return r; } void* interpret_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) { int r = LN_BADCONFIG; char* name = NULL; struct interpret_parser_data_s *pData = NULL; pcons_args_t *args = NULL; int bad_interpret = 0; const char* type_str = NULL; const char *field_type = NULL; CHKN(name = es_str2cstr(node->name, NULL)); CHKN(pData = calloc(1, sizeof(struct interpret_parser_data_s))); CHKN(args = pcons_args(node->raw_data, 2)); CHKN(type_str = pcons_arg(args, 0, NULL)); if (strcmp(type_str, "int") == 0 || strcmp(type_str, "base10int") == 0) { pData->intrprt = it_b10int; } else if (strcmp(type_str, "base16int") == 0) { pData->intrprt 
= it_b16int; } else if (strcmp(type_str, "float") == 0) { pData->intrprt = it_floating_pt; } else if (strcmp(type_str, "bool") == 0) { pData->intrprt = it_boolean; } else { bad_interpret = 1; FAIL(LN_BADCONFIG); } CHKN(field_type = pcons_arg(args, 1, NULL)); CHKN(pData->ctx = generate_context_with_field_as_prefix(ctx, field_type, strlen(field_type))); r = 0; done: if (r != 0) { if (name == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for interpret-field name"); else if (pData == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for parser-data for field: %s", name); else if (args == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for argument-parsing for field: %s", name); else if (type_str == NULL) ln_dbgprintf(ctx, "no type provided for interpretation of field: %s", name); else if (bad_interpret != 0) ln_dbgprintf(ctx, "interpretation to unknown type '%s' requested for field: %s", type_str, name); else if (field_type == NULL) ln_dbgprintf(ctx, "field-type to actually match the content not provided for " "field: %s", name); else if (pData->ctx == NULL) ln_dbgprintf(ctx, "couldn't instantiate the normalizer context for matching " "field: %s", name); interpret_parser_data_destructor((void**) &pData); } free(name); free_pcons_args(&args); return pData; } void interpret_parser_data_destructor(void** dataPtr) { if (*dataPtr != NULL) { struct interpret_parser_data_s *pData = (struct interpret_parser_data_s*) *dataPtr; if (pData->ctx != NULL) ln_exitCtx(pData->ctx); free(pData); *dataPtr = NULL; } }; /** * Parse suffixed char-sequence, where suffix is one of many possible suffixes. 
*/ struct suffixed_parser_data_s { int nsuffix; int *suffix_offsets; int *suffix_lengths; char* suffixes_str; ln_ctx ctx; char* value_field_name; char* suffix_field_name; }; PARSER(Suffixed) { assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); json_object *unparsed = NULL; json_object *parsed_raw = NULL; json_object *parsed_value = NULL; json_object *result = NULL; json_object *suffix = NULL; struct suffixed_parser_data_s *pData = (struct suffixed_parser_data_s*) node->parser_data; if (pData != NULL) { int remaining_len = strLen - *offs; const char *remaining_str = str + *offs; int i; CHKN(parsed_raw = json_object_new_object()); ln_normalize(pData->ctx, remaining_str, remaining_len, &parsed_raw); if (json_object_object_get_ex(parsed_raw, UNPARSED_DATA_KEY, NULL)) { *parsed = 0; } else { json_object_object_get_ex(parsed_raw, DEFAULT_MATCHED_FIELD_NAME, &parsed_value); json_object_object_get_ex(parsed_raw, DEFAULT_REMAINING_FIELD_NAME, &unparsed); const char* unparsed_frag = json_object_get_string(unparsed); for(i = 0; i < pData->nsuffix; i++) { const char* possible_suffix = pData->suffixes_str + pData->suffix_offsets[i]; int len = pData->suffix_lengths[i]; if (strncmp(possible_suffix, unparsed_frag, len) == 0) { CHKN(result = json_object_new_object()); CHKN(suffix = json_object_new_string(possible_suffix)); json_object_get(parsed_value); json_object_object_add(result, pData->value_field_name, parsed_value); json_object_object_add(result, pData->suffix_field_name, suffix); *parsed = strLen - *offs - json_object_get_string_len(unparsed) + len; break; } } if (result != NULL) { *value = result; } } } FAILParser if (r != 0) { if (result != NULL) json_object_put(result); } if (parsed_raw != NULL) json_object_put(parsed_raw); } ENDFailParser static struct suffixed_parser_data_s* _suffixed_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx, es_str_t* raw_args, const char* value_field, const char* suffix_field) { int r = LN_BADCONFIG; pcons_args_t* args 
= NULL; char* name = NULL; struct suffixed_parser_data_s *pData = NULL; const char *escaped_tokenizer = NULL; const char *uncopied_suffixes_str = NULL; const char *tokenizer = NULL; char *suffixes_str = NULL; const char *field_type = NULL; char *tok_saveptr = NULL; char *tok_input = NULL; int i = 0; char *tok = NULL; CHKN(name = es_str2cstr(node->name, NULL)); CHKN(pData = calloc(1, sizeof(struct suffixed_parser_data_s))); if (value_field == NULL) value_field = "value"; if (suffix_field == NULL) suffix_field = "suffix"; pData->value_field_name = strdup(value_field); pData->suffix_field_name = strdup(suffix_field); CHKN(args = pcons_args(raw_args, 3)); CHKN(escaped_tokenizer = pcons_arg(args, 0, NULL)); pcons_unescape_arg(args, 0); CHKN(tokenizer = pcons_arg(args, 0, NULL)); CHKN(uncopied_suffixes_str = pcons_arg(args, 1, NULL)); pcons_unescape_arg(args, 1); CHKN(suffixes_str = pcons_arg_copy(args, 1, NULL)); tok_input = suffixes_str; while (strtok_r(tok_input, tokenizer, &tok_saveptr) != NULL) { tok_input = NULL; pData->nsuffix++; } if (pData->nsuffix == 0) { FAIL(LN_INVLDFDESCR); } CHKN(pData->suffix_offsets = calloc(pData->nsuffix, sizeof(int))); CHKN(pData->suffix_lengths = calloc(pData->nsuffix, sizeof(int))); CHKN(pData->suffixes_str = pcons_arg_copy(args, 1, NULL)); tok_input = pData->suffixes_str; while ((tok = strtok_r(tok_input, tokenizer, &tok_saveptr)) != NULL) { tok_input = NULL; pData->suffix_offsets[i] = tok - pData->suffixes_str; pData->suffix_lengths[i++] = strlen(tok); } CHKN(field_type = pcons_arg(args, 2, NULL)); CHKN(pData->ctx = generate_context_with_field_as_prefix(ctx, field_type, strlen(field_type))); r = 0; done: if (r != 0) { if (name == NULL) ln_dbgprintf(ctx, "couldn't allocate memory suffixed-field name"); else if (pData == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for parser-data for field: %s", name); else if (pData->value_field_name == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for value-field's name for field: %s", 
name); else if (pData->suffix_field_name == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for suffix-field's name for field: %s", name); else if (args == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for argument-parsing for field: %s", name); else if (escaped_tokenizer == NULL) ln_dbgprintf(ctx, "suffix token-string missing for field: '%s'", name); else if (tokenizer == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for unescaping token-string for field: '%s'", name); else if (uncopied_suffixes_str == NULL) ln_dbgprintf(ctx, "suffix-list missing for field: '%s'", name); else if (suffixes_str == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for suffix-list for field: '%s'", name); else if (pData->nsuffix == 0) ln_dbgprintf(ctx, "couldn't read suffix-value(s) for field: '%s'", name); else if (pData->suffix_offsets == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for suffix-list element references for field: " "'%s'", name); else if (pData->suffix_lengths == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for suffix-list element lengths for field: '%s'", name); else if (pData->suffixes_str == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for suffix-list for field: '%s'", name); else if (field_type == NULL) ln_dbgprintf(ctx, "field-type declaration missing for field: '%s'", name); else if (pData->ctx == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for normalizer-context for field: '%s'", name); suffixed_parser_data_destructor((void**)&pData); } free_pcons_args(&args); if (suffixes_str != NULL) free(suffixes_str); if (name != NULL) free(name); return pData; } void* suffixed_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) { return _suffixed_parser_data_constructor(node, ctx, node->raw_data, NULL, NULL); } void* named_suffixed_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) { int r = LN_BADCONFIG; pcons_args_t* args = NULL; char* name = NULL; const char* value_field_name = NULL; const char* suffix_field_name = NULL; const 
char* remaining_args = NULL; es_str_t* unnamed_suffix_args = NULL; struct suffixed_parser_data_s* pData = NULL; CHKN(name = es_str2cstr(node->name, NULL)); CHKN(args = pcons_args(node->raw_data, 3)); CHKN(value_field_name = pcons_arg(args, 0, NULL)); CHKN(suffix_field_name = pcons_arg(args, 1, NULL)); CHKN(remaining_args = pcons_arg(args, 2, NULL)); CHKN(unnamed_suffix_args = es_newStrFromCStr(remaining_args, strlen(remaining_args))); CHKN(pData = _suffixed_parser_data_constructor(node, ctx, unnamed_suffix_args, value_field_name, suffix_field_name)); r = 0; done: if (r != 0) { if (name == NULL) ln_dbgprintf(ctx, "couldn't allocate memory named_suffixed-field name"); else if (args == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for argument-parsing for field: %s", name); else if (value_field_name == NULL) ln_dbgprintf(ctx, "key-name for value not provided for field: %s", name); else if (suffix_field_name == NULL) ln_dbgprintf(ctx, "key-name for suffix not provided for field: %s", name); else if (unnamed_suffix_args == NULL) ln_dbgprintf(ctx, "couldn't allocate memory for unnamed-suffix-field args for field: %s", name); else if (pData == NULL) ln_dbgprintf(ctx, "couldn't create parser-data for field: %s", name); suffixed_parser_data_destructor((void**)&pData); } if (unnamed_suffix_args != NULL) free(unnamed_suffix_args); if (args != NULL) free_pcons_args(&args); if (name != NULL) free(name); return pData; } void suffixed_parser_data_destructor(void** dataPtr) { if ((*dataPtr) != NULL) { struct suffixed_parser_data_s *pData = (struct suffixed_parser_data_s*) *dataPtr; if (pData->suffixes_str != NULL) free(pData->suffixes_str); if (pData->suffix_offsets != NULL) free(pData->suffix_offsets); if (pData->suffix_lengths != NULL) free(pData->suffix_lengths); if (pData->value_field_name != NULL) free(pData->value_field_name); if (pData->suffix_field_name != NULL) free(pData->suffix_field_name); if (pData->ctx != NULL) ln_exitCtx(pData->ctx); free(pData); *dataPtr = 
NULL; } } /** * Just get everything till the end of string. */ PARSER(Rest) assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); /* silence the warning about unused variable */ (void)str; /* success, persist */ *parsed = strLen - *offs; r = 0; return r; } /** * Parse a possibly quoted string. In this initial implementation, escaping of the quote * char is not supported. A quoted string is one start starts with a double quote, * has some text (not containing double quotes) and ends with the first double * quote character seen. The extracted string does NOT include the quote characters. * swisskid, 2015-01-21 */ PARSER(OpQuotedString) const char *c; size_t i; char *cstr = NULL; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; if(i == strLen) goto done; if(c[i] != '"') { while(i < strLen && c[i] != ' ') i++; if(i == *offs) goto done; /* success, persist */ *parsed = i - *offs; /* create JSON value to save quoted string contents */ CHKN(cstr = strndup((char*)c + *offs, *parsed)); } else { ++i; /* search end of string */ while(i < strLen && c[i] != '"') i++; if(i == strLen || c[i] != '"') goto done; /* success, persist */ *parsed = i + 1 - *offs; /* "eat" terminal double quote */ /* create JSON value to save quoted string contents */ CHKN(cstr = strndup((char*)c + *offs + 1, *parsed - 2)); } CHKN(*value = json_object_new_string(cstr)); r = 0; /* success */ done: free(cstr); return r; } /** * Parse a quoted string. In this initial implementation, escaping of the quote * char is not supported. A quoted string is one start starts with a double quote, * has some text (not containing double quotes) and ends with the first double * quote character seen. The extracted string does NOT include the quote characters. 
* rgerhards, 2011-01-14 */ PARSER(QuotedString) const char *c; size_t i; char *cstr = NULL; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; if(i + 2 > strLen) goto done; /* needs at least 2 characters */ if(c[i] != '"') goto done; ++i; /* search end of string */ while(i < strLen && c[i] != '"') i++; if(i == strLen || c[i] != '"') goto done; /* success, persist */ *parsed = i + 1 - *offs; /* "eat" terminal double quote */ /* create JSON value to save quoted string contents */ CHKN(cstr = strndup((char*)c + *offs + 1, *parsed - 2)); CHKN(*value = json_object_new_string(cstr)); r = 0; /* success */ done: free(cstr); return r; } /** * Parse an ISO date, that is YYYY-MM-DD (exactly this format). * Note: we do manual loop unrolling -- this is fast AND efficient. * rgerhards, 2011-01-14 */ PARSER(ISODate) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; if(*offs+10 > strLen) goto done; /* if it is not 10 chars, it can't be an ISO date */ /* year */ if(!isdigit(c[i])) goto done; if(!isdigit(c[i+1])) goto done; if(!isdigit(c[i+2])) goto done; if(!isdigit(c[i+3])) goto done; if(c[i+4] != '-') goto done; /* month */ if(c[i+5] == '0') { if(c[i+6] < '1' || c[i+6] > '9') goto done; } else if(c[i+5] == '1') { if(c[i+6] < '0' || c[i+6] > '2') goto done; } else { goto done; } if(c[i+7] != '-') goto done; /* day */ if(c[i+8] == '0') { if(c[i+9] < '1' || c[i+9] > '9') goto done; } else if(c[i+8] == '1' || c[i+8] == '2') { if(!isdigit(c[i+9])) goto done; } else if(c[i+8] == '3') { if(c[i+9] != '0' && c[i+9] != '1') goto done; } else { goto done; } /* success, persist */ *parsed = 10; r = 0; /* success */ done: return r; } /** * Parse a Cisco interface spec. 
Sample for such a spec are: * outside:192.168.52.102/50349 * inside:192.168.1.15/56543 (192.168.1.112/54543) * outside:192.168.1.13/50179 (192.168.1.13/50179)(LOCAL\some.user) * outside:192.168.1.25/41850(LOCAL\RG-867G8-DEL88D879BBFFC8) * inside:192.168.1.25/53 (192.168.1.25/53) (some.user) * 192.168.1.15/0(LOCAL\RG-867G8-DEL88D879BBFFC8) * From this, we conclude the format is: * [interface:]ip/port [SP (ip2/port2)] [[SP](username)] * In order to match, this syntax must start on a non-whitespace char * other than colon. */ PARSER(CiscoInterfaceSpec) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; if(c[i] == ':' || isspace(c[i])) goto done; /* first, check if we have an interface. We do this by trying * to detect if we have an IP. If we have, obviously no interface * is present. Otherwise, we check if we have a valid interface. */ int bHaveInterface = 0; size_t idxInterface = 0; size_t lenInterface = 0; int bHaveIP = 0; size_t lenIP; size_t idxIP = i; if(ln_parseIPv4(str, strLen, &i, node, &lenIP, NULL) == 0) { bHaveIP = 1; i += lenIP - 1; /* position on delimiter */ } else { idxInterface = i; while(i < strLen) { if(isspace(c[i])) goto done; if(c[i] == ':') break; ++i; } lenInterface = i - idxInterface; bHaveInterface = 1; } if(i == strLen) goto done; ++i; /* skip over colon */ /* we now utilize our other parser helpers */ if(!bHaveIP) { idxIP = i; if(ln_parseIPv4(str, strLen, &i, node, &lenIP, NULL) != 0) goto done; i += lenIP; } if(i == strLen || c[i] != '/') goto done; ++i; /* skip slash */ const size_t idxPort = i; size_t lenPort; if(ln_parseNumber(str, strLen, &i, node, &lenPort, NULL) != 0) goto done; i += lenPort; if(i == strLen) goto success; /* check if optional second ip/port is present * We assume we must at least have 5 chars [" (::1)"] */ int bHaveIP2 = 0; size_t idxIP2 = 0, lenIP2 = 0; size_t idxPort2 = 0, lenPort2 = 0; if(i+5 < strLen && c[i] == ' ' && c[i+1] == '(') { size_t iTmp = i+2; 
/* skip over " (" */ idxIP2 = iTmp; if(ln_parseIPv4(str, strLen, &iTmp, node, &lenIP2, NULL) == 0) { iTmp += lenIP2; if(i < strLen || c[iTmp] == '/') { ++iTmp; /* skip slash */ idxPort2 = iTmp; if(ln_parseNumber(str, strLen, &iTmp, node, &lenPort2, NULL) == 0) { iTmp += lenPort2; if(iTmp < strLen && c[iTmp] == ')') { i = iTmp + 1; /* match, so use new index */ bHaveIP2 = 1; } } } } } /* check if optional username is present * We assume we must at least have 3 chars ["(n)"] */ int bHaveUser = 0; size_t idxUser = 0; size_t lenUser = 0; if( (i+2 < strLen && c[i] == '(' && !isspace(c[i+1]) ) || (i+3 < strLen && c[i] == ' ' && c[i+1] == '(' && !isspace(c[i+2])) ) { idxUser = i + ((c[i] == ' ') ? 2 : 1); /* skip [SP]'(' */ size_t iTmp = idxUser; while(iTmp < strLen && !isspace(c[iTmp]) && c[iTmp] != ')') ++iTmp; /* just scan */ if(iTmp < strLen && c[iTmp] == ')') { i = iTmp + 1; /* we have a match, so use new index */ bHaveUser = 1; lenUser = iTmp - idxUser; } } /* all done, save data */ if(value == NULL) goto success; CHKN(*value = json_object_new_object()); json_object *json; if(bHaveInterface) { CHKN(json = json_object_new_string_len(c+idxInterface, lenInterface)); json_object_object_add_ex(*value, "interface", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); } CHKN(json = json_object_new_string_len(c+idxIP, lenIP)); json_object_object_add_ex(*value, "ip", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); CHKN(json = json_object_new_string_len(c+idxPort, lenPort)); json_object_object_add_ex(*value, "port", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); if(bHaveIP2) { CHKN(json = json_object_new_string_len(c+idxIP2, lenIP2)); json_object_object_add_ex(*value, "ip2", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); CHKN(json = json_object_new_string_len(c+idxPort2, lenPort2)); json_object_object_add_ex(*value, "port2", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); } 
if(bHaveUser) { CHKN(json = json_object_new_string_len(c+idxUser, lenUser)); json_object_object_add_ex(*value, "user", json, JSON_C_OBJECT_ADD_KEY_IS_NEW|JSON_C_OBJECT_KEY_IS_CONSTANT); } success: /* success, persist */ *parsed = i - *offs; r = 0; /* success */ done: if(r != 0 && value != NULL && *value != NULL) { json_object_put(*value); *value = NULL; /* to be on the save side */ } return r; } /** * Parse a duration. A duration is similar to a timestamp, except that * it tells about time elapsed. As such, hours can be larger than 23 * and hours may also be specified by a single digit (this, for example, * is commonly done in Cisco software). * Note: we do manual loop unrolling -- this is fast AND efficient. */ PARSER(Duration) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; /* hour is a bit tricky */ if(!isdigit(c[i])) goto done; ++i; if(isdigit(c[i])) ++i; if(c[i] == ':') ++i; else goto done; if(i+5 > strLen) goto done;/* if it is not 5 chars from here, it can't be us */ if(c[i] < '0' || c[i] > '5') goto done; if(!isdigit(c[i+1])) goto done; if(c[i+2] != ':') goto done; if(c[i+3] < '0' || c[i+3] > '5') goto done; if(!isdigit(c[i+4])) goto done; /* success, persist */ *parsed = (i + 5) - *offs; r = 0; /* success */ done: return r; } /** * Parse a timestamp in 24hr format (exactly HH:MM:SS). * Note: we do manual loop unrolling -- this is fast AND efficient. 
* rgerhards, 2011-01-14 */ PARSER(Time24hr) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; if(*offs+8 > strLen) goto done; /* if it is not 8 chars, it can't be us */ /* hour */ if(c[i] == '0' || c[i] == '1') { if(!isdigit(c[i+1])) goto done; } else if(c[i] == '2') { if(c[i+1] < '0' || c[i+1] > '3') goto done; } else { goto done; } /* TODO: the code below is a duplicate of 24hr parser - create common function */ if(c[i+2] != ':') goto done; if(c[i+3] < '0' || c[i+3] > '5') goto done; if(!isdigit(c[i+4])) goto done; if(c[i+5] != ':') goto done; if(c[i+6] < '0' || c[i+6] > '5') goto done; if(!isdigit(c[i+7])) goto done; /* success, persist */ *parsed = 8; r = 0; /* success */ done: return r; } /** * Parse a timestamp in 12hr format (exactly HH:MM:SS). * Note: we do manual loop unrolling -- this is fast AND efficient. * TODO: the code below is a duplicate of 24hr parser - create common function? * rgerhards, 2011-01-14 */ PARSER(Time12hr) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); c = str; i = *offs; if(*offs+8 > strLen) goto done; /* if it is not 8 chars, it can't be us */ /* hour */ if(c[i] == '0') { if(!isdigit(c[i+1])) goto done; } else if(c[i] == '1') { if(c[i+1] < '0' || c[i+1] > '2') goto done; } else { goto done; } if(c[i+2] != ':') goto done; if(c[i+3] < '0' || c[i+3] > '5') goto done; if(!isdigit(c[i+4])) goto done; if(c[i+5] != ':') goto done; if(c[i+6] < '0' || c[i+6] > '5') goto done; if(!isdigit(c[i+7])) goto done; /* success, persist */ *parsed = 8; r = 0; /* success */ done: return r; } /* helper to IPv4 address parser, checks the next set of numbers. * Syntax 1 to 3 digits, value together not larger than 255. 
* @param[in] str parse buffer * @param[in/out] offs offset into buffer, updated if successful * @return 0 if OK, 1 otherwise */ static int chkIPv4AddrByte(const char *str, size_t strLen, size_t *offs) { int val = 0; int r = 1; /* default: done -- simplifies things */ const char *c; size_t i = *offs; c = str; if(i == strLen || !isdigit(c[i])) goto done; val = c[i++] - '0'; if(i < strLen && isdigit(c[i])) { val = val * 10 + c[i++] - '0'; if(i < strLen && isdigit(c[i])) val = val * 10 + c[i++] - '0'; } if(val > 255) /* cannot be a valid IP address byte! */ goto done; *offs = i; r = 0; done: return r; } /** * Parser for IPv4 addresses. */ PARSER(IPv4) const char *c; size_t i; assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); i = *offs; if(i + 7 > strLen) { /* IPv4 addr requires at least 7 characters */ goto done; } c = str; /* byte 1*/ if(chkIPv4AddrByte(str, strLen, &i) != 0) goto done; if(i == strLen || c[i++] != '.') goto done; /* byte 2*/ if(chkIPv4AddrByte(str, strLen, &i) != 0) goto done; if(i == strLen || c[i++] != '.') goto done; /* byte 3*/ if(chkIPv4AddrByte(str, strLen, &i) != 0) goto done; if(i == strLen || c[i++] != '.') goto done; /* byte 4 - we do NOT need any char behind it! */ if(chkIPv4AddrByte(str, strLen, &i) != 0) goto done; /* if we reach this point, we found a valid IP address */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /* skip past the IPv6 address block, parse pointer is set to * first char after the block. Returns an error if already at end * of string. * @param[in] str parse buffer * @param[in/out] offs offset into buffer, updated if successful * @return 0 if OK, 1 otherwise */ static int skipIPv6AddrBlock(const char *const __restrict__ str, const size_t strLen, size_t *const __restrict__ offs) { int j; if(*offs == strLen) return 1; for(j = 0 ; j < 4 && *offs+j < strLen && isxdigit(str[*offs+j]) ; ++j) /*just skip*/ ; *offs += j; return 0; } /** * Parser for IPv6 addresses. * Bases on RFC4291 Section 2.2. 
The address must be followed * by whitespace or end-of-string, else it is not considered * a valid address. This prevents false positives. */ PARSER(IPv6) const char *c; size_t i; size_t beginBlock; /* last block begin in case we need IPv4 parsing */ int hasIPv4 = 0; int nBlocks = 0; /* how many blocks did we already have? */ int bHad0Abbrev = 0; /* :: already used? */ assert(str != NULL); assert(offs != NULL); assert(parsed != NULL); i = *offs; if(i + 2 > strLen) { /* IPv6 addr requires at least 2 characters ("::") */ goto done; } c = str; /* check that first block is non-empty */ if(! ( isxdigit(c[i]) || (c[i] == ':' && c[i+1] == ':') ) ) goto done; /* try for all potential blocks plus one more (so we see errors!) */ for(int j = 0 ; j < 9 ; ++j) { beginBlock = i; if(skipIPv6AddrBlock(str, strLen, &i) != 0) goto done; nBlocks++; if(i == strLen) goto chk_ok; if(isspace(c[i])) goto chk_ok; if(c[i] == '.'){ /* IPv4 processing! */ hasIPv4 = 1; break; } if(c[i] != ':') goto done; i++; /* "eat" ':' */ if(i == strLen) goto chk_ok; /* check for :: */ if(bHad0Abbrev) { if(c[i] == ':') goto done; } else { if(c[i] == ':') { bHad0Abbrev = 1; ++i; if(i == strLen) goto chk_ok; } } } if(hasIPv4) { size_t ipv4_parsed; --nBlocks; /* prevent pure IPv4 address to be recognized */ if(beginBlock == *offs) goto done; i = beginBlock; if(ln_parseIPv4(str, strLen, &i, node, &ipv4_parsed, NULL) != 0) goto done; i += ipv4_parsed; } chk_ok: /* we are finished parsing, check if things are ok */ if(nBlocks > 8) goto done; if(bHad0Abbrev && nBlocks >= 8) goto done; /* now check if trailing block is missing. Note that i is already * on next character, so we need to go two back. Two are always * present, else we would not reach this code here. */ if(c[i-1] == ':' && c[i-2] != ':') goto done; /* if we reach this point, we found a valid IP address */ *parsed = i - *offs; r = 0; /* success */ done: return r; } /* check if a char is valid inside a name of the iptables motif. 
* We try to keep the set as slim as possible, because the iptables * parser may otherwise create a very broad match (especially the * inclusion of simple words like "DF" cause grief here). * Note: we have taken the permitted set from iptables log samples. * Report bugs if we missed some additional rules. */ static inline int isValidIPTablesNameChar(const char c) { /* right now, upper case only is valid */ return ('A' <= c && c <= 'Z') ? 1 : 0; } /* helper to iptables parser, parses out a a single name=value pair */ static int parseIPTablesNameValue(const char *const __restrict__ str, const size_t strLen, size_t *const __restrict__ offs, struct json_object *const __restrict__ valroot) { int r = LN_WRONGPARSER; size_t i = *offs; char *name = NULL; const size_t iName = i; while(i < strLen && isValidIPTablesNameChar(str[i])) ++i; if(i == iName || (i < strLen && str[i] != '=' && str[i] != ' ')) goto done; /* no name at all! */ const ssize_t lenName = i - iName; ssize_t iVal = -1; size_t lenVal = i - iVal; if(i < strLen && str[i] != ' ') { /* we have a real value (not just a flag name like "DF") */ ++i; /* skip '=' */ iVal = i; while(i < strLen && !isspace(str[i])) ++i; lenVal = i - iVal; } /* parsing OK */ *offs = i; r = 0; if(valroot == NULL) goto done; CHKN(name = malloc(lenName+1)); memcpy(name, str+iName, lenName); name[lenName] = '\0'; json_object *json; if(iVal == -1) { json = NULL; } else { CHKN(json = json_object_new_string_len(str+iVal, lenVal)); } json_object_object_add(valroot, name, json); done: free(name); return r; } /** * Parser for iptables logs (the structured part). * This parser is named "v2-iptables" because of a traditional * parser named "iptables", which we do not want to replace, at * least right now (we may re-think this before the first release). * For performance reasons, this works in two stages. In the first * stage, we only detect if the motif is correct. The second stage is * only called when we know it is. 
In it, we go once again over the * message again and actually extract the data. This is done because * data extraction is relatively expensive and in most cases we will * have much more frequent mismatches than matches. * Note that this motif must have at least one field, otherwise it * could detect things that are not iptables to be it. Further limits * may be imposed in the future as we see additional need. * added 2015-04-30 rgerhards */ PARSER(v2IPTables) size_t i = *offs; int nfields = 0; /* stage one */ while(i < strLen) { CHKR(parseIPTablesNameValue(str, strLen, &i, NULL)); ++nfields; /* exactly one SP is permitted between fields */ if(i < strLen && str[i] == ' ') ++i; } if(nfields < 2) { FAIL(LN_WRONGPARSER); } /* success, persist */ *parsed = i - *offs; r = 0; /* stage two */ if(value == NULL) goto done; i = *offs; CHKN(*value = json_object_new_object()); while(i < strLen) { CHKR(parseIPTablesNameValue(str, strLen, &i, *value)); while(i < strLen && isspace(str[i])) ++i; } done: if(r != 0 && value != NULL && *value != NULL) { json_object_put(*value); *value = NULL; } return r; } /** * Parse JSON. This parser tries to find JSON data inside a message. * If it finds valid JSON, it will extract it. Extra data after the * JSON is permitted. * Note: the json-c JSON parser treats whitespace after the actual * json to be part of the json. So in essence, any whitespace is * processed by this parser. We use the same semantics to keep things * neatly in sync. If json-c changes for some reason or we switch to * an alternate json lib, we probably need to be sure to keep that * behaviour, and probably emulate it. * added 2015-04-28 by rgerhards, v1.1.2 */ PARSER(JSON) const size_t i = *offs; struct json_tokener *tokener = NULL; if(i == strLen) goto done; if(str[i] != '{' && str[i] != '[') { /* this can't be json, see RFC4627, Sect. 
2 * see this bug in json-c: * https://github.com/json-c/json-c/issues/181 * In any case, it's better to do this quick check, * even if json-c did not have the bug because this * check here is much faster than calling the parser. */ goto done; } if((tokener = json_tokener_new()) == NULL) goto done; struct json_object *const json = json_tokener_parse_ex(tokener, str+i, (int) (strLen - i)); if(json == NULL) goto done; /* success, persist */ *parsed = (i + tokener->char_offset) - *offs; r = 0; /* success */ if(value == NULL) { json_object_put(json); } else { *value = json; } done: if(tokener != NULL) json_tokener_free(tokener); return r; } /* check if a char is valid inside a name of a NameValue list * The set of valid characters may be extended if there is good * need to do so. We have selected the current set carefully, but * may have overlooked some cases. */ static inline int isValidNameChar(const char c) { return (isalnum(c) || c == '.' || c == '_' || c == '-' ) ? 1 : 0; } /* helper to NameValue parser, parses out a a single name=value pair * * name must be alphanumeric characters, value must be non-whitespace * characters, if quoted than with symmetric quotes. Supported formats * - name=value * - name="value" * - name='value' * Note "name=" is valid and means a field with empty value. * TODO: so far, quote characters are not permitted WITHIN quoted values. */ static int parseNameValue(const char *const __restrict__ str, const size_t strLen, size_t *const __restrict__ offs, struct json_object *const __restrict__ valroot) { int r = LN_WRONGPARSER; size_t i = *offs; char *name = NULL; const size_t iName = i; while(i < strLen && isValidNameChar(str[i])) ++i; if(i == iName || str[i] != '=') goto done; /* no name at all! 
*/ const size_t lenName = i - iName; ++i; /* skip '=' */ const size_t iVal = i; while(i < strLen && !isspace(str[i])) ++i; const size_t lenVal = i - iVal; /* parsing OK */ *offs = i; r = 0; if(valroot == NULL) goto done; CHKN(name = malloc(lenName+1)); memcpy(name, str+iName, lenName); name[lenName] = '\0'; json_object *json; CHKN(json = json_object_new_string_len(str+iVal, lenVal)); json_object_object_add(valroot, name, json); done: free(name); return r; } /** * Parse CEE syslog. * This essentially is a JSON parser, with additional restrictions: * The message must start with "@cee:" and json must immediately follow (whitespace permitted). * after the JSON, there must be no other non-whitespace characters. * In other words: the message must consist of a single JSON object, * only. * added 2015-04-28 by rgerhards, v1.1.2 */ PARSER(CEESyslog) size_t i = *offs; struct json_tokener *tokener = NULL; struct json_object *json = NULL; if(strLen < i + 7 || /* "@cee:{}" is minimum text */ str[i] != '@' || str[i+1] != 'c' || str[i+2] != 'e' || str[i+3] != 'e' || str[i+4] != ':') goto done; /* skip whitespace */ for(i += 5 ; i < strLen && isspace(str[i]) ; ++i) /* just skip */; if(i == strLen || str[i] != '{') goto done; /* note: we do not permit arrays in CEE mode */ if((tokener = json_tokener_new()) == NULL) goto done; json = json_tokener_parse_ex(tokener, str+i, (int) (strLen - i)); if(json == NULL) goto done; if(i + tokener->char_offset != strLen) goto done; /* success, persist */ *parsed = strLen; r = 0; /* success */ if(value != NULL) { *value = json; json = NULL; /* do NOT free below! */ } done: if(tokener != NULL) json_tokener_free(tokener); if(json != NULL) json_object_put(json); return r; } /** * Parser for name/value pairs. * On entry must point to alnum char. All following chars must be * name/value pairs delimited by whitespace up until the end of string. * For performance reasons, this works in two stages. 
In the first * stage, we only detect if the motif is correct. The second stage is * only called when we know it is. In it, we go once again over the * message again and actually extract the data. This is done because * data extraction is relatively expensive and in most cases we will * have much more frequent mismatches than matches. * added 2015-04-25 rgerhards */ PARSER(NameValue) size_t i = *offs; /* stage one */ while(i < strLen) { CHKR(parseNameValue(str, strLen, &i, NULL)); while(i < strLen && isspace(str[i])) ++i; } /* success, persist */ *parsed = i - *offs; r = 0; /* success */ /* stage two */ if(value == NULL) goto done; i = *offs; CHKN(*value = json_object_new_object()); while(i < strLen) { CHKR(parseNameValue(str, strLen, &i, *value)); while(i < strLen && isspace(str[i])) ++i; } /* TODO: fix mem leak if alloc json fails */ done: return r; } /** * Parse a MAC layer address. * The standard (IEEE 802) format for printing MAC-48 addresses in * human-friendly form is six groups of two hexadecimal digits, * separated by hyphens (-) or colons (:), in transmission order * (e.g. 01-23-45-67-89-ab or 01:23:45:67:89:ab ). * This form is also commonly used for EUI-64. * from: http://en.wikipedia.org/wiki/MAC_address * * This parser must start on a hex digit. 
* added 2015-05-04 by rgerhards, v1.1.2 */ PARSER(MAC48) size_t i = *offs; char delim; if(strLen < i + 17 || /* this motif has exactly 17 characters */ !isxdigit(str[i]) || !isxdigit(str[i+1]) ) FAIL(LN_WRONGPARSER); if(str[i+2] == ':') delim = ':'; else if(str[i+2] == '-') delim = '-'; else FAIL(LN_WRONGPARSER); /* first byte ok */ if(!isxdigit(str[i+3]) || !isxdigit(str[i+4]) || str[i+5] != delim || /* 2nd byte ok */ !isxdigit(str[i+6]) || !isxdigit(str[i+7]) || str[i+8] != delim || /* 3rd byte ok */ !isxdigit(str[i+9]) || !isxdigit(str[i+10]) || str[i+11] != delim || /* 4th byte ok */ !isxdigit(str[i+12]) || !isxdigit(str[i+13]) || str[i+14] != delim || /* 5th byte ok */ !isxdigit(str[i+15]) || !isxdigit(str[i+16]) /* 6th byte ok */ ) FAIL(LN_WRONGPARSER); /* success, persist */ *parsed = 17; r = 0; /* success */ if(value != NULL) { CHKN(*value = json_object_new_string_len(str+i, 17)); } done: return r; } /* This parses the extension value and updates the index * to point to the end of it. */ static int cefParseExtensionValue(const char *const __restrict__ str, const size_t strLen, size_t *__restrict__ iEndVal) { int r = 0; size_t i = *iEndVal; size_t iLastWordBegin; /* first find next unquoted equal sign and record begin of * last word in front of it - this is the actual end of the * current name/value pair and the begin of the next one. */ int hadSP = 0; int inEscape = 0; for(iLastWordBegin = 0 ; i < strLen ; ++i) { if(inEscape) { if(str[i] != '=' && str[i] != '\\' && str[i] != 'r' && str[i] != 'n') FAIL(LN_WRONGPARSER); inEscape = 0; } else { if(str[i] == '=') { break; } else if(str[i] == '\\') { inEscape = 1; } else if(str[i] == ' ') { hadSP = 1; } else { if(hadSP) { iLastWordBegin = i; hadSP = 0; } } } } /* Note: iLastWordBegin can never be at offset zero, because * the CEF header starts there! */ if(i < strLen) { *iEndVal = (iLastWordBegin == 0) ? 
i : iLastWordBegin - 1; } else { *iEndVal = i; } done: return r; } /* must be positioned on first char of name, returns index * of end of name. * Note: ArcSight violates the CEF spec ifself: they generate * leading underscores in their extension names, which are * definitely not alphanumeric. We still accept them... * They also seem to use dots. */ static int cefParseName(const char *const __restrict__ str, const size_t strLen, size_t *const __restrict__ i) { int r = 0; while(*i < strLen && str[*i] != '=') { if(!(isalnum(str[*i]) || str[*i] == '_' || str[*i] == '.')) FAIL(LN_WRONGPARSER); ++(*i); } done: return r; } /* parse CEF extensions. They are basically name=value * pairs with the ugly exception that values may contain * spaces but need NOT to be quoted. Thankfully, at least * names are specified as being alphanumeric without spaces * in them. So we must add a lookahead parser to check if * a word is a name (and thus the begin of a new pair) or * not. This is done by subroutines. */ static int cefParseExtensions(const char *const __restrict__ str, const size_t strLen, size_t *const __restrict__ offs, json_object *const __restrict__ jroot) { int r = 0; size_t i = *offs; size_t iName, lenName; size_t iValue, lenValue; char *name = NULL; char *value = NULL; while(i < strLen) { while(i < strLen && str[i] == ' ') ++i; iName = i; CHKR(cefParseName(str, strLen, &i)); if(i+1 >= strLen || str[i] != '=') FAIL(LN_WRONGPARSER); lenName = i - iName; ++i; /* skip '=' */ iValue = i; CHKR(cefParseExtensionValue(str, strLen, &i)); lenValue = i - iValue; ++i; /* skip past value */ if(jroot != NULL) { CHKN(name = malloc(sizeof(char) * (lenName + 1))); memcpy(name, str+iName, lenName); name[lenName] = '\0'; CHKN(value = malloc(sizeof(char) * (lenValue + 1))); /* copy value but escape it */ size_t iDst = 0; for(size_t iSrc = 0 ; iSrc < lenValue ; ++iSrc) { if(str[iValue+iSrc] == '\\') { ++iSrc; /* we know the next char must exist! 
*/ switch(str[iValue+iSrc]) { case '=': value[iDst] = '='; break; case 'n': value[iDst] = '\n'; break; case 'r': value[iDst] = '\r'; break; case '\\': value[iDst] = '\\'; break; default: break; } } else { value[iDst] = str[iValue+iSrc]; } ++iDst; } value[iDst] = '\0'; json_object *json; CHKN(json = json_object_new_string(value)); json_object_object_add(jroot, name, json); free(name); name = NULL; free(value); value = NULL; } } done: free(name); free(value); return r; } /* gets a CEF header field. Must be positioned on the * first char after the '|' in front of field. * Note that '|' may be escaped as "\|", which also means * we need to supprot "\\" (see CEF spec for details). * We return the string in *val, if val is non-null. In * that case we allocate memory that the caller must free. * This is necessary because there are potentially escape * sequences inside the string. */ static int cefGetHdrField(const char *const __restrict__ str, const size_t strLen, size_t *const __restrict__ offs, char **val) { int r = 0; size_t i = *offs; assert(str[i] != '|'); while(i < strLen && str[i] != '|') { if(str[i] == '\\') { ++i; /* skip esc char */ if(str[i] != '\\' && str[i] != '|') FAIL(LN_WRONGPARSER); } ++i; /* scan to next delimiter */ } if(str[i] != '|') FAIL(LN_WRONGPARSER); const size_t iBegin = *offs; /* success, persist */ *offs = i + 1; if(val == NULL) { r = 0; goto done; } const size_t len = i - iBegin; CHKN(*val = malloc(len + 1)); size_t iDst = 0; for(size_t iSrc = 0 ; iSrc < len ; ++iSrc) { if(str[iBegin+iSrc] == '\\') ++iSrc; /* we already checked above that this is OK! */ (*val)[iDst++] = str[iBegin+iSrc]; } (*val)[iDst] = 0; r = 0; done: return r; } /** * Parser for ArcSight Common Event Format (CEF) version 0. 
* added 2015-05-05 by rgerhards, v1.1.2 */ PARSER(CEF) size_t i = *offs; char *vendor = NULL; char *product = NULL; char *version = NULL; char *sigID = NULL; char *name = NULL; char *severity = NULL; /* minimum header: "CEF:0|x|x|x|x|x|x|" --> 17 chars */ if(strLen < i + 17 || str[i] != 'C' || str[i+1] != 'E' || str[i+2] != 'F' || str[i+3] != ':' || str[i+4] != '0' || str[i+5] != '|' ) FAIL(LN_WRONGPARSER); i += 6; /* position on '|' */ CHKR(cefGetHdrField(str, strLen, &i, (value == NULL) ? NULL : &vendor)); CHKR(cefGetHdrField(str, strLen, &i, (value == NULL) ? NULL : &product)); CHKR(cefGetHdrField(str, strLen, &i, (value == NULL) ? NULL : &version)); CHKR(cefGetHdrField(str, strLen, &i, (value == NULL) ? NULL : &sigID)); CHKR(cefGetHdrField(str, strLen, &i, (value == NULL) ? NULL : &name)); CHKR(cefGetHdrField(str, strLen, &i, (value == NULL) ? NULL : &severity)); ++i; /* skip over terminal '|' */ /* OK, we now know we have a good header. Now, we need * to process extensions. * This time, we do NOT pre-process the extension, but rather * persist them directly to JSON. This is contrary to other * parsers, but as the CEF header is pretty unique, this time * it is extremely unlikely we will get a no-match during * extension processing. Even if so, nothing bad happens, as * the extracted data is discarded. But the regular case saves * us processing time and complexity. The only time when we * cannot directly process it is when the caller asks us not * to persist the data. So this must be handled differently. 
*/ size_t iBeginExtensions = i; CHKR(cefParseExtensions(str, strLen, &i, NULL)); /* success, persist */ *parsed = *offs - i; r = 0; /* success */ if(value != NULL) { CHKN(*value = json_object_new_object()); json_object *json; CHKN(json = json_object_new_string(vendor)); json_object_object_add(*value, "DeviceVendor", json); CHKN(json = json_object_new_string(product)); json_object_object_add(*value, "DeviceProduct", json); CHKN(json = json_object_new_string(version)); json_object_object_add(*value, "DeviceVersion", json); CHKN(json = json_object_new_string(sigID)); json_object_object_add(*value, "SignatureID", json); CHKN(json = json_object_new_string(name)); json_object_object_add(*value, "Name", json); CHKN(json = json_object_new_string(severity)); json_object_object_add(*value, "Severity", json); json_object *jext; CHKN(jext = json_object_new_object()); json_object_object_add(*value, "Extensions", jext); i = iBeginExtensions; cefParseExtensions(str, strLen, &i, jext); } done: if(r != 0 && value != NULL && *value != NULL) { json_object_put(*value); value = NULL; } free(vendor); free(product); free(version); free(sigID); free(name); free(severity); return r; } /** * Parser for Checkpoint LEA on-disk format. * added 2015-06-18 by rgerhards, v1.1.2 */ PARSER(CheckpointLEA) size_t i = *offs; size_t iName, lenName; size_t iValue, lenValue; int foundFields = 0; char *name = NULL; char *val = NULL; while(i < strLen) { while(i < strLen && str[i] == ' ') /* skip leading SP */ ++i; if(i == strLen) { /* OK if just trailing space */ if(foundFields == 0) FAIL(LN_WRONGPARSER); break; /* we are done with the loop, all processed */ } else { ++foundFields; } iName = i; /* TODO: do a stricter check? ... 
but we don't have a spec */ while(i < strLen && str[i] != ':') { ++i; } if(i+1 >= strLen || str[i] != ':') FAIL(LN_WRONGPARSER); lenName = i - iName; ++i; /* skip ':' */ while(i < strLen && str[i] == ' ') /* skip leading SP */ ++i; if(i == strLen) FAIL(LN_WRONGPARSER); if(str[i] == '"') { int continuous_backslash = 0; iValue = i + 1; ++i; while(i < strLen && (str[i] != '"' || (continuous_backslash & 1) == 1)) { if(str[i] == '\\') { ++continuous_backslash; } else { continuous_backslash = 0; } ++i; } if(i >= strLen || str[i] != '"') FAIL(LN_WRONGPARSER); lenValue = i - iValue; ++i; /* skip closing quote */ } else { iValue = i; while(i < strLen && str[i] != ';') { ++i; } lenValue = i - iValue; } if(i >= strLen || str[i] != ';') FAIL(LN_WRONGPARSER); ++i; /* skip ';' */ if(value != NULL) { CHKN(name = malloc(sizeof(char) * (lenName + 1))); memcpy(name, str+iName, lenName); name[lenName] = '\0'; CHKN(val = malloc(sizeof(char) * (lenValue + 1))); memcpy(val, str+iValue, lenValue); val[lenValue] = '\0'; if(*value == NULL) CHKN(*value = json_object_new_object()); json_object *json; CHKN(json = json_object_new_string(val)); json_object_object_add(*value, name, json); free(name); name = NULL; free(val); val = NULL; } } /* success, persist */ *parsed = *offs - i; r = 0; /* success */ done: free(name); free(val); if(r != 0 && value != NULL && *value != NULL) { json_object_put(*value); *value = NULL; } return r; } liblognorm-2.1.0/src/v1_parser.h000066400000000000000000000212331520037563000165220ustar00rootroot00000000000000/* * liblognorm - a fast samples-based log normalization library * Copyright 2010-2015 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #ifndef LIBLOGNORM_V1_PARSER_H_INCLUDED #define LIBLOGNORM_V1_PARSER_H_INCLUDED #include "v1_ptree.h" /** * Parser interface * @param[in] str the to-be-parsed string * @param[in] strLen length of the to-be-parsed string * @param[in] offs an offset into the string * @param[in] node fieldlist with additional data; for simple * parsers, this sets variable "ed", which just is * string data. * @param[out] parsed bytes * @param[out] json object containing parsed data (can be unused) * @return 0 on success, something else otherwise */ /** * Parser for RFC5424 date. */ int ln_parseRFC5424Date(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for RFC3164 date. */ int ln_parseRFC3164Date(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for numbers. 
*/ int ln_parseNumber(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for real-number in floating-pt representation */ int ln_parseFloat(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for hex numbers. */ int ln_parseHexNumber(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for kernel timestamps. */ int ln_parseKernelTimestamp(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for whitespace */ int ln_parseWhitespace(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for Words (SP-terminated strings). */ int ln_parseWord(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse everything up to a specific string. */ int ln_parseStringTo(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for Alphabetic words (no numbers, punct, ctrl, space). */ int ln_parseAlpha(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse everything up to a specific character. */ int ln_parseCharTo(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse everything up to a specific character (relaxed constraints, suitable for CSV) */ int ln_parseCharSeparated(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Get everything till the rest of string. 
*/ int ln_parseRest(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse an optionally quoted string. */ int ln_parseOpQuotedString(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse a quoted string. */ int ln_parseQuotedString(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse an ISO date. */ int ln_parseISODate(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse a timestamp in 12hr format. */ int ln_parseTime12hr(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse a timestamp in 24hr format. */ int ln_parseTime24hr(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse a duration. */ int ln_parseDuration(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for IPv4 addresses. */ int ln_parseIPv4(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for IPv6 addresses. */ int ln_parseIPv6(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse JSON. */ int ln_parseJSON(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse cee syslog. 
*/ int ln_parseCEESyslog(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parse iptables log, the new way */ int ln_parsev2IPTables(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser Cisco interface specifiers */ int ln_parseCiscoInterfaceSpec(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser 48 bit MAC layer addresses. */ int ln_parseMAC48(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for CEF version 0. */ int ln_parseCEF(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for Checkpoint LEA. */ int ln_parseCheckpointLEA(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Parser for name/value pairs. */ int ln_parseNameValue(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); /** * Get all tokens separated by tokenizer-string as array. 
*/ int ln_parseTokenized(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); void* tokenized_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx); void tokenized_parser_data_destructor(void** dataPtr); #ifdef FEATURE_REGEXP /** * Get field matching regex */ int ln_parseRegex(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx); void regex_parser_data_destructor(void** dataPtr); #endif /** * Match using the 'current' or 'separate rulebase' all over again from current match position */ int ln_parseRecursive(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); void* recursive_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx); void* descent_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx); void recursive_parser_data_destructor(void** dataPtr); /** * Get interpreted field */ int ln_parseInterpret(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); void* interpret_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx); void interpret_parser_data_destructor(void** dataPtr); /** * Parse a suffixed field */ int ln_parseSuffixed(const char *str, size_t strlen, size_t *offs, const ln_fieldList_t *node, size_t *parsed, struct json_object **value); void* suffixed_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx); void* named_suffixed_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx); void suffixed_parser_data_destructor(void** dataPtr); #endif /* #ifndef LIBLOGNORM_V1_PARSER_H_INCLUDED */ liblognorm-2.1.0/src/v1_ptree.c000066400000000000000000000622231520037563000163440ustar00rootroot00000000000000/** * @file ptree.c * @brief Implementation of the parse tree object. 
* @class ln_ptree ptree.h *//* * Copyright 2010 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #include #include #include #include #include #include #include #define LOGNORM_V1_SUBSYSTEM /* indicate we are old cruft */ #include "v1_liblognorm.h" #include "annot.h" #include "internal.h" #include "lognorm.h" #include "v1_ptree.h" #include "v1_samp.h" #include "v1_parser.h" /** * Get base addr of common prefix. Takes length of prefix in account * and selects the right buffer. */ static inline unsigned char* prefixBase(struct ln_ptree *tree) { return (tree->lenPrefix <= sizeof(tree->prefix)) ? 
tree->prefix.data : tree->prefix.ptr; } struct ln_ptree* ln_newPTree(ln_ctx ctx, struct ln_ptree **parentptr) { struct ln_ptree *tree; if((tree = calloc(1, sizeof(struct ln_ptree))) == NULL) goto done; tree->parentptr = parentptr; tree->ctx = ctx; ctx->nNodes++; done: return tree; } void ln_deletePTreeNode(ln_fieldList_t *node) { ln_deletePTree(node->subtree); es_deleteStr(node->name); if(node->data != NULL) es_deleteStr(node->data); if(node->raw_data != NULL) es_deleteStr(node->raw_data); if(node->parser_data != NULL && node->parser_data_destructor != NULL) node->parser_data_destructor(&(node->parser_data)); free(node); } void ln_deletePTree(struct ln_ptree *tree) { ln_fieldList_t *node, *nextnode; size_t i; if(tree == NULL) goto done; if(tree->tags != NULL) json_object_put(tree->tags); for(node = tree->froot; node != NULL; node = nextnode) { nextnode = node->next; ln_deletePTreeNode(node); } /* need to free a large prefix buffer? */ if(tree->lenPrefix > sizeof(tree->prefix)) free(tree->prefix.ptr); for(i = 0 ; i < 256 ; ++i) if(tree->subtree[i] != NULL) ln_deletePTree(tree->subtree[i]); free(tree); done: return; } /** * Set the common prefix inside a note, taking into account the subtle * issues associated with it. * @return 0 on success, something else otherwise */ static int setPrefix(struct ln_ptree *tree, unsigned char *buf, size_t lenBuf, size_t offs) { int r; LN_DBGPRINTF(tree->ctx, "setPrefix lenBuf %zu, offs %zu", lenBuf, offs); tree->lenPrefix = lenBuf - offs; if(tree->lenPrefix > sizeof(tree->prefix)) { /* too-large for standard buffer, need to alloc one */ if((tree->prefix.ptr = malloc(tree->lenPrefix * sizeof(unsigned char))) == NULL) { r = LN_NOMEM; goto done; /* fail! */ } memcpy(tree->prefix.ptr, buf, tree->lenPrefix); } else { /* note: r->lenPrefix may be 0, but that is OK */ memcpy(tree->prefix.data, buf, tree->lenPrefix); } r = 0; done: return r; } /** * Check if the provided tree is a leaf. This means that it * does not contain any subtrees. 
* @return 1 if it is a leaf, 0 otherwise */ static int isLeaf(struct ln_ptree *tree) { int r = 0; int i; if(tree->froot != NULL) goto done; for(i = 0 ; i < 256 ; ++i) { if(tree->subtree[i] != NULL) goto done; } r = 1; done: return r; } /** * Check if the provided tree is a true leaf. This means that it * does not contain any subtrees of any kind and no prefix, * and it is not terminal leaf. * @return 1 if it is a leaf, 0 otherwise */ static inline int isTrueLeaf(struct ln_ptree *tree) { return((tree->lenPrefix == 0) && isLeaf(tree)) && !tree->flags.isTerminal; } struct ln_ptree * ln_addPTree(struct ln_ptree *tree, es_str_t *str, size_t offs) { struct ln_ptree *r; struct ln_ptree **parentptr; /**< pointer in parent that needs to be updated */ LN_DBGPRINTF(tree->ctx, "addPTree: offs %zu", offs); parentptr = &(tree->subtree[es_getBufAddr(str)[offs]]); /* First check if tree node is totally empty. If so, we can simply add * the prefix to this node. This case is important, because it happens * every time with a new field. */ if(isTrueLeaf(tree)) { if(setPrefix(tree, es_getBufAddr(str), es_strlen(str), offs) != 0) { r = NULL; } else { r = tree; } goto done; } if(tree->ctx->debug) { char *cstr = es_str2cstr(str, NULL); LN_DBGPRINTF(tree->ctx, "addPTree: add '%s', offs %zu, tree %p", cstr + offs, offs, tree); free(cstr); } if((r = ln_newPTree(tree->ctx, parentptr)) == NULL) goto done; if(setPrefix(r, es_getBufAddr(str) + offs + 1, es_strlen(str) - offs - 1, 0) != 0) { free(r); r = NULL; goto done; } *parentptr = r; done: return r; } /** * Split the provided tree (node) into two at the provided index into its * common prefix. This function exists to support splitting nodes when * a mismatch in the common prefix requires that. This function more or less * keeps the tree as it is, just changes the structure. No new node is added. * Usually, it is desired to add a new node. This must be made afterwards. 
* Note that we need to create a new tree *in front of* the current one, as
 * the current one contains field etc. subtree pointers.
 * @param[in] tree tree to split
 * @param[in] offs offset into common prefix (must be less than prefix length!)
 * @return the newly created front node (which now holds the first offs
 *         prefix bytes and points to tree via its branch table), or NULL
 *         if the node could not be allocated or its prefix not be set
 */
static struct ln_ptree*
splitTree(struct ln_ptree *tree, unsigned short offs)
{
	unsigned char *c;
	struct ln_ptree *r;
	unsigned short newlen;
	ln_ptree **newparentptr; /**< pointer in parent that needs to be updated */

	assert(offs < tree->lenPrefix);
	/* the new node takes over tree's place, so it inherits tree's parent slot */
	if((r = ln_newPTree(tree->ctx, tree->parentptr)) == NULL)
		goto done;

	LN_DBGPRINTF(tree->ctx, "splitTree %p at offs %u", tree, offs);
	/* note: the overall prefix is reduced by one char, which is now taken
	 * care of inside the "branch table".
	 */
	c = prefixBase(tree);
	//LN_DBGPRINTF(tree->ctx, "splitTree new bb, *(c+offs): '%s'", c);
	/* front node gets the first offs bytes of the old prefix */
	if(setPrefix(r, c, offs, 0) != 0) {
		ln_deletePTree(r);
		r = NULL;
		goto done; /* fail! */
	}

	/* NOTE(review): prefix.data[0] is printed even when r's prefix is stored
	 * out of line (then these are bytes of the pointer) - debug output only.
	 */
	LN_DBGPRINTF(tree->ctx, "splitTree new tree %p lenPrefix=%u, char '%c'",
		     r, r->lenPrefix, r->prefix.data[0]);
	/* add the proper branch table entry for the new node. must be done
	 * here, because the next step will destroy the required index char!
	 */
	newparentptr = &(r->subtree[c[offs]]);
	r->subtree[c[offs]] = tree;

	/* finally fix existing common prefix */
	newlen = tree->lenPrefix - offs - 1;
	if(tree->lenPrefix > sizeof(tree->prefix) && (newlen <= sizeof(tree->prefix))) {
		/* the prefix was out of line but the remainder fits into the
		 * in-node buffer: copy it there and release the heap buffer.
		 * note: c is a different pointer; the original
		 * pointer is overwritten by memcpy!
		 */
		LN_DBGPRINTF(tree->ctx, "splitTree new case one bb, offs %u, lenPrefix %u, newlen %u",
			     offs, tree->lenPrefix, newlen);
		//LN_DBGPRINTF(tree->ctx, "splitTree new case one bb, *(c+offs): '%s'", c);
		memcpy(tree->prefix.data, c+offs+1, newlen);
		free(c);
	} else {
		/* storage location stays the same, just shift the remainder to
		 * the front (regions overlap, hence memmove)
		 */
		LN_DBGPRINTF(tree->ctx, "splitTree new case two bb, offs=%u, newlen %u", offs, newlen);
		memmove(c, c+offs+1, newlen);
	}
	tree->lenPrefix = tree->lenPrefix - offs - 1;

	if(tree->parentptr == 0)
		tree->ctx->ptree = r;	/* root does not have a parent! */
	else
		*(tree->parentptr) = r;
	tree->parentptr = newparentptr;

done:	return r;
}


/*
 * Find or create the node that corresponds to the literal str (starting at
 * offs) below tree. The node's common prefix is matched first; depending on
 * where the match ends, the node itself is returned, the node is split, a
 * new child is added, or the search continues in the matching literal child.
 * Returns NULL on error.
 */
struct ln_ptree *
ln_buildPTree(struct ln_ptree *tree, es_str_t *str, size_t offs)
{
	struct ln_ptree *r;
	unsigned char *c;
	unsigned char *cpfix;
	size_t i;
	unsigned short ipfix;

	assert(tree != NULL);
	LN_DBGPRINTF(tree->ctx, "buildPTree: begin at %p, offs %zu", tree, offs);
	c = es_getBufAddr(str);

	/* check if the prefix matches and, if not, at what offset it is different */
	ipfix = 0;
	cpfix = prefixBase(tree);
	for(  i = offs
	    ; (i < es_strlen(str)) && (ipfix < tree->lenPrefix) && (c[i] == cpfix[ipfix])
	    ; ++i, ++ipfix) {
		; /*DO NOTHING - just find end of match */
		LN_DBGPRINTF(tree->ctx, "buildPTree: tree %p, i %zu, char '%c'", tree, i, c[i]);
	}

	/* if we reach this point, we have processed as much of the common prefix
	 * as we could. The following code now does the proper actions based on
	 * the possible cases.
	 */
	if(i == es_strlen(str)) {
		/* all of our input is consumed, no more recursion */
		if(ipfix == tree->lenPrefix) {
			LN_DBGPRINTF(tree->ctx, "case 1.1");
			/* exact match, we are done! */
			r = tree;
		} else {
			LN_DBGPRINTF(tree->ctx, "case 1.2");
			/* we need to split the node at the current position */
			r = splitTree(tree, ipfix);
		}
	} else if(ipfix < tree->lenPrefix) {
		LN_DBGPRINTF(tree->ctx, "case 2, i=%zu, ipfix=%u", i, ipfix);
		/* we need to split the node at the current position */
		if((r = splitTree(tree, ipfix)) == NULL)
			goto done; /* fail */
		LN_DBGPRINTF(tree->ctx, "pre addPTree: i %zu", i);
		/* the rest of the input becomes a new child of the front node */
		if((r = ln_addPTree(r, str, i)) == NULL)
			goto done;
		//r = ln_buildPTree(r, str, i + 1);
	} else {
		/* we could consume the current common prefix, but now need
		 * to traverse the rest of the tree based on the next char.
		 */
		if(tree->subtree[c[i]] == NULL) {
			LN_DBGPRINTF(tree->ctx, "case 3.1");
			/* non-match, need new subtree */
			r = ln_addPTree(tree, str, i);
		} else {
			LN_DBGPRINTF(tree->ctx, "case 3.2");
			/* match, follow subtree */
			r = ln_buildPTree(tree->subtree[c[i]], str, i + 1);
		}
	}

	//LN_DBGPRINTF(tree->ctx, "---------------------------------------");
	//ln_displayPTree(tree, 0);
	//LN_DBGPRINTF(tree->ctx, "=======================================");
done:	return r;
}


/*
 * Append the field description node to the field list of *tree and make
 * *tree point to the subtree that follows the field.
 * If an equal field (same name, same parser, same raw extra data) is already
 * present, node is destroyed and *tree is set to the existing field's
 * subtree instead.
 * Returns 0 on success, -1 if the subtree for node could not be allocated;
 * in the latter case node is NOT freed here.
 */
int
ln_addFDescrToPTree(struct ln_ptree **tree, ln_fieldList_t *node)
{
	int r;
	ln_fieldList_t *curr;

	assert(tree != NULL);assert(*tree != NULL);
	assert(node != NULL);

	/* the subtree is created up front; on a merge it is released again
	 * together with node
	 */
	if((node->subtree = ln_newPTree((*tree)->ctx, &node->subtree)) == NULL) {
		r = -1;
		goto done;
	}
	LN_DBGPRINTF((*tree)->ctx, "got new subtree %p", node->subtree);

	/* check if we already have this field, if so, merge
	 * TODO: optimized, check logic
	 */
	for(curr = (*tree)->froot ; curr != NULL ; curr = curr->next) {
		if(!es_strcmp(curr->name, node->name)
				&& curr->parser == node->parser
				&& ((curr->raw_data == NULL && node->raw_data == NULL)
					|| (curr->raw_data != NULL && node->raw_data != NULL
						&& !es_strcmp(curr->raw_data, node->raw_data)))) {
			*tree = curr->subtree;
			ln_deletePTreeNode(node);
			r = 0;
			LN_DBGPRINTF((*tree)->ctx, "merging with tree %p\n", *tree);
			goto done;
		}
	}

	/* new field: enqueue at the tail of the list */
	if((*tree)->froot == NULL) {
		(*tree)->froot = (*tree)->ftail = node;
	} else {
		(*tree)->ftail->next = node;
		(*tree)->ftail = node;
	}
	r = 0;
	LN_DBGPRINTF((*tree)->ctx, "prev subtree %p", *tree);
	*tree = node->subtree;
	LN_DBGPRINTF((*tree)->ctx, "new subtree %p", *tree);

done:	return r;
}


/*
 * Debug aid: emit one debug line describing tree (prefix, child counts,
 * usage statistics), then recurse into all literal children and all field
 * subtrees. Output is indented by two blanks per level; levels above 1023
 * are clamped so the indent buffer cannot overflow.
 */
void
ln_displayPTree(struct ln_ptree *tree, int level)
{
	int i;
	int nChildLit;
	int nChildField;
	es_str_t *str;
	char *cstr;
	ln_fieldList_t *node;
	char indent[2048];

	if(level > 1023)
		level = 1023;
	memset(indent, ' ', level * 2);
	indent[level * 2] = '\0';

	nChildField = 0;
	for(node = tree->froot ; node != NULL ; node = node->next ) {
		++nChildField;
	}

	nChildLit = 0;
	for(i = 0 ; i < 256 ; ++i) {
		if(tree->subtree[i] != NULL) {
			nChildLit++;
		}
	}

	/* the prefix is not NUL-terminated, so go through an es_str to get
	 * a printable C string
	 * NOTE(review): the results of es_newStr() and es_str2cstr() are not
	 * checked before use.
	 */
	str = es_newStr(sizeof(tree->prefix));
	es_addBuf(&str, (char*) prefixBase(tree), tree->lenPrefix);
	cstr = es_str2cstr(str, NULL);
	es_deleteStr(str);
	LN_DBGPRINTF(tree->ctx, "%ssubtree%s %p (prefix: '%s', children: %d literals, %d fields) [visited %u "
		"backtracked %u terminated %u]",
		indent, tree->flags.isTerminal ? " TERM" : "", tree, cstr, nChildLit, nChildField,
		tree->stats.visited, tree->stats.backtracked, tree->stats.terminated);
	free(cstr);
	/* display char subtrees */
	for(i = 0 ; i < 256 ; ++i) {
		if(tree->subtree[i] != NULL) {
			LN_DBGPRINTF(tree->ctx, "%schar %2.2x(%c):", indent, i, i);
			ln_displayPTree(tree->subtree[i], level + 1);
		}
	}

	/* display field subtrees */
	for(node = tree->froot ; node != NULL ; node = node->next ) {
		cstr = es_str2cstr(node->name, NULL);
		LN_DBGPRINTF(tree->ctx, "%sfield %s:", indent, cstr);
		free(cstr);
		ln_displayPTree(node->subtree, level + 1);
	}
}


/* the following is a quick hack, which should be moved to the
 * string class.
 * It appends the "%p" representation of p to str.
 */
static inline void dotAddPtr(es_str_t **str, void *p)
{
	char buf[64];
	int i;
	i = snprintf(buf, sizeof(buf), "%p", p);
	es_addBuf(str, buf, i);
}
/**
 * recursive handler for DOT graph generator.
*/ static void ln_genDotPTreeGraphRec(struct ln_ptree *tree, es_str_t **str) { int i; ln_fieldList_t *node; dotAddPtr(str, tree); es_addBufConstcstr(str, " [label=\""); if(tree->lenPrefix > 0) { es_addChar(str, '\''); es_addBuf(str, (char*) prefixBase(tree), tree->lenPrefix); es_addChar(str, '\''); } es_addBufConstcstr(str, "\""); if(isLeaf(tree)) { es_addBufConstcstr(str, " style=\"bold\""); } es_addBufConstcstr(str, "]\n"); /* display char subtrees */ for(i = 0 ; i < 256 ; ++i) { if(tree->subtree[i] != NULL) { dotAddPtr(str, tree); es_addBufConstcstr(str, " -> "); dotAddPtr(str, tree->subtree[i]); es_addBufConstcstr(str, " [label=\""); es_addChar(str, (char) i); es_addBufConstcstr(str, "\"]\n"); ln_genDotPTreeGraphRec(tree->subtree[i], str); } } /* display field subtrees */ for(node = tree->froot ; node != NULL ; node = node->next ) { dotAddPtr(str, tree); es_addBufConstcstr(str, " -> "); dotAddPtr(str, node->subtree); es_addBufConstcstr(str, " [label=\""); es_addStr(str, node->name); es_addBufConstcstr(str, "\" style=\"dotted\"]\n"); ln_genDotPTreeGraphRec(node->subtree, str); } } void ln_genDotPTreeGraph(struct ln_ptree *tree, es_str_t **str) { es_addBufConstcstr(str, "digraph ptree {\n"); ln_genDotPTreeGraphRec(tree, str); es_addBufConstcstr(str, "}\n"); } /** * add unparsed string to event. */ static int addUnparsedField(const char *str, size_t strLen, int offs, struct json_object *json) { int r = 1; struct json_object *value; char *s = NULL; CHKN(s = strndup(str, strLen)); value = json_object_new_string(s); if (value == NULL) { goto done; } json_object_object_add(json, ORIGINAL_MSG_KEY, value); value = json_object_new_string(s + offs); if (value == NULL) { goto done; } json_object_object_add(json, UNPARSED_DATA_KEY, value); r = 0; done: free(s); return r; } /** * Special parser for iptables-like name/value pairs. * The pull multiple fields. Note that once this parser has been selected, * it is very unlikely to be left, as it is *very* generic. 
This parser is * required because practice shows that already-structured data like iptables * can otherwise not be processed by liblognorm in a meaningful way. * * @param[in] tree current tree to process * @param[in] str string to be matched against (the to-be-normalized data) * @param[in] strLen length of str * @param[in/out] offs start position in input data, on exit first unparsed position * @param[in/out] event handle to event that is being created during normalization * * @return 0 if parser was successfully, something else on error */ static int ln_iptablesParser(struct ln_ptree *tree, const char *str, size_t strLen, size_t *offs, struct json_object *json) { int r; size_t o = *offs; es_str_t *fname; es_str_t *fval; const char *pstr; const char *end; struct json_object *value; LN_DBGPRINTF(tree->ctx, "%zu enter iptables parser, len %zu", *offs, strLen); if(o == strLen) { r = -1; /* can not be, we have no n/v pairs! */ goto done; } end = str + strLen; pstr = str + o; while(pstr < end) { while(pstr < end && isspace(*pstr)) ++pstr; CHKN(fname = es_newStr(16)); while(pstr < end && !isspace(*pstr) && *pstr != '=') { es_addChar(&fname, *pstr); ++pstr; } if(pstr < end && *pstr == '=') { CHKN(fval = es_newStr(16)); ++pstr; /* error on space */ while(pstr < end && !isspace(*pstr)) { es_addChar(&fval, *pstr); ++pstr; } } else { CHKN(fval = es_newStrFromCStr("[*PRESENT*]", sizeof("[*PRESENT*]")-1)); } char *cn, *cv; CHKN(cn = ln_es_str2cstr(&fname)); CHKN(cv = ln_es_str2cstr(&fval)); if (tree->ctx->debug) { LN_DBGPRINTF(tree->ctx, "iptables parser extracts %s=%s", cn, cv); } CHKN(value = json_object_new_string(cv)); json_object_object_add(json, cn, value); es_deleteStr(fval); es_deleteStr(fname); } r = 0; *offs = strLen; done: LN_DBGPRINTF(tree->ctx, "%zu iptables parser returns %d", *offs, r); return r; } /** * Recursive step of the normalizer. It walks the parse tree and calls itself * recursively when this is appropriate. 
It also implements backtracking in
 * those (hopefully rare) cases where it is required.
 *
 * Processing order inside one node: the common prefix is compared first,
 * then the field parsers are tried in list order (a "rest" field is only
 * remembered), then the literal child for the next input character is
 * followed, and only if all of that did not produce a terminal match the
 * remembered "rest" field is applied.
 *
 * @param[in] tree current tree to process
 * @param[in] string string to be matched against (the to-be-normalized data)
 * @param[in] strLen length of the to-be-matched string
 * @param[in] offs start position in input data
 * @param[in/out] json ... that is being created during normalization
 * @param[out] endNode if a match was found, this is the matching node (undefined otherwise)
 *
 * @return number of characters left unparsed by following the subtree, negative if
 *         the to-be-parsed message is shorter than the rule sample by this number of
 *         characters.
 */
static int
ln_v1_normalizeRec(struct ln_ptree *tree, const char *str, size_t strLen, size_t offs,
		   struct json_object *json, struct ln_ptree **endNode)
{
	int r;
	int localR;
	size_t i;
	int left;
	ln_fieldList_t *node;
	ln_fieldList_t *restMotifNode = NULL;
	char *cstr;
	const char *c;
	unsigned char *cpfix;
	unsigned ipfix;
	size_t parsed;
	char *namestr;
	struct json_object *value;

	++tree->stats.visited;
	/* input exhausted on entry: this node is the end node; a remaining
	 * prefix is reported as a negative count
	 */
	if(offs >= strLen) {
		*endNode = tree;
		r = -tree->lenPrefix;
		goto done;
	}

	LN_DBGPRINTF(tree->ctx, "%zu: enter parser, tree %p", offs, tree);
	c = str;
	cpfix = prefixBase(tree);
	node = tree->froot;
	/* r tracks the smallest number of unparsed characters seen so far */
	r = strLen - offs;
	/* first we need to check if the common prefix matches (and consume input data while we do) */
	ipfix = 0;
	while(offs < strLen && ipfix < tree->lenPrefix) {
		LN_DBGPRINTF(tree->ctx, "%zu: prefix compare '%c', '%c'", offs, c[offs], cpfix[ipfix]);
		if(c[offs] != cpfix[ipfix]) {
			r -= ipfix;
			goto done;
		}
		++offs, ++ipfix;
	}

	if(ipfix != tree->lenPrefix) {
		/* incomplete prefix match --> to-be-normalized string too short */
		r = ipfix - tree->lenPrefix;
		goto done;
	}

	r -= ipfix;
	LN_DBGPRINTF(tree->ctx, "%zu: prefix compare succeeded, still valid", offs);

	/* now try the parsers */
	while(node != NULL) {
		if(tree->ctx->debug) {
			cstr = es_str2cstr(node->name, NULL);
			LN_DBGPRINTF(tree->ctx, "%zu:trying parser for field '%s': %p",
					offs, cstr, node->parser);
			free(cstr);
		}
		/* every parser starts at the position right after the prefix */
		i = offs;
		if(node->isIPTables) {
			localR = ln_iptablesParser(tree, str, strLen, &i, json);
			LN_DBGPRINTF(tree->ctx, "%zu iptables parser return, i=%zu",
						offs, i);
			if(localR == 0) {
				/* potential hit, need to verify */
				LN_DBGPRINTF(tree->ctx, "potential hit, trying subtree");
				left = ln_v1_normalizeRec(node->subtree, str, strLen, i, json, endNode);
				if(left == 0 && (*endNode)->flags.isTerminal) {
					LN_DBGPRINTF(tree->ctx, "%zu: parser matches at %zu", offs, i);
					r = 0;
					goto done;
				}
				LN_DBGPRINTF(tree->ctx, "%zu nonmatch, backtracking required, left=%d",
						offs, left);
				++tree->stats.backtracked;
				if(left < r)
					r = left;
			}
		} else if(node->parser == ln_parseRest) {
			/* This is a quick and dirty adjustment to handle "rest" more intelligently.
			 * It's just a tactical fix: in the longer term, we'll handle the whole
			 * situation differently. However, it makes sense to fix this now, as this
			 * solves some real-world problems immediately. -- rgerhards, 2015-04-15
			 */
			restMotifNode = node;
		} else {
			value = NULL;
			localR = node->parser(str, strLen, &i, node, &parsed, &value);
			/* NOTE(review): parsed is logged even if the parser failed; whether
			 * parsers set it in that case cannot be seen here - confirm.
			 */
			LN_DBGPRINTF(tree->ctx, "parser returns %d, parsed %zu", localR, parsed);
			if(localR == 0) {
				/* potential hit, need to verify */
				LN_DBGPRINTF(tree->ctx, "%zu: potential hit, trying subtree %p", offs, node->subtree);
				left = ln_v1_normalizeRec(node->subtree, str, strLen, i + parsed, json, endNode);
				/* NOTE(review): this logs r, not the subtree's result (left) */
				LN_DBGPRINTF(tree->ctx, "%zu: subtree returns %d", offs, r);
				if(left == 0 && (*endNode)->flags.isTerminal) {
					LN_DBGPRINTF(tree->ctx, "%zu: parser matches at %zu", offs, i);
					/* a field named "-" is matched but not stored */
					if(es_strbufcmp(node->name, (unsigned char*)"-", 1)) {
						/* Store the value here; create json if not already created */
						if (value == NULL) {
							CHKN(cstr = strndup(str + i, parsed));
							value = json_object_new_string(cstr);
							free(cstr);
						}
						if (value == NULL) {
							LN_DBGPRINTF(tree->ctx, "unable to create json");
							goto done;
						}
						namestr = ln_es_str2cstr(&node->name);
						json_object_object_add(json, namestr, value);
					} else {
						if (value != NULL) {
							/* Free the unneeded value */
							json_object_put(value);
						}
					}
					r = 0;
					goto done;
				}
				LN_DBGPRINTF(tree->ctx, "%zu nonmatch, backtracking required, left=%d",
						offs, left);
				if (value != NULL) {
					/* Free the value if it was created */
					json_object_put(value);
				}
				/* only positive remainders update the best result here */
				if(left > 0 && left < r)
					r = left;
				LN_DBGPRINTF(tree->ctx, "%zu nonmatch, backtracking required, left=%d, r now %d",
						offs, left, r);
				++tree->stats.backtracked;
			}
		}
		node = node->next;
	}

	/* the prefix consumed all remaining input: this node is the end node */
	if(offs == strLen) {
		*endNode = tree;
		r = 0;
		goto done;
	}

	if(offs < strLen) {
		unsigned char cc = str[offs];
		LN_DBGPRINTF(tree->ctx, "%zu no field, trying subtree char '%c': %p", offs, cc, tree->subtree[cc]);
	} else {
		LN_DBGPRINTF(tree->ctx, "%zu no field, offset already beyond end", offs);
	}
	/* now let's see if we have a literal */
	if(tree->subtree[(unsigned char)str[offs]] != NULL) {
		left = ln_v1_normalizeRec(tree->subtree[(unsigned char)str[offs]],
				       str, strLen, offs + 1, json, endNode);
		LN_DBGPRINTF(tree->ctx, "%zu got left %d, r %d", offs, left, r);
		if(left < r)
			r = left;
		LN_DBGPRINTF(tree->ctx, "%zu got return %d", offs, r);
	}

	if(r == 0 && (*endNode)->flags.isTerminal)
		goto done;

	/* and finally give "rest" a try if it was present. Note that we MUST do this after
	 * literal evaluation, otherwise "rest" can never be overridden by other rules.
	 */
	if(restMotifNode != NULL) {
		LN_DBGPRINTF(tree->ctx, "rule has rest motif, forcing match via it");
		value = NULL;
		/* NOTE(review): i still holds whatever the last loop iteration left in
		 * it (the iptables parser advances it) - confirm this is intended.
		 */
		restMotifNode->parser(str, strLen, &i, restMotifNode, &parsed, &value);
#		ifndef NDEBUG
		left = /* we only need this for the assert below */
#		endif
		ln_v1_normalizeRec(restMotifNode->subtree, str, strLen, i + parsed, json, endNode);
		assert(left == 0); /* with rest, we have this invariant */
		assert((*endNode)->flags.isTerminal); /* this one also */
		LN_DBGPRINTF(tree->ctx, "%zu: parser matches at %zu", offs, i);
		if(es_strbufcmp(restMotifNode->name, (unsigned char*)"-", 1)) {
			/* Store the value here; create json if not already created */
			if (value == NULL) {
				CHKN(cstr = strndup(str + i, parsed));
				value = json_object_new_string(cstr);
				free(cstr);
			}
			if (value == NULL) {
				LN_DBGPRINTF(tree->ctx, "unable to create json");
				goto done;
			}
			namestr = ln_es_str2cstr(&restMotifNode->name);
			json_object_object_add(json, namestr, value);
		} else {
			if (value != NULL) {
				/* Free the unneeded value */
				json_object_put(value);
			}
		}
		r = 0;
		goto done;
	}

done:
	LN_DBGPRINTF(tree->ctx, "%zu returns %d", offs, r);
	/* count a termination only for the node that actually is the end node */
	if(r == 0 && *endNode == tree)
		++tree->stats.terminated;
	return r;
}


/*
 * Normalize the message str against the parse tree of ctx.
 * A json object is created in *json_p if the caller did not supply one.
 * If the message could not be parsed completely, or parsing did not end
 * at a terminal node, the original message and the unparsed remainder
 * are added to the event.
 */
int
ln_v1_normalize(ln_ctx ctx, const char *str, size_t strLen, struct json_object **json_p)
{
	int r;
	int left;
	struct ln_ptree *endNode = NULL;

	if(*json_p == NULL) {
		CHKN(*json_p = json_object_new_object());
	}

	left = ln_v1_normalizeRec(ctx->ptree, str, strLen, 0, *json_p, &endNode);

	if(ctx->debug) {
		if(left == 0) {
			LN_DBGPRINTF(ctx, "final result for normalizer: left %d, endNode %p, "
				     "isTerminal %d, tagbucket %p",
				     left, endNode, endNode->flags.isTerminal, endNode->tags);
		} else {
			LN_DBGPRINTF(ctx, "final result for normalizer: left %d, endNode %p",
				     left, endNode);
		}
	}
	if(left != 0 || !endNode->flags.isTerminal) {
		/* we could not successfully parse, some unparsed items left */
		if(left < 0) {
			/* message shorter than the rule: nothing is left unparsed */
			addUnparsedField(str, strLen, strLen, *json_p);
		} else {
			addUnparsedField(str, strLen, strLen - left, *json_p);
		}
	}
else { /* success, finalize event */ if(endNode->tags != NULL) { /* add tags to an event */ json_object_get(endNode->tags); json_object_object_add(*json_p, "event.tags", endNode->tags); CHKR(ln_annotate(ctx, *json_p, endNode->tags)); } } r = 0; done: return r; } /** * Gather and output pdag statistics for the full pdag (ctx) * including all disconnected components (type defs). * * Data is sent to given file ptr. */ void ln_fullPTreeStats(ln_ctx ctx, FILE __attribute__((unused)) *const fp, const int __attribute__((unused)) extendedStats) { ln_displayPTree(ctx->ptree, 0); } liblognorm-2.1.0/src/v1_ptree.h000066400000000000000000000163571520037563000163600ustar00rootroot00000000000000/** * @file ptree.h * @brief The parse tree object. * @class ln_ptree ptree.h *//* * Copyright 2013 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is meant to be included by applications using liblognorm. * For lognorm library files themselves, include "lognorm.h". * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. 
*/
#ifndef LIBLOGNORM_PTREE_H_INCLUDED
#define LIBLOGNORM_PTREE_H_INCLUDED
/* NOTE(review): the header names of the following two includes are missing
 * in this copy of the file; presumably <stdio.h> (for FILE) and <libestr.h>
 * (for es_str_t) - confirm against the repository.
 */
#include
#include

#define ORIGINAL_MSG_KEY "originalmsg"
#define UNPARSED_DATA_KEY "unparsed-data"

typedef struct ln_ptree ln_ptree; /**< the parse tree object */
typedef struct ln_fieldList_s ln_fieldList_t;

/**
 * List of supported fields inside parse tree.
 * This list holds all fields and their description. While normalizing,
 * fields are tried in the order of this list. So the enqueue order
 * dictates precedence during parsing.
 *
 * value list. This is a single-linked list. In a later stage, we should
 * optimize it so that frequently used fields are moved "up" towards
 * the root of the list. In any case, we do NOT expect this list to
 * be long, as the parser should already have gotten quite specific when
 * we hit a fieldconst .
 */
struct ln_fieldList_s {
	es_str_t *name;		/**< field name */
	es_str_t *data;		/**< extra data to be passed to parser */
	es_str_t *raw_data;	/**< extra untouched (unescaping is not done) data available to be used by parser */
	void *parser_data;	/**< opaque data that the field-parser understands */
	void (*parser_data_destructor)(void **); /**< destroy opaque data that field-parser understands */
	int (*parser)(const char*, size_t, size_t*, const ln_fieldList_t *,
				  size_t*, struct json_object **); /**< parser to use */
	ln_ptree *subtree;	/**< subtree to follow if parser succeeded */
	ln_fieldList_t *next;	/**< list housekeeping, next node (or NULL) */
	unsigned char isIPTables; /**< special parser: iptables! */
};


/* parse tree object
 */
struct ln_ptree {
	ln_ctx ctx;	/**< our context */
	ln_ptree **parentptr; /**< pointer to *us* *inside* the parent
				BUT this is NOT a pointer to the parent! */
	ln_fieldList_t *froot; /**< root of field list */
	ln_fieldList_t *ftail; /**< tail of field list */
	struct {
		unsigned isTerminal:1;	/**< designates this node a terminal sequence? */
	} flags;
	struct json_object *tags;	/* tags to assign to events of this type */
	/* the representation below requires a lot of memory but is
	 * very fast. As an alternate approach, we can use a hash table
	 * where we ignore control characters. That should work quite well.
	 * But we do not do this in the initial step.
	 */
	ln_ptree *subtree[256];
	unsigned short lenPrefix; /**< length of common prefix, 0->none */
	union {
		unsigned char *ptr;	/**< use if data element is too large */
		unsigned char data[16];	/**< fast lookup for small string */
	} prefix;	/**< a common prefix string for all of this node */
	struct {
		unsigned visited;
		unsigned backtracked;	/**< incremented when backtracking was initiated */
		unsigned terminated;
	} stats;	/**< usage statistics */
};


/* Methods */

/**
 * Allocates and initializes a new parse tree node.
 * @memberof ln_ptree
 *
 * @param[in] ctx current library context. This MUST match the
 * 		context of the parent.
 * @param[in] parent pointer to the new node inside the parent
 *
 * @return pointer to new node or NULL on error
 */
struct ln_ptree* ln_newPTree(ln_ctx ctx, struct ln_ptree** parent);


/**
 * Free a parse tree and destruct all members.
 * @memberof ln_ptree
 *
 * @param[in] tree pointer to ptree to free
 */
void ln_deletePTree(struct ln_ptree *tree);


/**
 * Free a parse tree node and destruct all members.
 * @memberof ln_ptree
 *
 * @param[in] node pointer to free
 */
void ln_deletePTreeNode(ln_fieldList_t *node);

/**
 * Add a field description to a tree.
 * The field description will be added as last field. Fields are
 * parsed in the order they have been added, so be sure to care
 * about the order if that matters.
 * @memberof ln_ptree
 *
 * @param[in,out] tree pointer to the ptree pointer to modify
 * @param[in] node a fully populated (and initialized)
 * 		field description node
 * @returns 0 on success, something else otherwise
 */
int ln_addFDescrToPTree(struct ln_ptree **tree, ln_fieldList_t *node);


/**
 * Add a literal to a ptree.
 * Creates new tree nodes as necessary.
 * @memberof ln_ptree
 *
 * @param[in] tree root of tree where to add
 * @param[in] str literal (string) to add
 * @param[in] offs offset of where in literal adding should start
 *
 * @return NULL on error, otherwise pointer to deepest tree added
 */
struct ln_ptree*
ln_addPTree(struct ln_ptree *tree, es_str_t *str, size_t offs);


/**
 * Display the content of a ptree (debug function).
 * This is a debug aid that spits out a textual representation
 * of the provided ptree via multiple calls of the debug callback.
 *
 * @param tree ptree to display
 * @param level recursion level, must be set to 0 on initial call
 */
void ln_displayPTree(struct ln_ptree *tree, int level);


/**
 * Generate a DOT graph.
 * Well, actually it does not generate the graph itself, but a
 * control file that is suitable for the GNU DOT tool. Such a file
 * can be very useful to understand complex sample databases
 * (not to mention that it is probably fun for those creating
 * samples).
 * The dot commands are appended to the provided string.
 *
 * @param[in] tree ptree to display
 * @param[out] str string which receives the DOT commands.
 */
void ln_genDotPTreeGraph(struct ln_ptree *tree, es_str_t **str);


/**
 * Build a ptree based on the provided string, but only if necessary.
 * The passed-in tree is searched and traversed for str. If a node exactly
 * matching str is found, that node is returned. If no exact match is found,
 * a new node is added. Existing nodes may be split, if a so-far common
 * prefix needs to be split in order to add the new node.
 *
 * @param[in] tree root of the current tree
 * @param[in] str string to be added
 * @param[in] offs offset into str where match needs to start
 *             (this is required for recursive calls to handle
 *             common prefixes)
 * @return NULL on error, otherwise the ptree leaf that
 *         corresponds to the parameters passed.
*/ struct ln_ptree * ln_buildPTree(struct ln_ptree *tree, es_str_t *str, size_t offs); /* internal helper for displaying stats */ void ln_fullPTreeStats(ln_ctx ctx, FILE *const fp, const int extendedStats); #endif /* #ifndef LOGNORM_PTREE_H_INCLUDED */ liblognorm-2.1.0/src/v1_samp.c000066400000000000000000000563111520037563000161660ustar00rootroot00000000000000/* samp.c -- code for ln_v1_samp objects. * * Copyright 2010-2015 by Rainer Gerhards and Adiscon GmbH. * * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013 * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #include "config.h" #include #include #include #include #include #include #define LOGNORM_V1_SUBSYSTEM /* indicate we are old cruft */ #include "v1_liblognorm.h" #include "internal.h" #include "lognorm.h" #include "samp.h" #include "v1_ptree.h" #include "v1_samp.h" #include "v1_parser.h" /** * Construct a sample object. 
*/ struct ln_v1_samp* ln_v1_sampCreate(ln_ctx __attribute__((unused)) ctx) { struct ln_v1_samp* samp; if((samp = calloc(1, sizeof(struct ln_v1_samp))) == NULL) goto done; /* place specific init code here (none at this time) */ done: return samp; } void ln_v1_sampFree(ln_ctx __attribute__((unused)) ctx, struct ln_v1_samp *samp) { free(samp); } /** * Extract a field description from a sample. * The field description is added to the tail of the current * subtree's field list. The parse buffer must be position on the * leading '%' that starts a field definition. It is a program error * if this condition is not met. * * Note that we break up the object model and access ptree members * directly. Let's consider us a friend of ptree. This is necessary * to optimize the structure for a high-speed parsing process. * * @param[in] str a temporary work string. This is passed in to save the * creation overhead * @returns 0 on success, something else otherwise */ static int addFieldDescr(ln_ctx ctx, struct ln_ptree **subtree, es_str_t *rule, es_size_t *bufOffs, es_str_t **str) { int r; ln_fieldList_t *node = ln_v1_parseFieldDescr(ctx, rule, bufOffs, str, &r); assert(subtree != NULL); if (node != NULL) CHKR(ln_addFDescrToPTree(subtree, node)); done: return r; } ln_fieldList_t* ln_v1_parseFieldDescr(ln_ctx ctx, es_str_t *rule, es_size_t *bufOffs, es_str_t **str, int* ret) { int r = 0; ln_fieldList_t *node; es_size_t i = *bufOffs; char *cstr; /* for debug mode strings */ unsigned char *buf; es_size_t lenBuf; void* (*constructor_fn)(ln_fieldList_t *, ln_ctx) = NULL; buf = es_getBufAddr(rule); lenBuf = es_strlen(rule); assert(buf[i] == '%'); ++i; /* "eat" ':' */ CHKN(node = calloc(1, sizeof(ln_fieldList_t))); node->subtree = NULL; node->next = NULL; node->data = NULL; node->raw_data = NULL; node->parser_data = NULL; node->parser_data_destructor = NULL; CHKN(node->name = es_newStr(16)); /* skip leading whitespace in field name */ while(i < lenBuf && isspace(buf[i])) ++i; while(i < 
lenBuf && buf[i] != ':') { CHKR(es_addChar(&node->name, buf[i++])); } if(es_strlen(node->name) == 0) { FAIL(LN_INVLDFDESCR); } if(ctx->debug) { cstr = es_str2cstr(node->name, NULL); ln_dbgprintf(ctx, "parsed field: '%s'", cstr); free(cstr); } if(buf[i] != ':') { /* may be valid later if we have a loaded CEE dictionary * and the name is present inside it. */ FAIL(LN_INVLDFDESCR); } ++i; /* skip ':' */ /* parse and process type (trailing whitespace must be trimmed) */ es_emptyStr(*str); size_t j = i; /* scan for terminator */ while(j < lenBuf && buf[j] != ':' && buf[j] != '%') ++j; /* now trim trailing space backwards */ size_t next = j; --j; while(j >= i && isspace(buf[j])) --j; /* now copy */ while(i <= j) { CHKR(es_addChar(str, buf[i++])); } /* finally move i to consumed position */ i = next; if(i == lenBuf) { FAIL(LN_INVLDFDESCR); } node->isIPTables = 0; /* first assume no special parser is used */ if(!es_strconstcmp(*str, "date-rfc3164")) { node->parser = ln_parseRFC3164Date; } else if(!es_strconstcmp(*str, "date-rfc5424")) { node->parser = ln_parseRFC5424Date; } else if(!es_strconstcmp(*str, "number")) { node->parser = ln_parseNumber; } else if(!es_strconstcmp(*str, "float")) { node->parser = ln_parseFloat; } else if(!es_strconstcmp(*str, "hexnumber")) { node->parser = ln_parseHexNumber; } else if(!es_strconstcmp(*str, "kernel-timestamp")) { node->parser = ln_parseKernelTimestamp; } else if(!es_strconstcmp(*str, "whitespace")) { node->parser = ln_parseWhitespace; } else if(!es_strconstcmp(*str, "ipv4")) { node->parser = ln_parseIPv4; } else if(!es_strconstcmp(*str, "ipv6")) { node->parser = ln_parseIPv6; } else if(!es_strconstcmp(*str, "word")) { node->parser = ln_parseWord; } else if(!es_strconstcmp(*str, "alpha")) { node->parser = ln_parseAlpha; } else if(!es_strconstcmp(*str, "rest")) { node->parser = ln_parseRest; } else if(!es_strconstcmp(*str, "op-quoted-string")) { node->parser = ln_parseOpQuotedString; } else if(!es_strconstcmp(*str, "quoted-string")) { 
node->parser = ln_parseQuotedString; } else if(!es_strconstcmp(*str, "date-iso")) { node->parser = ln_parseISODate; } else if(!es_strconstcmp(*str, "time-24hr")) { node->parser = ln_parseTime24hr; } else if(!es_strconstcmp(*str, "time-12hr")) { node->parser = ln_parseTime12hr; } else if(!es_strconstcmp(*str, "duration")) { node->parser = ln_parseDuration; } else if(!es_strconstcmp(*str, "cisco-interface-spec")) { node->parser = ln_parseCiscoInterfaceSpec; } else if(!es_strconstcmp(*str, "json")) { node->parser = ln_parseJSON; } else if(!es_strconstcmp(*str, "cee-syslog")) { node->parser = ln_parseCEESyslog; } else if(!es_strconstcmp(*str, "mac48")) { node->parser = ln_parseMAC48; } else if(!es_strconstcmp(*str, "name-value-list")) { node->parser = ln_parseNameValue; } else if(!es_strconstcmp(*str, "cef")) { node->parser = ln_parseCEF; } else if(!es_strconstcmp(*str, "checkpoint-lea")) { node->parser = ln_parseCheckpointLEA; } else if(!es_strconstcmp(*str, "v2-iptables")) { node->parser = ln_parsev2IPTables; } else if(!es_strconstcmp(*str, "iptables")) { node->parser = NULL; node->isIPTables = 1; } else if(!es_strconstcmp(*str, "string-to")) { /* TODO: check extra data!!!! (very important) */ node->parser = ln_parseStringTo; } else if(!es_strconstcmp(*str, "char-to")) { /* TODO: check extra data!!!! (very important) */ node->parser = ln_parseCharTo; } else if(!es_strconstcmp(*str, "char-sep")) { /* TODO: check extra data!!!! 
(very important) */ node->parser = ln_parseCharSeparated; } else if(!es_strconstcmp(*str, "tokenized")) { node->parser = ln_parseTokenized; constructor_fn = tokenized_parser_data_constructor; node->parser_data_destructor = tokenized_parser_data_destructor; } #ifdef FEATURE_REGEXP else if(!es_strconstcmp(*str, "regex")) { node->parser = ln_parseRegex; constructor_fn = regex_parser_data_constructor; node->parser_data_destructor = regex_parser_data_destructor; } #endif else if (!es_strconstcmp(*str, "recursive")) { node->parser = ln_parseRecursive; constructor_fn = recursive_parser_data_constructor; node->parser_data_destructor = recursive_parser_data_destructor; } else if (!es_strconstcmp(*str, "descent")) { node->parser = ln_parseRecursive; constructor_fn = descent_parser_data_constructor; node->parser_data_destructor = recursive_parser_data_destructor; } else if (!es_strconstcmp(*str, "interpret")) { node->parser = ln_parseInterpret; constructor_fn = interpret_parser_data_constructor; node->parser_data_destructor = interpret_parser_data_destructor; } else if (!es_strconstcmp(*str, "suffixed")) { node->parser = ln_parseSuffixed; constructor_fn = suffixed_parser_data_constructor; node->parser_data_destructor = suffixed_parser_data_destructor; } else if (!es_strconstcmp(*str, "named_suffixed")) { node->parser = ln_parseSuffixed; constructor_fn = named_suffixed_parser_data_constructor; node->parser_data_destructor = suffixed_parser_data_destructor; } else { cstr = es_str2cstr(*str, NULL); ln_errprintf(ctx, 0, "invalid field type '%s'", cstr); free(cstr); FAIL(LN_INVLDFDESCR); } if(buf[i] == '%') { i++; } else { /* parse extra data */ CHKN(node->data = es_newStr(8)); i++; while(i < lenBuf) { if(buf[i] == '%') { ++i; break; /* end of field */ } CHKR(es_addChar(&node->data, buf[i++])); } node->raw_data = es_strdup(node->data); es_unescapeStr(node->data); if(ctx->debug) { cstr = es_str2cstr(node->data, NULL); ln_dbgprintf(ctx, "parsed extra data: '%s'", cstr); free(cstr); 
} } if (constructor_fn) node->parser_data = constructor_fn(node, ctx); *bufOffs = i; done: if (r != 0) { if (node->name != NULL) es_deleteStr(node->name); free(node); node = NULL; } *ret = r; return node; } /** * Parse a Literal string out of the template and add it to the tree. * @param[in] ctx the context * @param[in/out] subtree on entry, current subtree, on exist newest * deepest subtree * @param[in] rule string with current rule * @param[in/out] bufOffs parse pointer, up to which offset is parsed * (is updated so that it points to first char after consumed * string on exit). * @param[out] str literal extracted (is empty, when no litral could be found) * @return 0 on success, something else otherwise */ static int parseLiteral(ln_ctx ctx, struct ln_ptree **subtree, es_str_t *rule, es_size_t *bufOffs, es_str_t **str) { int r = 0; es_size_t i = *bufOffs; unsigned char *buf; es_size_t lenBuf; es_emptyStr(*str); buf = es_getBufAddr(rule); lenBuf = es_strlen(rule); /* extract maximum length literal */ while(i < lenBuf) { if(buf[i] == '%') { if(i+1 < lenBuf && buf[i+1] != '%') { break; /* field start is end of literal */ } if (++i == lenBuf) break; } CHKR(es_addChar(str, buf[i])); ++i; } es_unescapeStr(*str); if(ctx->debug) { char *cstr = es_str2cstr(*str, NULL); ln_dbgprintf(ctx, "parsed literal: '%s'", cstr); free(cstr); } *subtree = ln_buildPTree(*subtree, *str, 0); *bufOffs = i; r = 0; done: return r; } /* Implementation note: * We read in the sample, and split it into chunks of literal text and * fields. Each literal text is added as whole to the tree, as is each * field individually. To do so, we keep track of our current subtree * root, which changes whenever a new part of the tree is build. It is * set to the then-lowest part of the tree, where the next step sample * data is to be added. * * This function processes the whole string or returns an error. 
* * format: literal1%field:type:extra-data%literal2 * * @returns the new subtree root (or NULL in case of error) */ static int addSampToTree(ln_ctx ctx, es_str_t *rule, struct json_object *tagBucket) { int r = -1; struct ln_ptree* subtree; es_str_t *str = NULL; es_size_t i; subtree = ctx->ptree; CHKN(str = es_newStr(256)); i = 0; while(i < es_strlen(rule)) { ln_dbgprintf(ctx, "addSampToTree %d of %d", i, es_strlen(rule)); CHKR(parseLiteral(ctx, &subtree, rule, &i, &str)); /* After the literal there can be field only*/ if (i < es_strlen(rule)) { CHKR(addFieldDescr(ctx, &subtree, rule, &i, &str)); if (i == es_strlen(rule)) { /* finish the tree with empty literal to avoid false merging*/ CHKR(parseLiteral(ctx, &subtree, rule, &i, &str)); } } } ln_dbgprintf(ctx, "end addSampToTree %d of %d", i, es_strlen(rule)); /* we are at the end of rule processing, so this node is a terminal */ subtree->flags.isTerminal = 1; subtree->tags = tagBucket; done: if(str != NULL) es_deleteStr(str); return r; } /** * get the initial word of a rule line that tells us the type of the * line. * @param[in] buf line buffer * @param[in] len length of buffer * @param[out] offs offset after "=" * @param[out] str string with "linetype-word" (newly created) * @returns 0 on success, something else otherwise */ static int getLineType(const char *buf, es_size_t lenBuf, es_size_t *offs, es_str_t **str) { int r = -1; es_size_t i; *str = es_newStr(16); for(i = 0 ; i < lenBuf && buf[i] != '=' ; ++i) { CHKR(es_addChar(str, buf[i])); } if(i < lenBuf) ++i; /* skip over '=' */ *offs = i; done: return r; } /** * Get a new common prefix from the config file. That is actually everything from * the current offset to the end of line. * * @param[in] buf line buffer * @param[in] len length of buffer * @param[in] offs offset after "=" * @param[in/out] str string to store common offset. If NULL, it is created, * otherwise it is emptied. 
* @returns 0 on success, something else otherwise */ static int getPrefix(const char *buf, es_size_t lenBuf, es_size_t offs, es_str_t **str) { int r; if(*str == NULL) { CHKN(*str = es_newStr(lenBuf - offs)); } else { es_emptyStr(*str); } r = es_addBuf(str, (char*)buf + offs, lenBuf - offs); done: return r; } /** * Extend the common prefix. This means that the line is concatenated * to the prefix. This is useful if the same rulebase is to be used with * different prefixes (well, not strictly necessary, but probably useful). * * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in] offs offset to-be-added text starts * @returns 0 on success, something else otherwise */ static int extendPrefix(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t offs) { return es_addBuf(&ctx->rulePrefix, (char*)buf+offs, lenBuf - offs); } /** * Add a tag to the tag bucket. Helper to processTags. * @param[in] ctx current context * @param[in] tagname string with tag name * @param[out] tagBucket tagbucket to which new tags shall be added * the tagbucket is created if it is NULL * @returns 0 on success, something else otherwise */ static int addTagStrToBucket(ln_ctx ctx, es_str_t *tagname, struct json_object **tagBucket) { int r = -1; char *cstr; struct json_object *tag; if(*tagBucket == NULL) { CHKN(*tagBucket = json_object_new_array()); } cstr = es_str2cstr(tagname, NULL); ln_dbgprintf(ctx, "tag found: '%s'", cstr); CHKN(tag = json_object_new_string(cstr)); json_object_array_add(*tagBucket, tag); free(cstr); r = 0; done: return r; } /** * Extract the tags and create a tag bucket out of them * * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in,out] poffs offset where tags start, on exit and success * offset after tag part (excluding ':') * @param[out] tagBucket tagbucket to which new tags shall be added * the tagbucket is created if it is NULL * @returns 0 on success, 
/**
 * Extract the tags and create a tag bucket out of them.
 *
 * The tag part is a comma-separated list terminated by ':'. An empty list
 * (the ':' comes first) is valid. An empty tag inside the list or a
 * missing ':' is an error.
 *
 * @param[in] ctx current context
 * @param[in] buf line buffer
 * @param[in] lenBuf length of buffer
 * @param[in,out] poffs offset where tags start; on exit and success the
 *                offset after the tag part (i.e. after the ':'). Not
 *                modified on failure.
 * @param[out] tagBucket tagbucket to which new tags shall be added;
 *             the tagbucket is created if it is NULL
 * @returns 0 on success, something else otherwise
 */
static int
processTags(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t *poffs,
	    struct json_object **tagBucket)
{
	int r = -1;
	es_str_t *str = NULL;
	es_size_t i;

	assert(poffs != NULL);
	i = *poffs;
	while(i < lenBuf && buf[i] != ':') {
		if(buf[i] == ',') {
			/* end of this tag */
			if(str == NULL) {
				/* empty tag; r may already be 0 from an earlier
				 * CHKR, so the error must be set explicitly */
				r = -1;
				goto done;
			}
			CHKR(addTagStrToBucket(ctx, str, tagBucket));
			es_deleteStr(str);
			str = NULL;
		} else {
			if(str == NULL) {
				CHKN(str = es_newStr(32));
			}
			CHKR(es_addChar(&str, buf[i]));
		}
		++i;
	}

	if(i >= lenBuf || buf[i] != ':') {
		r = -1; /* terminating ':' is missing */
		goto done;
	}
	++i; /* skip ':' */

	if(str != NULL) {
		CHKR(addTagStrToBucket(ctx, str, tagBucket));
	}

	*poffs = i;
	r = 0;

done:
	/* the work string is ours on every path */
	if(str != NULL)
		es_deleteStr(str);
	return r;
}


/**
 * Process a new rule and add it to tree.
 *
 * The rule consists of a tag list, a ':' and the message sample. The
 * current rule prefix (if any) is put in front of the sample before it is
 * added to the tree.
 *
 * @param[in] ctx current context
 * @param[in] buf line buffer
 * @param[in] lenBuf length of buffer
 * @param[in] offs offset where rule starts
 * @returns 0 on success, something else otherwise. As before, a sample
 *          that cannot be added to the tree is not reported as an error
 *          here.
 */
static int
processRule(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t offs)
{
	int r = -1;
	es_str_t *str = NULL;
	struct json_object *tagBucket = NULL;

	ln_dbgprintf(ctx, "sample line to add: '%s'\n", buf+offs);
	CHKR(processTags(ctx, buf, lenBuf, &offs, &tagBucket));

	if(offs == lenBuf) {
		ln_dbgprintf(ctx, "error, actual message sample part is missing");
		// TODO: provide some error indicator to app? We definitely must do (a callback?)
		r = -1; /* CHKR above left r at 0, which would signal success */
		goto done;
	}
	if(ctx->rulePrefix == NULL) {
		CHKN(str = es_newStr(lenBuf));
	} else {
		CHKN(str = es_strdup(ctx->rulePrefix));
	}
	CHKR(es_addBuf(&str, (char*)buf + offs, lenBuf - offs));
	if(addSampToTree(ctx, str, tagBucket) == 0) {
		/* the terminal tree node now references the bucket */
		tagBucket = NULL;
	}
	/* NOTE(review): a failing addSampToTree() has always been ignored
	 * here; kept that way so that one bad rule does not change what the
	 * caller sees. */
	r = 0;

done:
	if(str != NULL)
		es_deleteStr(str);
	/* a bucket that did not make it into the tree is still ours */
	if(tagBucket != NULL)
		json_object_put(tagBucket);
	return r;
}
* * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in/out] offs on entry: offset where tag starts, * on exit: updated offset AFTER TAG and (':') * @param [out] strTag obtained tag, if successful * @returns 0 on success, something else otherwise */ static int getFieldName(ln_ctx __attribute__((unused)) ctx, const char *buf, es_size_t lenBuf, es_size_t *offs, es_str_t **strTag) { int r = -1; es_size_t i; i = *offs; while(i < lenBuf && (isalnum(buf[i]) || buf[i] == '_' || buf[i] == '.')) { if(*strTag == NULL) { CHKN(*strTag = es_newStr(32)); } CHKR(es_addChar(strTag, buf[i])); ++i; } *offs = i; r = 0; done: return r; } /** * Skip over whitespace. * Skips any whitespace present at the offset. * * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in/out] offs on entry: offset first unprocessed position */ static void skipWhitespace(ln_ctx __attribute__((unused)) ctx, const char *buf, es_size_t lenBuf, es_size_t *offs) { while(*offs < lenBuf && isspace(buf[*offs])) { (*offs)++; } } /** * Obtain an annotation (field) operation. * This usually is a plus or minus sign followed by a field name * followed (if plus) by an equal sign and the field value. On entry, * offs must be positioned on the first unprocessed field (after ':' for * the initial field!). Extra whitespace is detected and, if present, * skipped. The obtained operation is added to the annotation set provided. * Note that extracted string objects are passed to the annotation; thus it * is vital NOT to free them (most importantly, this is *not* a memory leak). 
* * @param[in] ctx current context * @param[in] annot active annotation set to which the operation is to be added * @param[in] buf line buffer * @param[in] len length of buffer * @param[in/out] offs on entry: offset where tag starts, * on exit: updated offset AFTER TAG and (':') * @param [out] strTag obtained tag, if successful * @returns 0 on success, something else otherwise */ static int getAnnotationOp(ln_ctx ctx, ln_annot *annot, const char *buf, es_size_t lenBuf, es_size_t *offs) { int r = -1; es_size_t i; es_str_t *fieldName = NULL; es_str_t *fieldVal = NULL; ln_annot_opcode opc; i = *offs; skipWhitespace(ctx, buf, lenBuf, &i); if(i == lenBuf) { r = 0; goto done; /* nothing left to process (no error!) */ } if(buf[i] == '+') { opc = ln_annot_ADD; } else if(buf[i] == '-') { ln_dbgprintf(ctx, "annotate op '-' not yet implemented - failing"); goto fail; } else { ln_dbgprintf(ctx, "invalid annotate opcode '%c' - failing" , buf[i]); goto fail; } i++; if(i == lenBuf) goto fail; /* nothing left to process */ CHKR(getFieldName(ctx, buf, lenBuf, &i, &fieldName)); if(i == lenBuf) goto fail; /* nothing left to process */ if(buf[i] != '=') goto fail; /* format error */ i++; skipWhitespace(ctx, buf, lenBuf, &i); if(buf[i] != '"') goto fail; /* format error */ ++i; while(i < lenBuf && buf[i] != '"') { if(fieldVal == NULL) { CHKN(fieldVal = es_newStr(32)); } CHKR(es_addChar(&fieldVal, buf[i])); ++i; } *offs = (i == lenBuf) ? i : i+1; CHKR(ln_addAnnotOp(annot, opc, fieldName, fieldVal)); r = 0; done: return r; fail: return -1; } /** * Process a new annotation and add it to the annotation set. 
* * @param[in] ctx current context * @param[in] buf line buffer * @param[in] len length of buffer * @param[in] offs offset where annotation starts * @returns 0 on success, something else otherwise */ static int processAnnotate(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t offs) { int r; es_str_t *tag = NULL; ln_annot *annot; ln_dbgprintf(ctx, "sample annotation to add: '%s'", buf+offs); CHKR(getFieldName(ctx, buf, lenBuf, &offs, &tag)); skipWhitespace(ctx, buf, lenBuf, &offs); if(buf[offs] != ':' || tag == NULL) { ln_dbgprintf(ctx, "invalid tag field in annotation, line is '%s'", buf); r=-1; goto done; } ++offs; /* we got an annotation! */ CHKN(annot = ln_newAnnot(tag)); while(offs < lenBuf) { CHKR(getAnnotationOp(ctx, annot, buf, lenBuf, &offs)); } r = ln_addAnnotToSet(ctx->pas, annot); done: return r; } struct ln_v1_samp * ln_v1_processSamp(ln_ctx ctx, const char *buf, es_size_t lenBuf) { struct ln_v1_samp *samp = NULL; es_str_t *typeStr = NULL; es_size_t offs; int bSuccess = 0; if(getLineType(buf, lenBuf, &offs, &typeStr) != 0) goto done; if((samp = ln_v1_sampCreate(ctx)) == NULL) goto done; if(!es_strconstcmp(typeStr, "prefix")) { if(getPrefix(buf, lenBuf, offs, &ctx->rulePrefix) != 0) goto done; } else if(!es_strconstcmp(typeStr, "extendprefix")) { if(extendPrefix(ctx, buf, lenBuf, offs) != 0) goto done; } else if(!es_strconstcmp(typeStr, "rule")) { if(processRule(ctx, buf, lenBuf, offs) != 0) goto done; } else if(!es_strconstcmp(typeStr, "annotate")) { if(processAnnotate(ctx, buf, lenBuf, offs) != 0) goto done; } else { /* TODO error reporting */ char *str; str = es_str2cstr(typeStr, NULL); ln_dbgprintf(ctx, "invalid record type detected: '%s'", str); free(str); goto done; } bSuccess = 1; done: if(typeStr != NULL) es_deleteStr(typeStr); if(!bSuccess && samp != NULL) { ln_v1_sampFree(ctx, samp); samp = NULL; } return samp; } struct ln_v1_samp * ln_v1_sampRead(ln_ctx ctx, FILE *const __restrict__ repo, int *const __restrict__ isEof) { struct 
ln_v1_samp *samp = NULL; char buf[10*1024]; /**< max size of rule - TODO: make configurable */ size_t i = 0; int inParser = 0; int done = 0; while(!done) { int c = fgetc(repo); if(c == EOF) { *isEof = 1; if(i == 0) goto done; else done = 1; /* last line missing LF, still process it! */ } else if(c == '\n') { ++ctx->conf_ln_nbr; if(inParser) { if(ln_sampChkRunawayRule(ctx, repo, NULL)) { /* ignore previous rule */ inParser = 0; i = 0; } } if(!inParser && i != 0) done = 1; } else if(c == '#' && i == 0) { ln_sampSkipCommentLine(ctx, repo, NULL); i = 0; /* back to beginning */ } else { if(c == '%') inParser = (inParser) ? 0 : 1; buf[i++] = c; if(i >= sizeof(buf)) { ln_errprintf(ctx, 0, "line is too long"); goto done; } } } buf[i] = '\0'; ln_dbgprintf(ctx, "read rulebase line[~%d]: '%s'", ctx->conf_ln_nbr, buf); samp = ln_v1_processSamp(ctx, buf, i); ln_dbgprintf(ctx, "---------------------------------------"); ln_displayPTree(ctx->ptree, 0); ln_dbgprintf(ctx, "======================================="); done: return samp; } liblognorm-2.1.0/src/v1_samp.h000066400000000000000000000063731520037563000161760ustar00rootroot00000000000000/** * @file samples.h * @brief Object to process log samples. * @author Rainer Gerhards * * This object handles log samples, and in actual log sample files. * It co-operates with the ptree object to build the actual parser tree. *//* * * liblognorm - a fast samples-based log normalization library * Copyright 2010 by Rainer Gerhards and Adiscon GmbH. * * This file is part of liblognorm. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution. */ #ifndef LIBLOGNORM_V1_SAMPLES_H_INCLUDED #define LIBLOGNORM_V1_SAMPLES_H_INCLUDED #include /* we need es_size_t */ #include /** * A single log sample. */ struct ln_v1_samp { es_str_t *msg; }; /** * Reads a sample stored in buffer buf and creates a new ln_v1_samp object * out of it. * * @note * It is the caller's responsibility to delete the newly * created ln_v1_samp object if it is no longer needed. * * @param[ctx] ctx current library context * @param[buf] cstr buffer containing the string contents of the sample * @param[lenBuf] length of the sample contained within buf * @return Newly create object or NULL if an error occurred. */ struct ln_v1_samp * ln_v1_processSamp(ln_ctx ctx, const char *buf, es_size_t lenBuf); /** * Read a sample from repository (sequentially). * * Reads a sample starting with the current file position and * creates a new ln_v1_samp object out of it. * * @note * It is the caller's responsibility to delete the newly * created ln_v1_samp object if it is no longer needed. * * @param[in] ctx current library context * @param[in] repo repository descriptor * @param[out] isEof must be set to 0 on entry and is switched to 1 if EOF occurred. * @return Newly create object or NULL if an error or EOF occurred. */ struct ln_v1_samp * ln_v1_sampRead(ln_ctx ctx, FILE *repo, int *isEof); /** * Free ln_v1_samp object. 
*/ void ln_v1_sampFree(ln_ctx ctx, struct ln_v1_samp *samp); /** * Parse a given sample * * @param[in] ctx current library context * @param[in] rule string (with prefix and suffix '%' markers) * @param[in] offset in rule-string to start at (it should be pointed to * starting character: '%') * @param[in] temp string buffer(working space), * externalized for efficiency reasons * @param[out] return code (0 means success) * @return newly created node, which can be added to sample tree. */ ln_fieldList_t* ln_v1_parseFieldDescr(ln_ctx ctx, es_str_t *rule, es_size_t *bufOffs, es_str_t **str, int* ret); #endif /* #ifndef LIBLOGNORM_V1_SAMPLES_H_INCLUDED */ liblognorm-2.1.0/tests/000077500000000000000000000000001520037563000150215ustar00rootroot00000000000000liblognorm-2.1.0/tests/.gitignore000066400000000000000000000000731520037563000170110ustar00rootroot00000000000000*.o json_eq .deps core *.rulebase vgcore.* .libs user_test liblognorm-2.1.0/tests/Makefile.am000066400000000000000000000152541520037563000170640ustar00rootroot00000000000000check_PROGRAMS = json_eq err_callback_cookie TESTS_TURBO_SHELLS = \ turbo_smoke.sh # re-enable if we really need the c program check check_PROGRAMS = json_eq user_test json_eq_self_sources = json_eq.c json_eq_SOURCES = $(json_eq_self_sources) json_eq_CPPFLAGS = $(JSON_C_CFLAGS) $(LIBESTR_CFLAGS) $(WARN_CFLAGS) -I$(top_srcdir)/src json_eq_LDADD = $(JSON_C_LIBS) $(LIBESTR_LIBS) -lm json_eq_LDFLAGS = -no-install err_callback_cookie_SOURCES = err_callback_cookie.c err_callback_cookie_CPPFLAGS = $(JSON_C_CFLAGS) $(WARN_CFLAGS) -I$(top_srcdir)/src err_callback_cookie_LDADD = $(LIBLOGNORM_LIBS) err_callback_cookie_LDFLAGS = -no-install #user_test_SOURCES = user_test.c #user_test_CPPFLAGS = $(LIBLOGNORM_CFLAGS) $(JSON_C_CFLAGS) $(LIBESTR_CFLAGS) #user_test_LDADD = $(JSON_C_LIBS) $(LIBLOGNORM_LIBS) $(LIBESTR_LIBS) ../compat/compat.la #user_test_LDFLAGS = -no-install # The following tests are for the new pdag-based engine (v2+). 
# # There are some notes due: # # removed field_float_with_invalid_ruledef.sh because test is not valid. # more info: https://github.com/rsyslog/liblognorm/issues/98 # note that probably the other currently disable *_invalid_*.sh # tests are also affected. # # there seems to be a problem with some format in cisco-interface-spec # Probably this was just not seen in v1, because of some impreciseness # in the ptree normalizer. Pushing equivalent v2 test back until v2 # implementation is further developed. TESTS_SHELLSCRIPTS = \ usrdef_simple.sh \ usrdef_two.sh \ usrdef_twotypes.sh \ usrdef_actual1.sh \ usrdef_ipaddr.sh \ usrdef_ipaddr_dotdot.sh \ usrdef_ipaddr_dotdot2.sh \ usrdef_ipaddr_dotdot3.sh \ usrdef_nested_segfault.sh \ rule_empty_tag_segfault.sh \ missing_line_ending.sh \ lognormalizer-invld-call.sh \ string_rb_simple.sh \ string_rb_simple_2_lines.sh \ names.sh \ literal.sh \ include.sh \ include_RULEBASES.sh \ seq_simple.sh \ runaway_rule.sh \ runaway_rule_comment.sh \ annotate.sh \ alternative_simple.sh \ alternative_three.sh \ alternative_nested.sh \ alternative_segfault.sh \ repeat_very_simple.sh \ repeat_simple.sh \ repeat_fail_on_duplicate.sh \ repeat_mismatch_in_while.sh \ repeat_while_alternative.sh \ repeat_alternative_nested.sh \ repeat_named_while_segfault.sh \ repeat_name_dot.sh \ parser_eof_hardening.sh \ parser_prios.sh \ parser_whitespace.sh \ parser_whitespace_jsoncnf.sh \ parser_LF.sh \ parser_LF_jsoncnf.sh \ strict_prefix_actual_sample1.sh \ strict_prefix_matching_1.sh \ strict_prefix_matching_2.sh \ field_string.sh \ field_quoted_string.sh \ field_op_quoted_string_escape.sh \ field_op_quoted_string_escape_invalid.sh \ field_string_perm_chars.sh \ field_string_lazy_matching.sh \ field_string_doc_sample_lazy.sh \ field_string_dashIsEmpty.sh \ field_number.sh \ field_number-fmt_number.sh \ field_number_maxval.sh \ field_hexnumber.sh \ field_hexnumber-fmt_number.sh \ field_hexnumber_jsoncnf.sh \ field_hexnumber_range.sh \ 
field_hexnumber_range_jsoncnf.sh \ rule_last_str_short.sh \ field_mac48.sh \ field_mac48_jsoncnf.sh \ field_name_value.sh \ field_name_value_jsoncnf.sh \ field_name_value_whitespace.sh \ field_kernel_timestamp.sh \ field_kernel_timestamp_jsoncnf.sh \ field_whitespace.sh \ rule_last_str_long.sh \ field_whitespace_jsoncnf.sh \ field_rest.sh \ field_rest_jsoncnf.sh \ field_json.sh \ field_json_jsoncnf.sh \ field_json_skipempty.sh \ field_cee-syslog.sh \ field_cee-syslog_jsoncnf.sh \ field_ipv6.sh \ field_ipv6_jsoncnf.sh \ field_v2-iptables.sh \ field_v2-iptables_jsoncnf.sh \ field_cef.sh \ field_cef_jsoncnf.sh \ field_checkpoint-lea.sh \ field_checkpoint-lea_jsoncnf.sh \ field_checkpoint-lea-terminator.sh \ field_duration.sh \ field_duration_jsoncnf.sh \ field_float.sh \ field_float-fmt_number.sh \ field_float_jsoncnf.sh \ field_rfc5424timestamp-fmt_timestamp-unix.sh \ field_rfc5424timestamp-fmt_timestamp-unix-ms.sh \ very_long_logline_jsoncnf.sh # now come tests for the legacy (v1) engine TESTS_SHELLSCRIPTS += \ missing_line_ending_v1.sh \ runaway_rule_v1.sh \ runaway_rule_comment_v1.sh \ field_hexnumber_v1.sh \ field_mac48_v1.sh \ field_name_value_v1.sh \ field_kernel_timestamp_v1.sh \ field_whitespace_v1.sh \ field_rest_v1.sh \ field_json_v1.sh \ field_cee-syslog_v1.sh \ field_ipv6_v1.sh \ field_v2-iptables_v1.sh \ field_cef_v1.sh \ field_checkpoint-lea_v1.sh \ field_duration_v1.sh \ field_float_v1.sh \ field_cee-syslog_v1.sh \ field_tokenized.sh \ field_tokenized_with_invalid_ruledef.sh \ field_recursive.sh \ field_tokenized_recursive.sh \ field_interpret.sh \ field_interpret_with_invalid_ruledef.sh \ field_descent.sh \ field_descent_with_invalid_ruledef.sh \ field_suffixed.sh \ field_suffixed_with_invalid_ruledef.sh \ field_cisco-interface-spec.sh \ field_cisco-interface-spec-at-EOL.sh \ field_float_with_invalid_ruledef.sh \ very_long_logline.sh TESTS_SHELLSCRIPTS += \ parser_eof_hardening_v1.sh #re-add to TESTS if needed: user_test TESTS = \ 
$(TESTS_SHELLSCRIPTS) \ err_callback_cookie.sh REGEXP_TESTS = \ field_regex_default_group_parse_and_return.sh \ field_regex_invalid_args.sh \ field_regex_with_consume_group.sh \ field_regex_with_consume_group_and_return_group.sh \ field_regex_with_negation.sh \ field_tokenized_with_regex.sh \ field_regex_while_regex_support_is_disabled.sh EXTRA_DIST = exec.sh \ $(TESTS_SHELLSCRIPTS) \ $(TESTS_TURBO_SHELLS) \ $(REGEXP_TESTS) \ err_callback_cookie.sh \ $(json_eq_self_sources) \ $(user_test_SOURCES) if ENABLE_REGEXP TESTS += $(REGEXP_TESTS) endif if ENABLE_TURBO TESTS += $(TESTS_TURBO_SHELLS) check_PROGRAMS += turbo_test_arena turbo_test_result turbo_test_simd turbo_test_vm turbo_test_json turbo_test_arena_SOURCES = turbo_test_arena.c turbo_test_arena_CPPFLAGS = -I$(top_srcdir)/src $(WARN_CFLAGS) turbo_test_arena_LDADD = ../src/liblognorm.la turbo_test_arena_LDFLAGS = -no-install turbo_test_result_SOURCES = turbo_test_result.c turbo_test_result_CPPFLAGS = -I$(top_srcdir)/src $(WARN_CFLAGS) turbo_test_result_LDADD = ../src/liblognorm.la turbo_test_result_LDFLAGS = -no-install turbo_test_simd_SOURCES = turbo_test_simd.c turbo_test_simd_CPPFLAGS = -I$(top_srcdir)/src $(WARN_CFLAGS) $(TURBO_CFLAGS) turbo_test_simd_LDADD = ../src/liblognorm.la turbo_test_simd_LDFLAGS = -no-install turbo_test_vm_SOURCES = turbo_test_vm.c turbo_test_vm_CPPFLAGS = -I$(top_srcdir)/src $(WARN_CFLAGS) turbo_test_vm_LDADD = ../src/liblognorm.la turbo_test_vm_LDFLAGS = -no-install turbo_test_json_SOURCES = turbo_test_json.c turbo_test_json_CPPFLAGS = -I$(top_srcdir)/src $(WARN_CFLAGS) turbo_test_json_LDADD = ../src/liblognorm.la -lm turbo_test_json_LDFLAGS = -no-install TESTS += turbo_test_arena turbo_test_result turbo_test_simd turbo_test_vm turbo_test_json endif liblognorm-2.1.0/tests/alternative_nested.sh000077500000000000000000000011661520037563000212440ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under 
ASL 2.0 . $srcdir/exec.sh test_def $0 "simple alternative syntax" add_rule 'version=2' add_rule 'rule=:a % {"type":"alternative", "parser": [ [ {"type":"number", "name":"num1"}, {"type":"literal", "text":":"}, {"type":"number", "name":"num"}, ], {"type":"hexnumber", "name":"hex"} ] }% b' execute 'a 47:11 b' assert_output_json_eq '{"num": "11", "num1": "47" }' execute 'a 0x4711 b' assert_output_json_eq '{ "hex": "0x4711" }' cleanup_tmp_files liblognorm-2.1.0/tests/alternative_segfault.sh000077500000000000000000000012771520037563000215770ustar00rootroot00000000000000#!/bin/bash # added 2016-10-17 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "a case that caused a segfault in practice" add_rule 'version=2' add_rule 'rule=:%host:ipv4% %{"type":"alternative","parser":[{"type":"literal","text":"-"},{"type":"number","name":"identd"}]}% %OK:word%' execute '1.2.3.4 - TEST_OK' assert_output_json_eq '{ "OK": "TEST_OK", "host": "1.2.3.4" }' execute '1.2.3.4 100 TEST_OK' assert_output_json_eq '{ "OK": "TEST_OK", "identd": "100", "host": "1.2.3.4" }' execute '1.2.3.4 ERR TEST_OK' assert_output_json_eq '{ "originalmsg": "1.2.3.4 ERR TEST_OK", "unparsed-data": "ERR TEST_OK" }' cleanup_tmp_files liblognorm-2.1.0/tests/alternative_simple.sh000077500000000000000000000007301520037563000212470ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "simple alternative syntax" add_rule 'version=2' add_rule 'rule=:a %{"type":"alternative", "parser":[{"name":"num", "type":"number"}, {"name":"hex", "type":"hexnumber"}]}% b' execute 'a 4711 b' assert_output_json_eq '{ "num": "4711" }' execute 'a 0x4711 b' assert_output_json_eq '{ "hex": "0x4711" }' cleanup_tmp_files liblognorm-2.1.0/tests/alternative_three.sh000077500000000000000000000010641520037563000210660ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "simple alternative syntax" add_rule 'version=2' add_rule 'rule=:a %{"type":"alternative", "parser":[{"name":"num", "type":"number"}, {"name":"hex", "type":"hexnumber"}, {"name":"wrd", "type":"word"}]}% b' execute 'a 4711 b' assert_output_json_eq '{ "num": "4711" }' execute 'a 0x4711 b' assert_output_json_eq '{ "hex": "0x4711" }' execute 'a 0xyz b' assert_output_json_eq '{ "wrd": "0xyz" }' cleanup_tmp_files liblognorm-2.1.0/tests/annotate.sh000077500000000000000000000015751520037563000172010ustar00rootroot00000000000000#!/bin/bash # added 2016-11-08 by Rainer Gerhards . $srcdir/exec.sh test_def $0 "annotate functionality" reset_rules add_rule 'version=2' add_rule 'rule=ABC,WIN:<%-:number%>1 %-:date-rfc5424% %-:word% %tag:word% - - -' add_rule 'rule=ABC:<%-:number%>1 %-:date-rfc5424% %-:word% %tag:word% + - -' add_rule 'rule=WIN:<%-:number%>1 %-:date-rfc5424% %-:word% %tag:word% . - -' add_rule 'annotate=WIN:+annot1="WIN" # inline-comment' add_rule 'annotate=ABC:+annot2="ABC"' execute '<37>1 2016-11-03T23:59:59+03:00 server.example.net TAG . 
- -' assert_output_json_eq '{ "tag": "TAG", "annot1": "WIN" }' execute '<37>1 2016-11-03T23:59:59+03:00 server.example.net TAG + - -' assert_output_json_eq '{ "tag": "TAG", "annot2": "ABC" }' execute '<6>1 2016-09-02T07:41:07+02:00 server.example.net TAG - - -' assert_output_json_eq '{ "tag": "TAG", "annot1": "WIN", "annot2": "ABC" }' cleanup_tmp_files liblognorm-2.1.0/tests/err_callback_cookie.c000066400000000000000000000022721520037563000211250ustar00rootroot00000000000000#include "config.h" #include "liblognorm.h" struct err_cb_state { int called; int cookie_match; int saw_message; }; static void error_callback(void *cookie, const char *msg, size_t len) { struct err_cb_state *state = (struct err_cb_state *) cookie; if(state != NULL) { state->called++; if(cookie == state) { state->cookie_match = 1; } if(msg != NULL && len > 0) { state->saw_message = 1; } } } int main(void) { static const char *const invalid_rulebase = "version=2\n" "rule=:%arr:tokenized:quux:some_non_existent_type%\n"; struct err_cb_state state = {0, 0, 0}; ln_ctx ctx = ln_initCtx(); int ret = 1; if(ctx == NULL) return ret; if(ln_setErrMsgCB(ctx, error_callback, &state) != 0) goto done; ln_loadSamplesFromString(ctx, invalid_rulebase); if(state.called >= 1 && state.cookie_match == 1 && state.saw_message == 1) ret = 0; done: ln_exitCtx(ctx); return ret; } liblognorm-2.1.0/tests/err_callback_cookie.sh000077500000000000000000000011241520037563000213130ustar00rootroot00000000000000#!/bin/bash # Ensure the callback test uses the freshly built liblognorm from the tree. 
# Shared helper library for the shell-based tests. It is sourced by each
# test script, sets up a per-test temporary directory and provides the
# helpers used to build a rulebase, run ln_test and check its output.
#
# environment variables:
# GREP - if set, can be used to use alternative grep version
#        Most important use case is to use GNU grep (ggrep)
#        on Solaris. If unset, use "grep".
# debug - if set to "on", core dumps are enabled and execute() prints the
#        rulebase and traces the commands it runs.
set -e

if [ "x$debug" == "xon" ]; then
	#get core-dump on crash
	ulimit -c unlimited
fi

#cmd="../src/ln_test -v" # case to get debug info (add -vvv for more verbosity)
cmd="$(cd .. && pwd)/src/ln_test" # regular case
json_eq="$(pwd)/json_eq"
# every test run gets its own scratch directory, removed again on exit
test_tmpdir="$(mktemp -d "$(pwd)/tmp.test.XXXXXX")"
test_out="$test_tmpdir/test.out"
trap 'rm -rf -- "$test_tmpdir"' EXIT

# put the library directory of the build tree first in the search path
build_libdir="$(cd .. && pwd)/src/.libs"
if [ -n "${LD_LIBRARY_PATH}" ]; then
	export LD_LIBRARY_PATH="${build_libdir}:${LD_LIBRARY_PATH}"
else
	export LD_LIBRARY_PATH="${build_libdir}"
fi

. ./options.sh

# Exit with status 77 when uname reports Solaris 10 (SunOS 5.10).
no_solaris10() {
	if (uname -a | grep -q "SunOS.*5.10"); then
		printf 'platform: %s\n' "$(uname -a)"
		printf 'This looks like solaris 10, we disable known-failing tests to\n'
		printf 'permit OpenCSW to build packages. However, this are real failures\n'
		printf 'and so a fix should be done as soon as time permits.\n'
		exit 77
	fi
}

# Announce a test.
# $1 - path of the test script (only its basename is used)
# $2 - human-readable description of what is tested
test_def() {
	test_file=$(basename $1)
	test_name=$(echo $test_file | sed -e 's/\..*//g')

	echo ===============================================================================
	echo "[${test_file}]: test for ${2}"
}

# Run ln_test with the default rulebase and store its output in $test_out.
# $1 - the message to normalize, or the word "file"
# $2 - if $1 is "file": name of the input file (relative names are resolved
#      against the current directory before changing into the temp dir)
execute() {
	if [ "x$debug" == "xon" ]; then
		echo "======rulebase======="
		cat "$(rulebase_file_name)"
		echo "====================="
		set -x
	fi
	if [ "$1" == "file" ]; then
		input_file="$2"
		case "$input_file" in
			/*) ;;
			*) input_file="$(pwd)/$input_file" ;;
		esac
		(
			cd "$test_tmpdir"
			$cmd $ln_opts -r "$(rulebase_file_name)" -e json > "$test_out" < "$input_file"
		)
	else
		(
			cd "$test_tmpdir"
			printf '%s\n' "$1" | $cmd $ln_opts -r "$(rulebase_file_name)" -e json > "$test_out"
		)
	fi
	echo "Out:"
	cat "$test_out"
	if [ "x$debug" == "xon" ]; then
		set +x
	fi
}

# Run ln_test with a rulebase given as string (-R) instead of a file.
execute_with_string() {
	# $1 must be rulebase string
	# $2 must be sample string
	if [ "x$debug" == "xon" ]; then
		echo "======rulebase======="
		cat "$(rulebase_file_name)"
		echo "====================="
		set -x
	fi
	(
		cd "$test_tmpdir"
		printf '%s\n' "$2" | $cmd $ln_opts -R "$1" -e json > "$test_out"
	)
	echo "Out:"
	cat "$test_out"
	if [ "x$debug" == "xon" ]; then
		set +x
	fi
}

# Succeeds if the last output contains the fixed string $1.
assert_output_contains() {
	${GREP:-grep} -F "$1" < "$test_out"
}

# Hands the expected JSON $1 and the last output to the json_eq helper.
assert_output_json_eq() {
	"$json_eq" "$1" "$(cat "$test_out")"
}

# Print the path of a rulebase file inside the temp dir.
# $1 - optional base name; defaults to "tmp"
rulebase_file_name() {
	if [ "x$1" == "x" ]; then
		echo "$test_tmpdir/tmp.rulebase"
	else
		echo "$test_tmpdir/$1.rulebase"
	fi
}

# Remove the rulebase file ($1: optional base name).
reset_rules() {
	rb_file=$(rulebase_file_name $1)
	rm -f $rb_file
}

# Append a rule line to the rulebase.
# $1 - rule text; it is expanded unquoted, so runs of whitespace (including
#      line breaks) inside it end up as single blanks in the file
# $2 - optional rulebase base name
add_rule() {
	rb_file=$(rulebase_file_name $2)
	echo $1 >> $rb_file
}

# Same as add_rule, but without the trailing line feed.
add_rule_no_LF() {
	rb_file=$(rulebase_file_name $2)
	echo -n $1 >> $rb_file
}

# Kept for the test scripts that call it; the EXIT trap does the cleanup.
cleanup_tmp_files() {
	:
}

# start every test with an empty default rulebase
reset_rules
$srcdir/exec.sh test_def $0 "JSON field" add_rule 'version=2' add_rule 'rule=:%field:cee-syslog%' execute '@cee:{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '@cee:{"f1": "1", "f2": 2} ' # note the trailing space assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '@cee: {"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '@cee: {"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # # Things that MUST NOT work # execute '@cee: {"f1": "1", "f2": 2} data' assert_output_json_eq '{ "originalmsg": "@cee: {\"f1\": \"1\", \"f2\": 2} data", "unparsed-data": "@cee: {\"f1\": \"1\", \"f2\": 2} data" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_cee-syslog_jsoncnf.sh000077500000000000000000000016041520037563000223160ustar00rootroot00000000000000#!/bin/bash # added 2015-03-01 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "JSON field" add_rule 'version=2' add_rule 'rule=:%{"name":"field", "type":"cee-syslog"}%' execute '@cee:{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '@cee:{"f1": "1", "f2": 2} ' # note the trailing space assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '@cee: {"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '@cee: {"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # # Things that MUST NOT work # execute '@cee: {"f1": "1", "f2": 2} data' assert_output_json_eq '{ "originalmsg": "@cee: {\"f1\": \"1\", \"f2\": 2} data", "unparsed-data": "@cee: {\"f1\": \"1\", \"f2\": 2} data" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_cee-syslog_v1.sh000077500000000000000000000015321520037563000212040ustar00rootroot00000000000000#!/bin/bash # added 2015-03-01 by Rainer Gerhards # This file is part of the liblognorm project, released under 
ASL 2.0 . $srcdir/exec.sh test_def $0 "JSON field" add_rule 'rule=:%field:cee-syslog%' execute '@cee:{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '@cee:{"f1": "1", "f2": 2} ' # note the trailing space assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '@cee: {"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '@cee: {"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # # Things that MUST NOT work # execute '@cee: {"f1": "1", "f2": 2} data' assert_output_json_eq '{ "originalmsg": "@cee: {\"f1\": \"1\", \"f2\": 2} data", "unparsed-data": "@cee: {\"f1\": \"1\", \"f2\": 2} data" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_cef.sh000077500000000000000000000231471520037563000172670ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "CEF parser" add_rule 'version=2' add_rule 'rule=:%f:cef%' # fabricated tests to test specific functionality execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a value cc=field 3' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a value", "cc": "field 3" } } }' execute 'CEF:0|Vendor|Product\|1\|\\|Version|Signature ID|some name|Severity| aa=field1 bb=this is a name\=value cc=field 3' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product|1|\\", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a name=value", "cc": "field 3" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \= value 
cc=field 3' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a = value", "cc": "field 3" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity|' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=value' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "name": "value" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=val\nue' # embedded LF assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "name": "val\nue" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| n,me=value' #invalid punctuation in extension assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| n,me=value", "unparsed-data": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| n,me=value" }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=v\alue' #invalid escape in extension assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=v\\alue", "unparsed-data": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=v\\alue" }' execute 'CEF:0|V\endor|Product|Version|Signature ID|some name|Severity| name=value' #invalid escape in header assert_output_json_eq 
'{ "originalmsg": "CEF:0|V\\endor|Product|Version|Signature ID|some name|Severity| name=value", "unparsed-data": "CEF:0|V\\endor|Product|Version|Signature ID|some name|Severity| name=value" }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| ' # single trailing space - valid assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| ' # multiple trailing spaces - invalid assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { } } }' execute 'CEF:0|Vendor' assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor", "unparsed-data": "CEF:0|Vendor" }' execute 'CEF:1|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \= value cc=field 3' assert_output_json_eq '{ "originalmsg": "CEF:1|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \\= value cc=field 3", "unparsed-data": "CEF:1|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \\= value cc=field 3" }' execute '' assert_output_json_eq '{ "originalmsg": "", "unparsed-data": "" }' # finally, a use case from practice execute 'CEF:0|ArcSight|ArcSight|10.0.0.15.0|rule:101|FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt|High| eventId=24934046519 type=2 mrt=8888882444085 sessionId=0 generatorID=34rSQWFOOOCAVlswcKFkbA\=\= categorySignificance=/Normal categoryBehavior=/Execute/Query categoryDeviceGroup=/Application categoryOutcome=/Success categoryObject=/Host/Application modelConfidence=0 severity=0 relevance=10 assetCriticality=0 priority=2 art=1427882454263 cat=/Detection/FOO/UNIX/Direct Root Connection Attempt deviceSeverity=Warning 
rt=1427881661000 shost=server.foo.bar src=10.0.0.1 sourceZoneID=MRL4p30sFOOO8panjcQnFbw\=\= sourceZoneURI=/All Zones/FOO Solutions/Server Subnet/UK/PAR-WDC-12-CELL5-PROD S2U 1 10.0.0.1-10.0.0.1 sourceGeoCountryCode=GB sourceGeoLocationInfo=London slong=-0.90843 slat=51.9039 dhost=server.foo.bar dst=10.0.0.1 destinationZoneID=McFOOO0sBABCUHR83pKJmQA\=\= destinationZoneURI=/All Zones/FOO Solutions/Prod/AMERICAS/FOO 10.0.0.1-10.0.0.1 duser=johndoe destinationGeoCountryCode=US destinationGeoLocationInfo=Jersey City dlong=-90.0435 dlat=30.732 fname=FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt filePath=/All Rules/Real-time Rules/ACBP-ACCESS CONTROL and AUTHORIZATION/FOO/Unix Server/FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt fileType=Rule ruleThreadId=NQVtdFOOABDrKsmLWpyq8g\=\= cs2= flexString2=DC0001-988 locality=1 cs2Label=Configuration Resource ahost=foo.bar agt=10.0.0.1 av=10.0.0.12 atz=Europe/Berlin aid=34rSQWFOOOBCAVlswcKFkbA\=\= at=superagent_ng dvchost=server.foo.bar dvc=10.0.0.1 deviceZoneID=Mbb8pFOOODol1dBKdURJA\=\= deviceZoneURI=/All Zones/FOO Solutions/Prod/GERMANY/FOO US2 Class6 A 508 10.0.0.1-10.0.0.1 dtz=Europe/Berlin deviceFacility=Rules Engine eventAnnotationStageUpdateTime=1427882444192 eventAnnotationModificationTime=1427882444192 eventAnnotationAuditTrail=1,1427453188050,root,Queued,,,,\n eventAnnotationVersion=1 eventAnnotationFlags=0 eventAnnotationEndTime=1427881661000 eventAnnotationManagerReceiptTime=1427882444085 _cefVer=0.1 ad.arcSightEventPath=3VcygrkkBABCAYFOOLlU13A\=\= baseEventIds=24934003731"' assert_output_json_eq '{ "f": { "DeviceVendor": "ArcSight", "DeviceProduct": "ArcSight", "DeviceVersion": "10.0.0.15.0", "SignatureID": "rule:101", "Name": "FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt", "Severity": "High", "Extensions": { "eventId": "24934046519", "type": "2", "mrt": "8888882444085", "sessionId": "0", "generatorID": "34rSQWFOOOCAVlswcKFkbA==", "categorySignificance": "\/Normal", 
"categoryBehavior": "\/Execute\/Query", "categoryDeviceGroup": "\/Application", "categoryOutcome": "\/Success", "categoryObject": "\/Host\/Application", "modelConfidence": "0", "severity": "0", "relevance": "10", "assetCriticality": "0", "priority": "2", "art": "1427882454263", "cat": "\/Detection\/FOO\/UNIX\/Direct Root Connection Attempt", "deviceSeverity": "Warning", "rt": "1427881661000", "shost": "server.foo.bar", "src": "10.0.0.1", "sourceZoneID": "MRL4p30sFOOO8panjcQnFbw==", "sourceZoneURI": "\/All Zones\/FOO Solutions\/Server Subnet\/UK\/PAR-WDC-12-CELL5-PROD S2U 1 10.0.0.1-10.0.0.1", "sourceGeoCountryCode": "GB", "sourceGeoLocationInfo": "London", "slong": "-0.90843", "slat": "51.9039", "dhost": "server.foo.bar", "dst": "10.0.0.1", "destinationZoneID": "McFOOO0sBABCUHR83pKJmQA==", "destinationZoneURI": "\/All Zones\/FOO Solutions\/Prod\/AMERICAS\/FOO 10.0.0.1-10.0.0.1", "duser": "johndoe", "destinationGeoCountryCode": "US", "destinationGeoLocationInfo": "Jersey City", "dlong": "-90.0435", "dlat": "30.732", "fname": "FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt", "filePath": "\/All Rules\/Real-time Rules\/ACBP-ACCESS CONTROL and AUTHORIZATION\/FOO\/Unix Server\/FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt", "fileType": "Rule", "ruleThreadId": "NQVtdFOOABDrKsmLWpyq8g==", "cs2": "", "flexString2": "DC0001-988", "locality": "1", "cs2Label": "Configuration Resource", "ahost": "foo.bar", "agt": "10.0.0.1", "av": "10.0.0.12", "atz": "Europe\/Berlin", "aid": "34rSQWFOOOBCAVlswcKFkbA==", "at": "superagent_ng", "dvchost": "server.foo.bar", "dvc": "10.0.0.1", "deviceZoneID": "Mbb8pFOOODol1dBKdURJA==", "deviceZoneURI": "\/All Zones\/FOO Solutions\/Prod\/GERMANY\/FOO US2 Class6 A 508 10.0.0.1-10.0.0.1", "dtz": "Europe\/Berlin", "deviceFacility": "Rules Engine", "eventAnnotationStageUpdateTime": "1427882444192", "eventAnnotationModificationTime": "1427882444192", "eventAnnotationAuditTrail": "1,1427453188050,root,Queued,,,,\n", 
"eventAnnotationVersion": "1", "eventAnnotationFlags": "0", "eventAnnotationEndTime": "1427881661000", "eventAnnotationManagerReceiptTime": "1427882444085", "_cefVer": "0.1", "ad.arcSightEventPath": "3VcygrkkBABCAYFOOLlU13A==", "baseEventIds": "24934003731\"" } } }' cleanup_tmp_files liblognorm-2.1.0/tests/field_cef_jsoncnf.sh000077500000000000000000000231741520037563000210070ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "CEF parser" add_rule 'version=2' add_rule 'rule=:%{"name":"f", "type":"cef"}%' # fabricated tests to test specific functionality execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a value cc=field 3' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a value", "cc": "field 3" } } }' execute 'CEF:0|Vendor|Product\|1\|\\|Version|Signature ID|some name|Severity| aa=field1 bb=this is a name\=value cc=field 3' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product|1|\\", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a name=value", "cc": "field 3" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \= value cc=field 3' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a = value", "cc": "field 3" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity|' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", 
"DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=value' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "name": "value" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=val\nue' # embedded LF assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "name": "val\nue" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| n,me=value' #invalid punctuation in extension assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| n,me=value", "unparsed-data": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| n,me=value" }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=v\alue' #invalid escape in extension assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=v\\alue", "unparsed-data": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=v\\alue" }' execute 'CEF:0|V\endor|Product|Version|Signature ID|some name|Severity| name=value' #invalid escape in header assert_output_json_eq '{ "originalmsg": "CEF:0|V\\endor|Product|Version|Signature ID|some name|Severity| name=value", "unparsed-data": "CEF:0|V\\endor|Product|Version|Signature ID|some name|Severity| name=value" }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| ' # single trailing space - valid assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", 
"SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| ' # multiple trailing spaces - invalid assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { } } }' execute 'CEF:0|Vendor' assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor", "unparsed-data": "CEF:0|Vendor" }' execute 'CEF:1|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \= value cc=field 3' assert_output_json_eq '{ "originalmsg": "CEF:1|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \\= value cc=field 3", "unparsed-data": "CEF:1|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \\= value cc=field 3" }' execute '' assert_output_json_eq '{ "originalmsg": "", "unparsed-data": "" }' # finally, a use case from practice execute 'CEF:0|ArcSight|ArcSight|10.0.0.15.0|rule:101|FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt|High| eventId=24934046519 type=2 mrt=8888882444085 sessionId=0 generatorID=34rSQWFOOOCAVlswcKFkbA\=\= categorySignificance=/Normal categoryBehavior=/Execute/Query categoryDeviceGroup=/Application categoryOutcome=/Success categoryObject=/Host/Application modelConfidence=0 severity=0 relevance=10 assetCriticality=0 priority=2 art=1427882454263 cat=/Detection/FOO/UNIX/Direct Root Connection Attempt deviceSeverity=Warning rt=1427881661000 shost=server.foo.bar src=10.0.0.1 sourceZoneID=MRL4p30sFOOO8panjcQnFbw\=\= sourceZoneURI=/All Zones/FOO Solutions/Server Subnet/UK/PAR-WDC-12-CELL5-PROD S2U 1 10.0.0.1-10.0.0.1 sourceGeoCountryCode=GB sourceGeoLocationInfo=London slong=-0.90843 slat=51.9039 dhost=server.foo.bar dst=10.0.0.1 destinationZoneID=McFOOO0sBABCUHR83pKJmQA\=\= destinationZoneURI=/All Zones/FOO Solutions/Prod/AMERICAS/FOO 
10.0.0.1-10.0.0.1 duser=johndoe destinationGeoCountryCode=US destinationGeoLocationInfo=Jersey City dlong=-90.0435 dlat=30.732 fname=FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt filePath=/All Rules/Real-time Rules/ACBP-ACCESS CONTROL and AUTHORIZATION/FOO/Unix Server/FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt fileType=Rule ruleThreadId=NQVtdFOOABDrKsmLWpyq8g\=\= cs2= flexString2=DC0001-988 locality=1 cs2Label=Configuration Resource ahost=foo.bar agt=10.0.0.1 av=10.0.0.12 atz=Europe/Berlin aid=34rSQWFOOOBCAVlswcKFkbA\=\= at=superagent_ng dvchost=server.foo.bar dvc=10.0.0.1 deviceZoneID=Mbb8pFOOODol1dBKdURJA\=\= deviceZoneURI=/All Zones/FOO Solutions/Prod/GERMANY/FOO US2 Class6 A 508 10.0.0.1-10.0.0.1 dtz=Europe/Berlin deviceFacility=Rules Engine eventAnnotationStageUpdateTime=1427882444192 eventAnnotationModificationTime=1427882444192 eventAnnotationAuditTrail=1,1427453188050,root,Queued,,,,\n eventAnnotationVersion=1 eventAnnotationFlags=0 eventAnnotationEndTime=1427881661000 eventAnnotationManagerReceiptTime=1427882444085 _cefVer=0.1 ad.arcSightEventPath=3VcygrkkBABCAYFOOLlU13A\=\= baseEventIds=24934003731"' assert_output_json_eq '{ "f": { "DeviceVendor": "ArcSight", "DeviceProduct": "ArcSight", "DeviceVersion": "10.0.0.15.0", "SignatureID": "rule:101", "Name": "FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt", "Severity": "High", "Extensions": { "eventId": "24934046519", "type": "2", "mrt": "8888882444085", "sessionId": "0", "generatorID": "34rSQWFOOOCAVlswcKFkbA==", "categorySignificance": "\/Normal", "categoryBehavior": "\/Execute\/Query", "categoryDeviceGroup": "\/Application", "categoryOutcome": "\/Success", "categoryObject": "\/Host\/Application", "modelConfidence": "0", "severity": "0", "relevance": "10", "assetCriticality": "0", "priority": "2", "art": "1427882454263", "cat": "\/Detection\/FOO\/UNIX\/Direct Root Connection Attempt", "deviceSeverity": "Warning", "rt": "1427881661000", "shost": 
"server.foo.bar", "src": "10.0.0.1", "sourceZoneID": "MRL4p30sFOOO8panjcQnFbw==", "sourceZoneURI": "\/All Zones\/FOO Solutions\/Server Subnet\/UK\/PAR-WDC-12-CELL5-PROD S2U 1 10.0.0.1-10.0.0.1", "sourceGeoCountryCode": "GB", "sourceGeoLocationInfo": "London", "slong": "-0.90843", "slat": "51.9039", "dhost": "server.foo.bar", "dst": "10.0.0.1", "destinationZoneID": "McFOOO0sBABCUHR83pKJmQA==", "destinationZoneURI": "\/All Zones\/FOO Solutions\/Prod\/AMERICAS\/FOO 10.0.0.1-10.0.0.1", "duser": "johndoe", "destinationGeoCountryCode": "US", "destinationGeoLocationInfo": "Jersey City", "dlong": "-90.0435", "dlat": "30.732", "fname": "FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt", "filePath": "\/All Rules\/Real-time Rules\/ACBP-ACCESS CONTROL and AUTHORIZATION\/FOO\/Unix Server\/FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt", "fileType": "Rule", "ruleThreadId": "NQVtdFOOABDrKsmLWpyq8g==", "cs2": "", "flexString2": "DC0001-988", "locality": "1", "cs2Label": "Configuration Resource", "ahost": "foo.bar", "agt": "10.0.0.1", "av": "10.0.0.12", "atz": "Europe\/Berlin", "aid": "34rSQWFOOOBCAVlswcKFkbA==", "at": "superagent_ng", "dvchost": "server.foo.bar", "dvc": "10.0.0.1", "deviceZoneID": "Mbb8pFOOODol1dBKdURJA==", "deviceZoneURI": "\/All Zones\/FOO Solutions\/Prod\/GERMANY\/FOO US2 Class6 A 508 10.0.0.1-10.0.0.1", "dtz": "Europe\/Berlin", "deviceFacility": "Rules Engine", "eventAnnotationStageUpdateTime": "1427882444192", "eventAnnotationModificationTime": "1427882444192", "eventAnnotationAuditTrail": "1,1427453188050,root,Queued,,,,\n", "eventAnnotationVersion": "1", "eventAnnotationFlags": "0", "eventAnnotationEndTime": "1427881661000", "eventAnnotationManagerReceiptTime": "1427882444085", "_cefVer": "0.1", "ad.arcSightEventPath": "3VcygrkkBABCAYFOOLlU13A==", "baseEventIds": "24934003731\"" } } }' cleanup_tmp_files 
liblognorm-2.1.0/tests/field_cef_v1.sh000077500000000000000000000231001520037563000176620ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "CEF parser" add_rule 'rule=:%f:cef%' # fabricated tests to test specific functionality execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a value cc=field 3' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a value", "cc": "field 3" } } }' execute 'CEF:0|Vendor|Product\|1\|\\|Version|Signature ID|some name|Severity| aa=field1 bb=this is a name\=value cc=field 3' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product|1|\\", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a name=value", "cc": "field 3" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \= value cc=field 3' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "aa": "field1", "bb": "this is a = value", "cc": "field 3" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity|' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=value' assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", 
"DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "name": "value" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=val\nue' # embedded LF assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { "name": "val\nue" } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| n,me=value' #invalid punctuation in extension assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| n,me=value", "unparsed-data": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| n,me=value" }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=v\alue' #invalid escape in extension assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=v\\alue", "unparsed-data": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| name=v\\alue" }' execute 'CEF:0|V\endor|Product|Version|Signature ID|some name|Severity| name=value' #invalid escape in header assert_output_json_eq '{ "originalmsg": "CEF:0|V\\endor|Product|Version|Signature ID|some name|Severity| name=value", "unparsed-data": "CEF:0|V\\endor|Product|Version|Signature ID|some name|Severity| name=value" }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| ' # single trailing space - valid assert_output_json_eq '{ "f": { "DeviceVendor": "Vendor", "DeviceProduct": "Product", "DeviceVersion": "Version", "SignatureID": "Signature ID", "Name": "some name", "Severity": "Severity", "Extensions": { } } }' execute 'CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| ' # multiple trailing spaces - invalid assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| ", 
"unparsed-data": "CEF:0|Vendor|Product|Version|Signature ID|some name|Severity| " }' execute 'CEF:0|Vendor' assert_output_json_eq '{ "originalmsg": "CEF:0|Vendor", "unparsed-data": "CEF:0|Vendor" }' execute 'CEF:1|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \= value cc=field 3' assert_output_json_eq '{ "originalmsg": "CEF:1|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \\= value cc=field 3", "unparsed-data": "CEF:1|Vendor|Product|Version|Signature ID|some name|Severity| aa=field1 bb=this is a \\= value cc=field 3" }' execute '' assert_output_json_eq '{ "originalmsg": "", "unparsed-data": "" }' # finally, a use case from practice execute 'CEF:0|ArcSight|ArcSight|10.0.0.15.0|rule:101|FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt|High| eventId=24934046519 type=2 mrt=8888882444085 sessionId=0 generatorID=34rSQWFOOOCAVlswcKFkbA\=\= categorySignificance=/Normal categoryBehavior=/Execute/Query categoryDeviceGroup=/Application categoryOutcome=/Success categoryObject=/Host/Application modelConfidence=0 severity=0 relevance=10 assetCriticality=0 priority=2 art=1427882454263 cat=/Detection/FOO/UNIX/Direct Root Connection Attempt deviceSeverity=Warning rt=1427881661000 shost=server.foo.bar src=10.0.0.1 sourceZoneID=MRL4p30sFOOO8panjcQnFbw\=\= sourceZoneURI=/All Zones/FOO Solutions/Server Subnet/UK/PAR-WDC-12-CELL5-PROD S2U 1 10.0.0.1-10.0.0.1 sourceGeoCountryCode=GB sourceGeoLocationInfo=London slong=-0.90843 slat=51.9039 dhost=server.foo.bar dst=10.0.0.1 destinationZoneID=McFOOO0sBABCUHR83pKJmQA\=\= destinationZoneURI=/All Zones/FOO Solutions/Prod/AMERICAS/FOO 10.0.0.1-10.0.0.1 duser=johndoe destinationGeoCountryCode=US destinationGeoLocationInfo=Jersey City dlong=-90.0435 dlat=30.732 fname=FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt filePath=/All Rules/Real-time Rules/ACBP-ACCESS CONTROL and AUTHORIZATION/FOO/Unix Server/FOO-UNIX-Bypassing Golden Host-Direct Root Connection 
Attempt fileType=Rule ruleThreadId=NQVtdFOOABDrKsmLWpyq8g\=\= cs2= flexString2=DC0001-988 locality=1 cs2Label=Configuration Resource ahost=foo.bar agt=10.0.0.1 av=10.0.0.12 atz=Europe/Berlin aid=34rSQWFOOOBCAVlswcKFkbA\=\= at=superagent_ng dvchost=server.foo.bar dvc=10.0.0.1 deviceZoneID=Mbb8pFOOODol1dBKdURJA\=\= deviceZoneURI=/All Zones/FOO Solutions/Prod/GERMANY/FOO US2 Class6 A 508 10.0.0.1-10.0.0.1 dtz=Europe/Berlin deviceFacility=Rules Engine eventAnnotationStageUpdateTime=1427882444192 eventAnnotationModificationTime=1427882444192 eventAnnotationAuditTrail=1,1427453188050,root,Queued,,,,\n eventAnnotationVersion=1 eventAnnotationFlags=0 eventAnnotationEndTime=1427881661000 eventAnnotationManagerReceiptTime=1427882444085 _cefVer=0.1 ad.arcSightEventPath=3VcygrkkBABCAYFOOLlU13A\=\= baseEventIds=24934003731"' assert_output_json_eq '{ "f": { "DeviceVendor": "ArcSight", "DeviceProduct": "ArcSight", "DeviceVersion": "10.0.0.15.0", "SignatureID": "rule:101", "Name": "FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt", "Severity": "High", "Extensions": { "eventId": "24934046519", "type": "2", "mrt": "8888882444085", "sessionId": "0", "generatorID": "34rSQWFOOOCAVlswcKFkbA==", "categorySignificance": "\/Normal", "categoryBehavior": "\/Execute\/Query", "categoryDeviceGroup": "\/Application", "categoryOutcome": "\/Success", "categoryObject": "\/Host\/Application", "modelConfidence": "0", "severity": "0", "relevance": "10", "assetCriticality": "0", "priority": "2", "art": "1427882454263", "cat": "\/Detection\/FOO\/UNIX\/Direct Root Connection Attempt", "deviceSeverity": "Warning", "rt": "1427881661000", "shost": "server.foo.bar", "src": "10.0.0.1", "sourceZoneID": "MRL4p30sFOOO8panjcQnFbw==", "sourceZoneURI": "\/All Zones\/FOO Solutions\/Server Subnet\/UK\/PAR-WDC-12-CELL5-PROD S2U 1 10.0.0.1-10.0.0.1", "sourceGeoCountryCode": "GB", "sourceGeoLocationInfo": "London", "slong": "-0.90843", "slat": "51.9039", "dhost": "server.foo.bar", "dst": "10.0.0.1", 
"destinationZoneID": "McFOOO0sBABCUHR83pKJmQA==", "destinationZoneURI": "\/All Zones\/FOO Solutions\/Prod\/AMERICAS\/FOO 10.0.0.1-10.0.0.1", "duser": "johndoe", "destinationGeoCountryCode": "US", "destinationGeoLocationInfo": "Jersey City", "dlong": "-90.0435", "dlat": "30.732", "fname": "FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt", "filePath": "\/All Rules\/Real-time Rules\/ACBP-ACCESS CONTROL and AUTHORIZATION\/FOO\/Unix Server\/FOO-UNIX-Bypassing Golden Host-Direct Root Connection Attempt", "fileType": "Rule", "ruleThreadId": "NQVtdFOOABDrKsmLWpyq8g==", "cs2": "", "flexString2": "DC0001-988", "locality": "1", "cs2Label": "Configuration Resource", "ahost": "foo.bar", "agt": "10.0.0.1", "av": "10.0.0.12", "atz": "Europe\/Berlin", "aid": "34rSQWFOOOBCAVlswcKFkbA==", "at": "superagent_ng", "dvchost": "server.foo.bar", "dvc": "10.0.0.1", "deviceZoneID": "Mbb8pFOOODol1dBKdURJA==", "deviceZoneURI": "\/All Zones\/FOO Solutions\/Prod\/GERMANY\/FOO US2 Class6 A 508 10.0.0.1-10.0.0.1", "dtz": "Europe\/Berlin", "deviceFacility": "Rules Engine", "eventAnnotationStageUpdateTime": "1427882444192", "eventAnnotationModificationTime": "1427882444192", "eventAnnotationAuditTrail": "1,1427453188050,root,Queued,,,,\n", "eventAnnotationVersion": "1", "eventAnnotationFlags": "0", "eventAnnotationEndTime": "1427881661000", "eventAnnotationManagerReceiptTime": "1427882444085", "_cefVer": "0.1", "ad.arcSightEventPath": "3VcygrkkBABCAYFOOLlU13A==", "baseEventIds": "24934003731\"" } } }' cleanup_tmp_files liblognorm-2.1.0/tests/field_checkpoint-lea-terminator.sh000077500000000000000000000020431520037563000235720ustar00rootroot00000000000000#!/bin/bash # added 2018-10-31 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "Checkpoint LEA parser" add_rule 'version=2' add_rule 'rule=:[ %{"name":"f", "type":"checkpoint-lea", "terminator": "]"}%]' execute '[ tcp_flags: RST-ACK; src: 192.168.0.1; ]' assert_output_json_eq '{ "f": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } }' execute '[ tcp_flags: RST-ACK; src: 192.168.0.1 ]' assert_output_json_eq '{ "f": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } }' # Newest Checkpoint format execute '[ tcp_flags:"RST-ACK"; src:"192.168.0.1"; ]' assert_output_json_eq '{ "f": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } }' execute '[ tcp_flags:"RST-ACK"; src:"192.168.0.1" ]' assert_output_json_eq '{ "f": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } }' execute '[ key:"value with \"escaped quote\""; path:"C:\\Windows\\System32" ]' assert_output_json_eq '{ "f": { "key": "value with \\\"escaped quote\\\"", "path": "C:\\\\Windows\\\\System32" } }' cleanup_tmp_files liblognorm-2.1.0/tests/field_checkpoint-lea.sh000077500000000000000000000006001520037563000214050ustar00rootroot00000000000000#!/bin/bash # added 2015-06-18 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "Checkpoint LEA parser" add_rule 'version=2' add_rule 'rule=:%f:checkpoint-lea%' execute 'tcp_flags: RST-ACK; src: 192.168.0.1;' assert_output_json_eq '{ "f": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } }' cleanup_tmp_files liblognorm-2.1.0/tests/field_checkpoint-lea_jsoncnf.sh000077500000000000000000000010641520037563000231320ustar00rootroot00000000000000#!/bin/bash # added 2015-06-18 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "Checkpoint LEA parser" add_rule 'version=2' add_rule 'rule=:%{"name":"f", "type":"checkpoint-lea"}%' execute 'tcp_flags: RST-ACK; src: 192.168.0.1;' assert_output_json_eq '{ "f": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } }' # Newest Checkpoint format execute 'tcp_flags:"RST-ACK"; src:"192.168.0.1";' assert_output_json_eq '{ "f": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } }' cleanup_tmp_files liblognorm-2.1.0/tests/field_checkpoint-lea_v1.sh000077500000000000000000000013121520037563000220140ustar00rootroot00000000000000#!/bin/bash # added 2015-06-18 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "Checkpoint LEA parser" add_rule 'rule=:%f:checkpoint-lea%' execute 'tcp_flags: RST-ACK; src: 192.168.0.1;' assert_output_json_eq '{ "f": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } }' # Newest Checkpoint format execute 'tcp_flags:"RST-ACK"; src:"192.168.0.1";' assert_output_json_eq '{ "f": { "tcp_flags": "RST-ACK", "src": "192.168.0.1" } }' execute 'key:"value with \"escaped quote\""; path:"C:\\Windows\\System32";' assert_output_json_eq '{ "f": { "key": "value with \\\"escaped quote\\\"", "path": "C:\\\\Windows\\\\System32" } }' cleanup_tmp_files liblognorm-2.1.0/tests/field_cisco-interface-spec-at-EOL.sh000077500000000000000000000011241520037563000235260ustar00rootroot00000000000000#!/bin/bash # added 2015-04-13 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "cisco-interface-spec-at-EOL syntax" add_rule 'version=2' add_rule 'rule=:begin %field:cisco-interface-spec%%r:rest%' execute 'begin outside:192.0.2.1/50349 end' assert_output_json_eq '{ "r": " end", "field": { "interface": "outside", "ip": "192.0.2.1", "port": "50349" } }' execute 'begin outside:192.0.2.1/50349' assert_output_json_eq '{ "r": "", "field": { "interface": "outside", "ip": "192.0.2.1", "port": "50349" } }' cleanup_tmp_files liblognorm-2.1.0/tests/field_cisco-interface-spec.sh000077500000000000000000000045541520037563000225210ustar00rootroot00000000000000#!/bin/bash # added 2015-04-13 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "cisco-interface-spec syntax" add_rule 'rule=:begin %field:cisco-interface-spec% end' execute 'begin outside:176.97.252.102/50349 end' assert_output_json_eq '{"field": { "interface": "outside", "ip": "176.97.252.102", "port": "50349" } }' execute 'begin outside:176.97.252.102/50349(DOMAIN\rainer) end' # we need to add the backslash escape for the testbench plumbing assert_output_json_eq '{"field": { "interface": "outside", "ip": "176.97.252.102", "port": "50349", "user": "DOMAIN\\rainer" } }' execute 'begin outside:176.97.252.102/50349(test/rainer) end' # we need to add the backslash escape for the testbench plumbing assert_output_json_eq '{"field": { "interface": "outside", "ip": "176.97.252.102", "port": "50349", "user": "test/rainer" } }' execute 'begin outside:176.97.252.102/50349(rainer) end' # we need to add the backslash escape for the testbench plumbing assert_output_json_eq '{"field": { "interface": "outside", "ip": "176.97.252.102", "port": "50349", "user": "rainer" } }' execute 'begin outside:192.168.1.13/50179 (192.168.1.13/50179)(LOCAL\some.user) end' assert_output_json_eq ' { "field": { "interface": "outside", "ip": "192.168.1.13", "port": "50179", "ip2": "192.168.1.13", "port2": "50179", "user": 
"LOCAL\\some.user" } }' execute 'begin outside:192.168.1.13/50179 (192.168.1.13/50179) (LOCAL\some.user) end' assert_output_json_eq ' { "field": { "interface": "outside", "ip": "192.168.1.13", "port": "50179", "ip2": "192.168.1.13", "port2": "50179", "user": "LOCAL\\some.user" } }' execute 'begin 192.168.1.13/50179 (192.168.1.13/50179) (LOCAL\without.if) end' assert_output_json_eq ' { "field": { "ip": "192.168.1.13", "port": "50179", "ip2": "192.168.1.13", "port2": "50179", "user": "LOCAL\\without.if" } }' # # Test for things that MUST NOT match! # # the SP before the second IP is missing: execute 'begin outside:192.168.1.13/50179(192.168.1.13/50179)(LOCAL\some.user) end' # note: the expected result looks a bit strange. This is the case because we # cannot (yet?) detect that "(192.168.1.13/50179)" is not a valid user name. assert_output_json_eq '{ "originalmsg": "begin outside:192.168.1.13\/50179(192.168.1.13\/50179)(LOCAL\\some.user) end", "unparsed-data": "(LOCAL\\some.user) end" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_descent.sh000077500000000000000000000102741520037563000201540ustar00rootroot00000000000000#!/bin/bash # added 2014-12-11 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "descent based parsing field" #descent with default tail field add_rule 'rule=:blocked on %device:word% %net:descent:./child.rulebase%at %tm:date-rfc5424%' reset_rules 'child' add_rule 'rule=:%ip_addr:ipv4% %tail:rest%' 'child' add_rule 'rule=:%subnet_addr:ipv4%/%mask:number% %tail:rest%' 'child' execute 'blocked on gw-1 10.20.30.40 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"ip_addr": "10.20.30.40"}, "tm": "2014-12-08T08:53:33.05+05:30"}' execute 'blocked on gw-1 10.20.30.40/16 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"subnet_addr": "10.20.30.40", "mask": "16"}, "tm": "2014-12-08T08:53:33.05+05:30"}' #descent with tail field being explicitly named 'tail' reset_rules add_rule 'rule=:blocked on %device:word% %net:descent:./field.rulebase:tail%at %tm:date-rfc5424%' reset_rules 'field' add_rule 'rule=:%ip_addr:ipv4% %tail:rest%' 'field' add_rule 'rule=:%subnet_addr:ipv4%/%mask:number% %tail:rest%' 'field' execute 'blocked on gw-1 10.20.30.40 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"ip_addr": "10.20.30.40"}, "tm": "2014-12-08T08:53:33.05+05:30"}' execute 'blocked on gw-1 10.20.30.40/16 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"subnet_addr": "10.20.30.40", "mask": "16"}, "tm": "2014-12-08T08:53:33.05+05:30"}' #descent with tail field having arbitrary name reset_rules add_rule 'rule=:blocked on %device:word% %net:descent:./subset.rulebase:remaining%at %tm:date-rfc5424%' reset_rules 'subset' add_rule 'rule=:%ip_addr:ipv4% %remaining:rest%' 'subset' add_rule 'rule=:%subnet_addr:ipv4%/%mask:number% %remaining:rest%' 'subset' execute 'blocked on gw-1 10.20.30.40 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"ip_addr": "10.20.30.40"}, "tm": "2014-12-08T08:53:33.05+05:30"}' execute 'blocked on gw-1 10.20.30.40/16 at 
2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"subnet_addr": "10.20.30.40", "mask": "16"}, "tm": "2014-12-08T08:53:33.05+05:30"}' #head call handling with with separate rulebase and tail field with with arbitrary name (this is what recursive field can't do) reset_rules add_rule 'rule=:%net:descent:./alt.rulebase:remains%blocked on %device:word%' reset_rules 'alt' add_rule 'rule=:%ip_addr:ipv4% %remains:rest%' 'alt' add_rule 'rule=:%subnet_addr:ipv4%/%mask:number% %remains:rest%' 'alt' execute '10.20.30.40 blocked on gw-1' assert_output_json_eq '{"device": "gw-1", "net": {"ip_addr": "10.20.30.40"}}' execute '10.20.30.40/16 blocked on gw-1' assert_output_json_eq '{"device": "gw-1", "net": {"subnet_addr": "10.20.30.40", "mask": "16"}}' #descent-field which calls another descent-field reset_rules add_rule 'rule=:%op:descent:./op.rulebase:rest% on %device:word%' reset_rules 'op' add_rule 'rule=:%net:descent:./alt.rulebase:remains%%action:word%%rest:rest%' 'op' reset_rules 'alt' add_rule 'rule=:%ip_addr:ipv4% %remains:rest%' 'alt' add_rule 'rule=:%subnet_addr:ipv4%/%mask:number% %remains:rest%' 'alt' execute '10.20.30.40 blocked on gw-1' assert_output_json_eq '{"op": {"action": "blocked", "net": {"ip_addr": "10.20.30.40"}}, "device": "gw-1"}' execute '10.20.30.40/16 unblocked on gw-2' assert_output_json_eq '{"op": {"action": "unblocked", "net": {"subnet_addr": "10.20.30.40", "mask": "16"}}, "device": "gw-2"}' #descent with file name having lognorm special char add_rule 'rule=:blocked on %device:word% %net:descent:./part\x3anet.rulebase%at %tm:date-rfc5424%' reset_rules 'part:net' add_rule 'rule=:%ip_addr:ipv4% %tail:rest%' 'part:net' add_rule 'rule=:%subnet_addr:ipv4%/%mask:number% %tail:rest%' 'part:net' execute 'blocked on gw-1 10.20.30.40 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"ip_addr": "10.20.30.40"}, "tm": "2014-12-08T08:53:33.05+05:30"}' execute 'blocked on gw-1 10.20.30.40/16 at 
2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"subnet_addr": "10.20.30.40", "mask": "16"}, "tm": "2014-12-08T08:53:33.05+05:30"}' cleanup_tmp_files liblognorm-2.1.0/tests/field_descent_with_invalid_ruledef.sh000077500000000000000000000050661520037563000244260ustar00rootroot00000000000000#!/bin/bash # added 2014-12-15 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "descent based parsing field, with invalid ruledef" #invalid parent field name add_rule 'rule=:%net:desce%' execute '10.20.30.40 foo' assert_output_json_eq '{ "originalmsg": "10.20.30.40 foo", "unparsed-data": "10.20.30.40 foo" }' #no args add_rule 'rule=:%net:descent%' execute '10.20.30.40 foo' assert_output_json_eq '{ "originalmsg": "10.20.30.40 foo", "unparsed-data": "10.20.30.40 foo" }' #incorrect rulebase file path rm -f $srcdir/quux.rulebase add_rule 'rule=:%net:descent:./quux.rulebase%' execute '10.20.30.40 foo' assert_output_json_eq '{ "originalmsg": "10.20.30.40 foo", "unparsed-data": "10.20.30.40 foo" }' #invalid content in rulebase file reset_rules add_rule 'rule=:%net:descent:./child.rulebase%' reset_rules 'child' add_rule 'rule=:%ip_addr:ipv4 %tail:rest%' 'child' execute '10.20.30.40 foo' assert_output_json_eq '{ "originalmsg": "10.20.30.40 foo", "unparsed-data": "10.20.30.40 foo" }' #empty child rulebase file reset_rules add_rule 'rule=:%net:descent:./child.rulebase%' reset_rules 'child' execute '10.20.30.40 foo' assert_output_json_eq '{ "originalmsg": "10.20.30.40 foo", "unparsed-data": "10.20.30.40 foo" }' #no rulebase given reset_rules add_rule 'rule=:%net:descent:' reset_rules 'child' execute '10.20.30.40 foo' assert_output_json_eq '{ "originalmsg": "10.20.30.40 foo", "unparsed-data": "10.20.30.40 foo" }' #no rulebase and no tail-field given reset_rules add_rule 'rule=:%net:descent::' reset_rules 'child' execute '10.20.30.40 foo' assert_output_json_eq '{ "originalmsg": 
"10.20.30.40 foo", "unparsed-data": "10.20.30.40 foo" }' #no rulebase given, but has valid tail-field reset_rules add_rule 'rule=:%net:descent::foo' reset_rules 'child' execute '10.20.30.40 foo' assert_output_json_eq '{ "originalmsg": "10.20.30.40 foo", "unparsed-data": "10.20.30.40 foo" }' #empty tail-field given echo empty tail-field given reset_rules add_rule 'rule=:A%net:descent:./child.rulebase:%' reset_rules 'child' add_rule 'rule=:%ip_addr:ipv4% %tail:rest%' 'child' execute 'A10.20.30.40 foo' assert_output_json_eq '{ "net": { "tail": "foo", "ip_addr": "10.20.30.40" } }' #named tail-field not populated echo tail-field not populated reset_rules add_rule 'rule=:%net:descent:./child.rulebase:foo% foo' reset_rules 'child' add_rule 'rule=:%ip_addr:ipv4% %tail:rest%' 'child' execute '10.20.30.40 foo' assert_output_json_eq '{ "originalmsg": "10.20.30.40 foo", "unparsed-data": "10.20.30.40 foo" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_duration.sh000077500000000000000000000014261520037563000203530ustar00rootroot00000000000000#!/bin/bash # added 2015-03-12 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "duration syntax" add_rule 'version=2' add_rule 'rule=:duration %field:duration% bytes' add_rule 'rule=:duration %field:duration%' execute 'duration 0:00:42 bytes' assert_output_json_eq '{"field": "0:00:42"}' execute 'duration 0:00:42' assert_output_json_eq '{"field": "0:00:42"}' execute 'duration 9:00:42 bytes' assert_output_json_eq '{"field": "9:00:42"}' execute 'duration 00:00:42 bytes' assert_output_json_eq '{"field": "00:00:42"}' execute 'duration 37:59:42 bytes' assert_output_json_eq '{"field": "37:59:42"}' execute 'duration 37:60:42 bytes' assert_output_contains '"unparsed-data": "37:60:42 bytes"' cleanup_tmp_files liblognorm-2.1.0/tests/field_duration_jsoncnf.sh000077500000000000000000000015001520037563000220640ustar00rootroot00000000000000#!/bin/bash # added 2015-03-12 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "duration syntax" add_rule 'version=2' add_rule 'rule=:duration %{"name":"field", "type":"duration"}% bytes' add_rule 'rule=:duration %{"name":"field", "type":"duration"}%' execute 'duration 0:00:42 bytes' assert_output_json_eq '{"field": "0:00:42"}' execute 'duration 0:00:42' assert_output_json_eq '{"field": "0:00:42"}' execute 'duration 9:00:42 bytes' assert_output_json_eq '{"field": "9:00:42"}' execute 'duration 00:00:42 bytes' assert_output_json_eq '{"field": "00:00:42"}' execute 'duration 37:59:42 bytes' assert_output_json_eq '{"field": "37:59:42"}' execute 'duration 37:60:42 bytes' assert_output_contains '"unparsed-data": "37:60:42 bytes"' cleanup_tmp_files liblognorm-2.1.0/tests/field_duration_v1.sh000077500000000000000000000014161520037563000207600ustar00rootroot00000000000000#!/bin/bash # added 2015-03-12 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "duration syntax" add_rule 'rule=:duration %field:duration% bytes' add_rule 'rule=:duration %field:duration%' execute 'duration 0:00:42 bytes' assert_output_json_eq '{"field": "0:00:42"}' execute 'duration 0:00:42' assert_output_json_eq '{"field": "0:00:42"}' execute 'duration 9:00:42 bytes' assert_output_json_eq '{"field": "9:00:42"}' execute 'duration 00:00:42 bytes' assert_output_json_eq '{"field": "00:00:42"}' execute 'duration 37:59:42 bytes' assert_output_json_eq '{"field": "37:59:42"}' execute 'duration 37:60:42 bytes' assert_output_contains '"unparsed-data": "37:60:42 bytes"' cleanup_tmp_files liblognorm-2.1.0/tests/field_float-fmt_number.sh000077500000000000000000000016531520037563000217710ustar00rootroot00000000000000#!/bin/bash # added 2017-10-02 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "float field" add_rule 'version=2' add_rule 'rule=:here is a number %{ "type":"float", "name":"num", "format":"number"}% in floating pt form' execute 'here is a number 15.9 in floating pt form' assert_output_json_eq '{"num": 15.9}' reset_rules # note: floating point numbers are tricky to get right, even more so if negative. add_rule 'version=2' add_rule 'rule=:here is a negative number %{ "type":"float", "name":"num", "format":"number"}% for you' execute 'here is a negative number -4.2 for you' assert_output_json_eq '{"num": -4.2}' reset_rules add_rule 'version=2' add_rule 'rule=:here is another real number %{ "type":"float", "name":"num", "format":"number"}%.' execute 'here is another real number 2.71.' assert_output_json_eq '{"num": 2.71}' cleanup_tmp_files liblognorm-2.1.0/tests/field_float.sh000077500000000000000000000013401520037563000176260ustar00rootroot00000000000000#!/bin/bash # added 2015-02-25 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "float field" add_rule 'version=2' add_rule 'rule=:here is a number %num:float% in floating pt form' execute 'here is a number 15.9 in floating pt form' assert_output_json_eq '{"num": "15.9"}' reset_rules add_rule 'version=2' add_rule 'rule=:here is a negative number %num:float% for you' execute 'here is a negative number -4.2 for you' assert_output_json_eq '{"num": "-4.2"}' reset_rules add_rule 'version=2' add_rule 'rule=:here is another real number %real_no:float%.' execute 'here is another real number 2.71.' assert_output_json_eq '{"real_no": "2.71"}' cleanup_tmp_files liblognorm-2.1.0/tests/field_float_jsoncnf.sh000077500000000000000000000014371520037563000213550ustar00rootroot00000000000000#!/bin/bash # added 2015-02-25 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "float field" add_rule 'version=2' add_rule 'rule=:here is a number %{"name":"num", "type":"float"}% in floating pt form' execute 'here is a number 15.9 in floating pt form' assert_output_json_eq '{"num": "15.9"}' reset_rules add_rule 'version=2' add_rule 'rule=:here is a negative number %{"name":"num", "type":"float"}% for you' execute 'here is a negative number -4.2 for you' assert_output_json_eq '{"num": "-4.2"}' reset_rules add_rule 'version=2' add_rule 'rule=:here is another real number %{"name":"real_no", "type":"float"}%.' execute 'here is another real number 2.71.' assert_output_json_eq '{"real_no": "2.71"}' cleanup_tmp_files liblognorm-2.1.0/tests/field_float_v1.sh000077500000000000000000000012561520037563000202420ustar00rootroot00000000000000#!/bin/bash # added 2015-02-25 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "float field" add_rule 'rule=:here is a number %num:float% in floating pt form' execute 'here is a number 15.9 in floating pt form' assert_output_json_eq '{"num": "15.9"}' reset_rules add_rule 'rule=:here is a negative number %num:float% for you' execute 'here is a negative number -4.2 for you' assert_output_json_eq '{"num": "-4.2"}' reset_rules add_rule 'rule=:here is another real number %real_no:float%.' execute 'here is another real number 2.71.' assert_output_json_eq '{"real_no": "2.71"}' cleanup_tmp_files liblognorm-2.1.0/tests/field_float_with_invalid_ruledef.sh000077500000000000000000000005271520037563000241030ustar00rootroot00000000000000#!/bin/bash # added 2015-02-26 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "float with invalid field-declaration" add_rule 'rule=:%no:flo% foo' execute '10.0 foo' assert_output_json_eq '{ "originalmsg": "10.0 foo", "unparsed-data": "10.0 foo" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_hexnumber-fmt_number.sh000077500000000000000000000011421520037563000226520ustar00rootroot00000000000000#!/bin/bash # added 2017-10-02 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "hexnumber field" add_rule 'version=2' add_rule 'rule=:here is a number %{ "type":"hexnumber", "name":"num", "format":"number"} % in hex form' execute 'here is a number 0x1234 in hex form' assert_output_json_eq '{"num": 4660}' #check cases where parsing failure must occur execute 'here is a number 0x1234in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0x1234in hex form", "unparsed-data": "0x1234in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_hexnumber.sh000077500000000000000000000010741520037563000205220ustar00rootroot00000000000000#!/bin/bash # added 2015-03-01 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "hexnumber field" add_rule 'version=2' add_rule 'rule=:here is a number %num:hexnumber% in hex form' execute 'here is a number 0x1234 in hex form' assert_output_json_eq '{"num": "0x1234"}' #check cases where parsing failure must occur execute 'here is a number 0x1234in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0x1234in hex form", "unparsed-data": "0x1234in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_hexnumber_jsoncnf.sh000077500000000000000000000011161520037563000222370ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "hexnumber field" add_rule 'version=2' add_rule 'rule=:here is a number %{"name":"num", "type":"hexnumber"}% in hex form' execute 'here is a number 0x1234 in hex form' assert_output_json_eq '{"num": "0x1234"}' #check cases where parsing failure must occur execute 'here is a number 0x1234in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0x1234in hex form", "unparsed-data": "0x1234in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_hexnumber_range.sh000077500000000000000000000013731520037563000217000ustar00rootroot00000000000000#!/bin/bash # added 2015-03-01 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "hexnumber field with range checks" add_rule 'version=2' add_rule 'rule=:here is a number %num:hexnumber{"maxval":191}% in hex form' execute 'here is a number 0x12 in hex form' assert_output_json_eq '{"num": "0x12"}' execute 'here is a number 0x0 in hex form' assert_output_json_eq '{"num": "0x0"}' execute 'here is a number 0xBf in hex form' assert_output_json_eq '{"num": "0xBf"}' #check cases where parsing failure must occur execute 'here is a number 0xc0 in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0xc0 in hex form", "unparsed-data": "0xc0 in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_hexnumber_range_jsoncnf.sh000077500000000000000000000014201520037563000234110ustar00rootroot00000000000000#!/bin/bash # added 2015-03-01 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "hexnumber field with range checks" add_rule 'version=2' add_rule 'rule=:here is a number %{"name":"num", "type":"hexnumber", "maxval":191}% in hex form' execute 'here is a number 0x12 in hex form' assert_output_json_eq '{"num": "0x12"}' execute 'here is a number 0x0 in hex form' assert_output_json_eq '{"num": "0x0"}' execute 'here is a number 0xBf in hex form' assert_output_json_eq '{"num": "0xBf"}' #check cases where parsing failure must occur execute 'here is a number 0xc0 in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0xc0 in hex form", "unparsed-data": "0xc0 in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_hexnumber_v1.sh000077500000000000000000000010631520037563000211260ustar00rootroot00000000000000#!/bin/bash # added 2015-03-01 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "hexnumber field" add_rule 'rule=:here is a number %num:hexnumber% in hex form' execute 'here is a number 0x1234 in hex form' assert_output_json_eq '{"num": "0x1234"}' #check cases where parsing failure must occur execute 'here is a number 0x1234in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0x1234in hex form", "unparsed-data": "0x1234in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_interpret.sh000077500000000000000000000045711520037563000205460ustar00rootroot00000000000000#!/bin/bash # added 2014-12-11 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "value interpreting field" add_rule 'rule=:%session_count:interpret:int:word% sessions established' execute '64 sessions established' assert_output_json_eq '{"session_count": 64}' reset_rules add_rule 'rule=:max sessions limit reached: %at_limit:interpret:bool:word%' execute 'max sessions limit reached: true' assert_output_json_eq '{"at_limit": true}' execute 'max sessions limit reached: false' assert_output_json_eq '{"at_limit": false}' execute 'max sessions limit reached: TRUE' assert_output_json_eq '{"at_limit": true}' execute 'max sessions limit reached: FALSE' assert_output_json_eq '{"at_limit": false}' execute 'max sessions limit reached: yes' assert_output_json_eq '{"at_limit": true}' execute 'max sessions limit reached: no' assert_output_json_eq '{"at_limit": false}' execute 'max sessions limit reached: YES' assert_output_json_eq '{"at_limit": true}' execute 'max sessions limit reached: NO' assert_output_json_eq '{"at_limit": false}' reset_rules add_rule 'rule=:record count for shard [%shard:interpret:base16int:char-to:]%] is %record_count:interpret:base10int:number% and %latency_percentile:interpret:float:char-to:\x25%\x25ile latency is %latency:interpret:float:word% %latency_unit:word%' execute 'record count for shard [3F] is 50000 and 99.99%ile latency is 2.1 seconds' assert_output_json_eq '{"shard": 63, "record_count": 50000, "latency_percentile": 99.99, "latency": 2.1, "latency_unit" : "seconds"}' reset_rules add_rule 'rule=:%latency_percentile:interpret:float:char-to:\x25%\x25ile latency is %latency:interpret:float:word%' execute '98.1%ile latency is 1.999123' assert_output_json_eq '{"latency_percentile": 98.1, "latency": 1.999123}' reset_rules add_rule 'rule=:%latency_percentile:interpret:float:number%' add_rule 'rule=:%latency_percentile:interpret:int:number%' add_rule 'rule=:%latency_percentile:interpret:base16int:number%' add_rule 'rule=:%latency_percentile:interpret:base10int:number%' add_rule 
'rule=:%latency_percentile:interpret:boolean:number%' execute 'foo' assert_output_json_eq '{ "originalmsg": "foo", "unparsed-data": "foo" }' reset_rules add_rule 'rule=:gc pause: %pause_time:interpret:float:float%ms' execute 'gc pause: 12.3ms' assert_output_json_eq '{"pause_time": 12.3}' cleanup_tmp_files liblognorm-2.1.0/tests/field_interpret_with_invalid_ruledef.sh000077500000000000000000000046751520037563000250220ustar00rootroot00000000000000#!/bin/bash # added 2014-12-11 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "value interpreting field, with invalid ruledef" add_rule 'rule=:%session_count:interpret:int:wd% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions established" }' reset_rules add_rule 'rule=:%session_count:interpret:int:% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions established" }' reset_rules add_rule 'rule=:%session_count:interpret:int% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions established" }' reset_rules add_rule 'rule=:%session_count:interpret:in% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions established" }' reset_rules add_rule 'rule=:%session_count:interpret:in:word% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions established" }' reset_rules add_rule 'rule=:%session_count:interpret:in:wd% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions 
established" }' reset_rules add_rule 'rule=:%session_count:interpret::word% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions established" }' reset_rules add_rule 'rule=:%session_count:interpret::% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions established" }' reset_rules add_rule 'rule=:%session_count:inter::% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions established" }' reset_rules add_rule 'rule=:%session_count:inter:int:word% sessions established' execute '64 sessions established' assert_output_json_eq '{ "originalmsg": "64 sessions established", "unparsed-data": "64 sessions established" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_ipv6.sh000077500000000000000000000043071520037563000174130ustar00rootroot00000000000000#!/bin/bash # added 2015-06-23 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh add_rule 'version=2' test_def $0 "IPv6 parser" add_rule 'rule=:%f:ipv6%' # examples from RFC4291, sect. 
2.2 execute 'ABCD:EF01:2345:6789:ABCD:EF01:2345:6789' assert_output_json_eq '{ "f": "ABCD:EF01:2345:6789:ABCD:EF01:2345:6789" }' execute 'ABCD:EF01:2345:6789:abcd:EF01:2345:6789' # mixed hex case assert_output_json_eq '{ "f": "ABCD:EF01:2345:6789:abcd:EF01:2345:6789" }' execute '2001:DB8:0:0:8:800:200C:417A' assert_output_json_eq '{ "f": "2001:DB8:0:0:8:800:200C:417A" }' execute '0:0:0:0:0:0:0:1' assert_output_json_eq '{ "f": "0:0:0:0:0:0:0:1" }' execute '2001:DB8::8:800:200C:417A' assert_output_json_eq '{ "f": "2001:DB8::8:800:200C:417A" }' execute 'FF01::101' assert_output_json_eq '{ "f": "FF01::101" }' execute '::1' assert_output_json_eq '{ "f": "::1" }' execute '::' assert_output_json_eq '{ "f": "::" }' execute '0:0:0:0:0:0:13.1.68.3' assert_output_json_eq '{ "f": "0:0:0:0:0:0:13.1.68.3" }' execute '::13.1.68.3' assert_output_json_eq '{ "f": "::13.1.68.3" }' execute '::FFFF:129.144.52.38' assert_output_json_eq '{ "f": "::FFFF:129.144.52.38" }' # invalid samples execute '2001:DB8::8::800:200C:417A' # two :: sequences assert_output_json_eq '{ "originalmsg": "2001:DB8::8::800:200C:417A", "unparsed-data": "2001:DB8::8::800:200C:417A" }' execute 'ABCD:EF01:2345:6789:ABCD:EF01:2345::6789' # :: with too many blocks assert_output_json_eq '{ "originalmsg": "ABCD:EF01:2345:6789:ABCD:EF01:2345::6789", "unparsed-data": "ABCD:EF01:2345:6789:ABCD:EF01:2345::6789" }' execute 'ABCD:EF01:2345:6789:ABCD:EF01:2345:1:6798' # too many blocks (9) assert_output_json_eq '{"originalmsg": "ABCD:EF01:2345:6789:ABCD:EF01:2345:1:6798", "unparsed-data": ":6798" }' execute ':0:0:0:0:0:0:1' # missing first digit assert_output_json_eq '{ "originalmsg": ":0:0:0:0:0:0:1", "unparsed-data": ":0:0:0:0:0:0:1" }' execute '0:0:0:0:0:0:0:' # missing last digit assert_output_json_eq '{ "originalmsg": "0:0:0:0:0:0:0:", "unparsed-data": "0:0:0:0:0:0:0:" }' execute '13.1.68.3' # pure IPv4 address assert_output_json_eq '{ "originalmsg": "13.1.68.3", "unparsed-data": "13.1.68.3" }' cleanup_tmp_files 
liblognorm-2.1.0/tests/field_ipv6_jsoncnf.sh000077500000000000000000000043341520037563000211330ustar00rootroot00000000000000#!/bin/bash # added 2015-06-23 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "IPv6 parser" add_rule 'version=2' add_rule 'rule=:%{"name":"f", "type":"ipv6"}%' # examples from RFC4291, sect. 2.2 execute 'ABCD:EF01:2345:6789:ABCD:EF01:2345:6789' assert_output_json_eq '{ "f": "ABCD:EF01:2345:6789:ABCD:EF01:2345:6789" }' execute 'ABCD:EF01:2345:6789:abcd:EF01:2345:6789' # mixed hex case assert_output_json_eq '{ "f": "ABCD:EF01:2345:6789:abcd:EF01:2345:6789" }' execute '2001:DB8:0:0:8:800:200C:417A' assert_output_json_eq '{ "f": "2001:DB8:0:0:8:800:200C:417A" }' execute '0:0:0:0:0:0:0:1' assert_output_json_eq '{ "f": "0:0:0:0:0:0:0:1" }' execute '2001:DB8::8:800:200C:417A' assert_output_json_eq '{ "f": "2001:DB8::8:800:200C:417A" }' execute 'FF01::101' assert_output_json_eq '{ "f": "FF01::101" }' execute '::1' assert_output_json_eq '{ "f": "::1" }' execute '::' assert_output_json_eq '{ "f": "::" }' execute '0:0:0:0:0:0:13.1.68.3' assert_output_json_eq '{ "f": "0:0:0:0:0:0:13.1.68.3" }' execute '::13.1.68.3' assert_output_json_eq '{ "f": "::13.1.68.3" }' execute '::FFFF:129.144.52.38' assert_output_json_eq '{ "f": "::FFFF:129.144.52.38" }' # invalid samples execute '2001:DB8::8::800:200C:417A' # two :: sequences assert_output_json_eq '{ "originalmsg": "2001:DB8::8::800:200C:417A", "unparsed-data": "2001:DB8::8::800:200C:417A" }' execute 'ABCD:EF01:2345:6789:ABCD:EF01:2345::6789' # :: with too many blocks assert_output_json_eq '{ "originalmsg": "ABCD:EF01:2345:6789:ABCD:EF01:2345::6789", "unparsed-data": "ABCD:EF01:2345:6789:ABCD:EF01:2345::6789" }' execute 'ABCD:EF01:2345:6789:ABCD:EF01:2345:1:6798' # too many blocks (9) assert_output_json_eq '{"originalmsg": "ABCD:EF01:2345:6789:ABCD:EF01:2345:1:6798", "unparsed-data": ":6798" }' execute ':0:0:0:0:0:0:1' # missing first digit 
assert_output_json_eq '{ "originalmsg": ":0:0:0:0:0:0:1", "unparsed-data": ":0:0:0:0:0:0:1" }' execute '0:0:0:0:0:0:0:' # missing last digit assert_output_json_eq '{ "originalmsg": "0:0:0:0:0:0:0:", "unparsed-data": "0:0:0:0:0:0:0:" }' execute '13.1.68.3' # pure IPv4 address assert_output_json_eq '{ "originalmsg": "13.1.68.3", "unparsed-data": "13.1.68.3" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_ipv6_v1.sh000077500000000000000000000043261520037563000200220ustar00rootroot00000000000000#!/bin/bash # added 2015-06-23 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "IPv6 parser" add_rule 'rule=:%f:ipv6%' # examples from RFC4291, sect. 2.2 execute 'ABCD:EF01:2345:6789:ABCD:EF01:2345:6789' assert_output_json_eq '{ "f": "ABCD:EF01:2345:6789:ABCD:EF01:2345:6789" }' execute 'ABCD:EF01:2345:6789:abcd:EF01:2345:6789' # mixed hex case assert_output_json_eq '{ "f": "ABCD:EF01:2345:6789:abcd:EF01:2345:6789" }' execute '2001:DB8:0:0:8:800:200C:417A' assert_output_json_eq '{ "f": "2001:DB8:0:0:8:800:200C:417A" }' execute '0:0:0:0:0:0:0:1' assert_output_json_eq '{ "f": "0:0:0:0:0:0:0:1" }' execute '2001:DB8::8:800:200C:417A' assert_output_json_eq '{ "f": "2001:DB8::8:800:200C:417A" }' execute 'FF01::101' assert_output_json_eq '{ "f": "FF01::101" }' execute '::1' assert_output_json_eq '{ "f": "::1" }' execute '::' assert_output_json_eq '{ "f": "::" }' execute '0:0:0:0:0:0:13.1.68.3' assert_output_json_eq '{ "f": "0:0:0:0:0:0:13.1.68.3" }' execute '::13.1.68.3' assert_output_json_eq '{ "f": "::13.1.68.3" }' execute '::FFFF:129.144.52.38' assert_output_json_eq '{ "f": "::FFFF:129.144.52.38" }' # invalid samples execute '2001:DB8::8::800:200C:417A' # two :: sequences assert_output_json_eq '{ "originalmsg": "2001:DB8::8::800:200C:417A", "unparsed-data": "2001:DB8::8::800:200C:417A" }' execute 'ABCD:EF01:2345:6789:ABCD:EF01:2345::6789' # :: with too many blocks assert_output_json_eq '{ "originalmsg": 
"ABCD:EF01:2345:6789:ABCD:EF01:2345::6789", "unparsed-data": "ABCD:EF01:2345:6789:ABCD:EF01:2345::6789" }' execute 'ABCD:EF01:2345:6789:ABCD:EF01:2345:1:6798' # too many blocks (9) assert_output_json_eq '{"originalmsg": "ABCD:EF01:2345:6789:ABCD:EF01:2345:1:6798", "unparsed-data": "ABCD:EF01:2345:6789:ABCD:EF01:2345:1:6798" }' execute ':0:0:0:0:0:0:1' # missing first digit assert_output_json_eq '{ "originalmsg": ":0:0:0:0:0:0:1", "unparsed-data": ":0:0:0:0:0:0:1" }' execute '0:0:0:0:0:0:0:' # missing last digit assert_output_json_eq '{ "originalmsg": "0:0:0:0:0:0:0:", "unparsed-data": "0:0:0:0:0:0:0:" }' execute '13.1.68.3' # pure IPv4 address assert_output_json_eq '{ "originalmsg": "13.1.68.3", "unparsed-data": "13.1.68.3" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_json.sh000077500000000000000000000040571520037563000175020ustar00rootroot00000000000000#!/bin/bash # added 2015-03-01 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "JSON field" add_rule 'version=2' add_rule 'rule=:%field:json%' execute '{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # let's see if something more complicated still works, so ADD some # more rules add_rule 'rule=:begin %field:json%' add_rule 'rule=:begin %field:json%end' add_rule 'rule=:%field:json%end' execute '{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' #check if trailing whitespace is ignored execute '{"f1": "1", "f2": 2} ' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute 'begin {"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute 'begin {"f1": "1", "f2": 2}end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # note: the parser takes all whitespace after the JSON # to be part of it! 
execute 'begin {"f1": "1", "f2": 2} end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute 'begin {"f1": "1", "f2": 2} end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '{"f1": "1", "f2": 2}end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' #check cases where parsing failure must occur execute '{"f1": "1", f2: 2}' assert_output_json_eq '{ "originalmsg": "{\"f1\": \"1\", f2: 2}", "unparsed-data": "{\"f1\": \"1\", f2: 2}" }' #some more complex cases add_rule 'rule=:%field1:json%-%field2:json%' execute '{"f1": "1"}-{"f2": 2}' assert_output_json_eq '{ "field2": { "f2": 2 }, "field1": { "f1": "1" } }' # re-check previous def still works execute '{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # now check some strange cases reset_rules add_rule 'version=2' add_rule 'rule=:%field:json%' # this check is because of bug in json-c: # https://github.com/json-c/json-c/issues/181 execute '15:00' assert_output_json_eq '{ "originalmsg": "15:00", "unparsed-data": "15:00" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_json_jsoncnf.sh000077500000000000000000000041041520037563000212130ustar00rootroot00000000000000#!/bin/bash # added 2015-03-01 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "JSON field" add_rule 'version=2' add_rule 'rule=:%{"name":"field", "type":"json"}%' execute '{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # let's see if something more complicated still works, so ADD some # more rules add_rule 'rule=:begin %field:json%' add_rule 'rule=:begin %field:json%end' add_rule 'rule=:%field:json%end' execute '{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' #check if trailing whitespace is ignored execute '{"f1": "1", "f2": 2} ' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute 'begin {"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute 'begin {"f1": "1", "f2": 2}end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # note: the parser takes all whitespace after the JSON # to be part of it! execute 'begin {"f1": "1", "f2": 2} end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute 'begin {"f1": "1", "f2": 2} end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '{"f1": "1", "f2": 2}end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' #check cases where parsing failure must occur execute '{"f1": "1", f2: 2}' assert_output_json_eq '{ "originalmsg": "{\"f1\": \"1\", f2: 2}", "unparsed-data": "{\"f1\": \"1\", f2: 2}" }' #some more complex cases add_rule 'rule=:%field1:json%-%field2:json%' execute '{"f1": "1"}-{"f2": 2}' assert_output_json_eq '{ "field2": { "f2": 2 }, "field1": { "f1": "1" } }' # re-check previous def still works execute '{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # now check some strange cases reset_rules add_rule 'version=2' add_rule 'rule=:%field:json%' # this check is because of bug in json-c: # https://github.com/json-c/json-c/issues/181 execute '15:00' assert_output_json_eq '{ "originalmsg": "15:00", "unparsed-data": "15:00" }' cleanup_tmp_files 
liblognorm-2.1.0/tests/field_json_skipempty.sh000077500000000000000000000021051520037563000215770ustar00rootroot00000000000000# added 2018-08-27 by Noriko Hosoi # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "JSON field" add_rule 'version=2' add_rule 'rule=:%field:json%' # default behaviour execute '{"f1": "1", "f2": 2, "f3": "", "f4": {}, "f5": []}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 , "f3": "", "f4": {}, "f5": []} }' # skip empty json values reset_rules add_rule 'version=2' add_rule 'rule=:%field:json:skipempty%' execute '{"f1": "1", "f2": 2, "f3": "", "f4": {}, "f5": []}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # invalid parameter must be rejected at rule-load time reset_rules add_rule 'version=2' add_rule 'rule=:%field:json:bogus%' err_file="$test_tmpdir/test.err" set +e printf '%s\n' '{"f1": "1", "f2": 2, "f3": "", "f4": {}, "f5": []}' \ | $cmd $ln_opts -r "$(rulebase_file_name)" -e json >"$test_out" 2>"$err_file" rc=$? set -e if [ "$rc" -eq 0 ]; then echo "FAIL: invalid json skipempty config was accepted" exit 1 fi grep -F "invalid flag for JSON parser: bogus" "$err_file" cleanup_tmp_files liblognorm-2.1.0/tests/field_json_v1.sh000077500000000000000000000040051520037563000201010ustar00rootroot00000000000000#!/bin/bash # added 2015-03-01 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "JSON field" add_rule 'rule=:%field:json%' execute '{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # let's see if something more complicated still works, so ADD some # more rules add_rule 'rule=:begin %field:json%' add_rule 'rule=:begin %field:json%end' add_rule 'rule=:%field:json%end' execute '{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' #check if trailing whitespace is ignored execute '{"f1": "1", "f2": 2} ' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute 'begin {"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute 'begin {"f1": "1", "f2": 2}end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # note: the parser takes all whitespace after the JSON # to be part of it! execute 'begin {"f1": "1", "f2": 2} end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute 'begin {"f1": "1", "f2": 2} end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' execute '{"f1": "1", "f2": 2}end' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' #check cases where parsing failure must occur execute '{"f1": "1", f2: 2}' assert_output_json_eq '{ "originalmsg": "{\"f1\": \"1\", f2: 2}", "unparsed-data": "{\"f1\": \"1\", f2: 2}" }' #some more complex cases add_rule 'rule=:%field1:json%-%field2:json%' execute '{"f1": "1"}-{"f2": 2}' assert_output_json_eq '{ "field2": { "f2": 2 }, "field1": { "f1": "1" } }' # re-check previous def still works execute '{"f1": "1", "f2": 2}' assert_output_json_eq '{ "field": { "f1": "1", "f2": 2 } }' # now check some strange cases reset_rules add_rule 'rule=:%field:json%' # this check is because of bug in json-c: # https://github.com/json-c/json-c/issues/181 execute '15:00' assert_output_json_eq '{ "originalmsg": "15:00", "unparsed-data": "15:00" }' cleanup_tmp_files 
liblognorm-2.1.0/tests/field_kernel_timestamp.sh000077500000000000000000000033371520037563000220740ustar00rootroot00000000000000#!/bin/bash # added 2015-03-12 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "kernel timestamp parser" add_rule 'version=2' add_rule 'rule=:begin %timestamp:kernel-timestamp% end' execute 'begin [12345.123456] end' assert_output_json_eq '{ "timestamp": "[12345.123456]"}' reset_rules add_rule 'version=2' add_rule 'rule=:begin %timestamp:kernel-timestamp%' execute 'begin [12345.123456]' assert_output_json_eq '{ "timestamp": "[12345.123456]"}' reset_rules add_rule 'version=2' add_rule 'rule=:%timestamp:kernel-timestamp%' execute '[12345.123456]' assert_output_json_eq '{ "timestamp": "[12345.123456]"}' execute '[154469.133028]' assert_output_json_eq '{ "timestamp": "[154469.133028]"}' execute '[123456789012.123456]' assert_output_json_eq '{ "timestamp": "[123456789012.123456]"}' #check cases where parsing failure must occur execute '[1234.123456]' assert_output_json_eq '{"originalmsg": "[1234.123456]", "unparsed-data": "[1234.123456]" }' execute '[1234567890123.123456]' assert_output_json_eq '{"originalmsg": "[1234567890123.123456]", "unparsed-data": "[1234567890123.123456]" }' execute '[123456789012.12345]' assert_output_json_eq '{ "originalmsg": "[123456789012.12345]", "unparsed-data": "[123456789012.12345]" }' execute '[123456789012.1234567]' assert_output_json_eq '{ "originalmsg": "[123456789012.1234567]", "unparsed-data": "[123456789012.1234567]" }' execute '(123456789012.123456]' assert_output_json_eq '{ "originalmsg": "(123456789012.123456]", "unparsed-data": "(123456789012.123456]" }' execute '[123456789012.123456' assert_output_json_eq '{ "originalmsg": "[123456789012.123456", "unparsed-data": "[123456789012.123456" }' cleanup_tmp_files 
liblognorm-2.1.0/tests/field_kernel_timestamp_jsoncnf.sh000077500000000000000000000034361520037563000236140ustar00rootroot00000000000000#!/bin/bash # added 2015-03-12 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "kernel timestamp parser" add_rule 'version=2' add_rule 'rule=:begin %{"name":"timestamp", "type":"kernel-timestamp"}% end' execute 'begin [12345.123456] end' assert_output_json_eq '{ "timestamp": "[12345.123456]"}' reset_rules add_rule 'version=2' add_rule 'rule=:begin %{"name":"timestamp", "type":"kernel-timestamp"}%' execute 'begin [12345.123456]' assert_output_json_eq '{ "timestamp": "[12345.123456]"}' reset_rules add_rule 'version=2' add_rule 'rule=:%{"name":"timestamp", "type":"kernel-timestamp"}%' execute '[12345.123456]' assert_output_json_eq '{ "timestamp": "[12345.123456]"}' execute '[154469.133028]' assert_output_json_eq '{ "timestamp": "[154469.133028]"}' execute '[123456789012.123456]' assert_output_json_eq '{ "timestamp": "[123456789012.123456]"}' #check cases where parsing failure must occur execute '[1234.123456]' assert_output_json_eq '{"originalmsg": "[1234.123456]", "unparsed-data": "[1234.123456]" }' execute '[1234567890123.123456]' assert_output_json_eq '{"originalmsg": "[1234567890123.123456]", "unparsed-data": "[1234567890123.123456]" }' execute '[123456789012.12345]' assert_output_json_eq '{ "originalmsg": "[123456789012.12345]", "unparsed-data": "[123456789012.12345]" }' execute '[123456789012.1234567]' assert_output_json_eq '{ "originalmsg": "[123456789012.1234567]", "unparsed-data": "[123456789012.1234567]" }' execute '(123456789012.123456]' assert_output_json_eq '{ "originalmsg": "(123456789012.123456]", "unparsed-data": "(123456789012.123456]" }' execute '[123456789012.123456' assert_output_json_eq '{ "originalmsg": "[123456789012.123456", "unparsed-data": "[123456789012.123456" }' cleanup_tmp_files 
liblognorm-2.1.0/tests/field_kernel_timestamp_v1.sh000077500000000000000000000032551520037563000225010ustar00rootroot00000000000000#!/bin/bash # added 2015-03-12 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "kernel timestamp parser" add_rule 'rule=:begin %timestamp:kernel-timestamp% end' execute 'begin [12345.123456] end' assert_output_json_eq '{ "timestamp": "[12345.123456]"}' reset_rules add_rule 'rule=:begin %timestamp:kernel-timestamp%' execute 'begin [12345.123456]' assert_output_json_eq '{ "timestamp": "[12345.123456]"}' reset_rules add_rule 'rule=:%timestamp:kernel-timestamp%' execute '[12345.123456]' assert_output_json_eq '{ "timestamp": "[12345.123456]"}' execute '[154469.133028]' assert_output_json_eq '{ "timestamp": "[154469.133028]"}' execute '[123456789012.123456]' assert_output_json_eq '{ "timestamp": "[123456789012.123456]"}' #check cases where parsing failure must occur execute '[1234.123456]' assert_output_json_eq '{"originalmsg": "[1234.123456]", "unparsed-data": "[1234.123456]" }' execute '[1234567890123.123456]' assert_output_json_eq '{"originalmsg": "[1234567890123.123456]", "unparsed-data": "[1234567890123.123456]" }' execute '[123456789012.12345]' assert_output_json_eq '{ "originalmsg": "[123456789012.12345]", "unparsed-data": "[123456789012.12345]" }' execute '[123456789012.1234567]' assert_output_json_eq '{ "originalmsg": "[123456789012.1234567]", "unparsed-data": "[123456789012.1234567]" }' execute '(123456789012.123456]' assert_output_json_eq '{ "originalmsg": "(123456789012.123456]", "unparsed-data": "(123456789012.123456]" }' execute '[123456789012.123456' assert_output_json_eq '{ "originalmsg": "[123456789012.123456", "unparsed-data": "[123456789012.123456" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_mac48.sh000077500000000000000000000013111520037563000174330ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This 
file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "dmac48 syntax" reset_rules add_rule 'version=2' add_rule 'rule=:%field:mac48%' execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"field": "f0:f6:1c:5f:cc:a2"}' execute 'f0-f6-1c-5f-cc-a2' assert_output_json_eq '{"field": "f0-f6-1c-5f-cc-a2"}' # things that need to NOT match execute 'f0-f6:1c:5f:cc-a2' assert_output_json_eq '{ "originalmsg": "f0-f6:1c:5f:cc-a2", "unparsed-data": "f0-f6:1c:5f:cc-a2" }' execute 'f0:f6:1c:xf:cc:a2' assert_output_json_eq '{ "originalmsg": "f0:f6:1c:xf:cc:a2", "unparsed-data": "f0:f6:1c:xf:cc:a2" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_mac48_jsoncnf.sh000077500000000000000000000013221520037563000211550ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "dmac48 syntax" add_rule 'version=2' add_rule 'rule=:%{"name":"field", "type":"mac48"}%' execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"field": "f0:f6:1c:5f:cc:a2"}' execute 'f0-f6-1c-5f-cc-a2' assert_output_json_eq '{"field": "f0-f6-1c-5f-cc-a2"}' # things that need to NOT match execute 'f0-f6:1c:5f:cc-a2' assert_output_json_eq '{ "originalmsg": "f0-f6:1c:5f:cc-a2", "unparsed-data": "f0-f6:1c:5f:cc-a2" }' execute 'f0:f6:1c:xf:cc:a2' assert_output_json_eq '{ "originalmsg": "f0:f6:1c:xf:cc:a2", "unparsed-data": "f0:f6:1c:xf:cc:a2" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_mac48_v1.sh000077500000000000000000000012501520037563000200430ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "dmac48 syntax" add_rule 'rule=:%field:mac48%' execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"field": "f0:f6:1c:5f:cc:a2"}' execute 'f0-f6-1c-5f-cc-a2' assert_output_json_eq '{"field": "f0-f6-1c-5f-cc-a2"}' # things that need to NOT match execute 'f0-f6:1c:5f:cc-a2' assert_output_json_eq '{ "originalmsg": "f0-f6:1c:5f:cc-a2", "unparsed-data": "f0-f6:1c:5f:cc-a2" }' execute 'f0:f6:1c:xf:cc:a2' assert_output_json_eq '{ "originalmsg": "f0:f6:1c:xf:cc:a2", "unparsed-data": "f0:f6:1c:xf:cc:a2" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_name_value.sh000077500000000000000000000024251520037563000206420ustar00rootroot00000000000000#!/bin/bash # added 2015-04-25 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "name/value parser" add_rule 'version=2' add_rule 'rule=:%f:name-value-list%' execute 'name=value' assert_output_json_eq '{ "f": { "name": "value" } }' execute 'name1=value1 name2=value2 name3=value3' assert_output_json_eq '{ "f": { "name1": "value1", "name2": "value2", "name3": "value3" } }' execute 'name1=value1 name2=value2 name3=value3 ' assert_output_json_eq '{ "f": { "name1": "value1", "name2": "value2", "name3": "value3" } }' execute 'name1= name2=value2 name3=value3 ' assert_output_json_eq '{ "f": { "name1": "", "name2": "value2", "name3": "value3" } }' execute 'origin=core.action processed=67 failed=0 suspended=0 suspended.duration=0 resumed=0 ' assert_output_json_eq '{ "f": { "origin": "core.action", "processed": "67", "failed": "0", "suspended": "0", "suspended.duration": "0", "resumed": "0" } }' # check for required non-matches execute 'name' assert_output_json_eq ' {"originalmsg": "name", "unparsed-data": "name" }' execute 'noname1 name2=value2 name3=value3 ' assert_output_json_eq '{ "originalmsg": "noname1 name2=value2 name3=value3 ", "unparsed-data": "noname1 name2=value2 name3=value3 " }' cleanup_tmp_files 
liblognorm-2.1.0/tests/field_name_value_jsoncnf.sh000077500000000000000000000024521520037563000223620ustar00rootroot00000000000000#!/bin/bash # added 2015-04-25 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "name/value parser" add_rule 'version=2' add_rule 'rule=:%{"name":"f", "type":"name-value-list"}%' execute 'name=value' assert_output_json_eq '{ "f": { "name": "value" } }' execute 'name1=value1 name2=value2 name3=value3' assert_output_json_eq '{ "f": { "name1": "value1", "name2": "value2", "name3": "value3" } }' execute 'name1=value1 name2=value2 name3=value3 ' assert_output_json_eq '{ "f": { "name1": "value1", "name2": "value2", "name3": "value3" } }' execute 'name1= name2=value2 name3=value3 ' assert_output_json_eq '{ "f": { "name1": "", "name2": "value2", "name3": "value3" } }' execute 'origin=core.action processed=67 failed=0 suspended=0 suspended.duration=0 resumed=0 ' assert_output_json_eq '{ "f": { "origin": "core.action", "processed": "67", "failed": "0", "suspended": "0", "suspended.duration": "0", "resumed": "0" } }' # check for required non-matches execute 'name' assert_output_json_eq ' {"originalmsg": "name", "unparsed-data": "name" }' execute 'noname1 name2=value2 name3=value3 ' assert_output_json_eq '{ "originalmsg": "noname1 name2=value2 name3=value3 ", "unparsed-data": "noname1 name2=value2 name3=value3 " }' cleanup_tmp_files liblognorm-2.1.0/tests/field_name_value_quoted.sh000077500000000000000000000034301520037563000222200ustar00rootroot00000000000000#!/bin/bash # added 2021-11-08 by @KGuillemot # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "name/value parser" add_rule 'version=2' add_rule 'rule=:%f:name-value-list%' execute 'name="value"' assert_output_json_eq '{ "f": { "name": "value" } }' execute 'name1="value1" name2="value2" name3="value3"' assert_output_json_eq '{ "f": { "name1": "value1", "name2": "value2", "name3": "value3" } }' execute 'name1="value1 name2=value2" name3=value3 ' assert_output_json_eq '{ "f": { "name1": "value1 name2=value2", "name3": "value3" } }' execute 'name1="" name2="value2" name3="value3" ' assert_output_json_eq '{ "f": { "name1": "", "name2": "value2", "name3": "value3" } }' execute 'origin="core.action" processed=67 failed=0 suspended=0 suspended.duration=0 resumed=0 ' assert_output_json_eq '{ "f": { "origin": "core.action", "processed": "67", "failed": "0", "suspended": "0", "suspended.duration": "0", "resumed": "0" } }' # check escaped caracters execute 'name1="a\"b" name2="c\\\"d" name3="e\\\\\"f" ' assert_output_json_eq '{ "f": { "name1": "a\"b", "name2": "c\\\"d", "name3": "e\\\\\"f" } }' execute 'name1="a\"b\\" name2="c\\\"d\\\\" name3="e\\\\\"f\\\\\\" ' assert_output_json_eq '{ "f": { "name1": "a\"b\\", "name2": "c\\\"d\\\\", "name3": "e\\\\\"f\\\\\\" } }' # check for required non-matches execute 'name' assert_output_json_eq ' {"originalmsg": "name", "unparsed-data": "name" }' # check escaped caracters execute 'name1="" rest' assert_output_json_eq ' {"originalmsg": "name1=\"\" rest", "unparsed-data": "rest" }' execute 'noname1 name2="value2" name3="value3" ' assert_output_json_eq '{ "originalmsg": "noname1 name2=\"value2\" name3=\"value3\" ", "unparsed-data": "noname1 name2=\"value2\" name3=\"value3\" " }' cleanup_tmp_files liblognorm-2.1.0/tests/field_name_value_v1.sh000077500000000000000000000024001520037563000212410ustar00rootroot00000000000000#!/bin/bash # added 2015-04-25 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "name/value parser" add_rule 'rule=:%f:name-value-list%' execute 'name=value' assert_output_json_eq '{ "f": { "name": "value" } }' execute 'name1=value1 name2=value2 name3=value3' assert_output_json_eq '{ "f": { "name1": "value1", "name2": "value2", "name3": "value3" } }' execute 'name1=value1 name2=value2 name3=value3 ' assert_output_json_eq '{ "f": { "name1": "value1", "name2": "value2", "name3": "value3" } }' execute 'name1= name2=value2 name3=value3 ' assert_output_json_eq '{ "f": { "name1": "", "name2": "value2", "name3": "value3" } }' execute 'origin=core.action processed=67 failed=0 suspended=0 suspended.duration=0 resumed=0 ' assert_output_json_eq '{ "f": { "origin": "core.action", "processed": "67", "failed": "0", "suspended": "0", "suspended.duration": "0", "resumed": "0" } }' # check for required non-matches execute 'name' assert_output_json_eq ' {"originalmsg": "name", "unparsed-data": "name" }' execute 'noname1 name2=value2 name3=value3 ' assert_output_json_eq '{ "originalmsg": "noname1 name2=value2 name3=value3 ", "unparsed-data": "noname1 name2=value2 name3=value3 " }' cleanup_tmp_files liblognorm-2.1.0/tests/field_name_value_whitespace.sh000077500000000000000000000025561520037563000230630ustar00rootroot00000000000000#!/bin/bash # added 2022-03-28 by @KGuillemot # This file is part of the liblognorm project, released under ASL 2.0 srcdir="${srcdir:-.}" # shellcheck disable=SC1091 . 
"$srcdir"/exec.sh test_def "$0" "name/value parser" add_rule 'version=2' add_rule 'rule=:%{"name":"f", "type":"name-value-list", "separator":",", "assignator":":", "ignore_whitespaces":true}%' execute 'name:value' assert_output_json_eq '{ "f": { "name": "value" } }' execute 'name1:value1,name2:value2,name3:value3' assert_output_json_eq '{ "f": { "name1": "value1", "name2": "value2", "name3": "value3" } }' execute ' name1: abcd, name2 : value2 ,name3 :value3 ' assert_output_json_eq '{ "f": { "name1": "abcd", "name2": "value2", "name3": "value3" } }' execute 'name1:"value1" , name2 : "value2" , name3 : value3 ' assert_output_json_eq '{ "f": { "name1": "value1", "name2": "value2", "name3": "value3" } }' execute 'name1: , name2 : value2' assert_output_json_eq '{ "f": { "name1": "", "name2": "value2" } }' execute 'name1: ' assert_output_json_eq '{ "f": { "name1": "" } }' # Check old behavior (default) reset_rules add_rule 'version=2' add_rule 'rule=:%{"name":"f", "type":"name-value-list", "separator":",", "assignator":":"}%' execute ' name1: abcd, name2 : value2 ,name3 :value3 ' assert_output_json_eq '{ "f": { " name1": " abcd", " name2 ": " value2 ", "name3 ": "value3 " } }' cleanup_tmp_files liblognorm-2.1.0/tests/field_number-fmt_number.sh000077500000000000000000000011371520037563000221510ustar00rootroot00000000000000#!/bin/bash # added 2017-10-02 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "number field in native format" add_rule 'version=2' add_rule 'rule=:here is a number %{ "type":"number", "name":"num", "format":"number"}% in dec form' execute 'here is a number 1234 in dec form' assert_output_json_eq '{"num": 1234}' #check cases where parsing failure must occur execute 'here is a number 1234in dec form' assert_output_json_eq '{ "originalmsg": "here is a number 1234in dec form", "unparsed-data": "in dec form" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_number.sh000077500000000000000000000010471520037563000200150ustar00rootroot00000000000000#!/bin/bash # added 2017-10-02 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "number field" add_rule 'version=2' add_rule 'rule=:here is a number %num:number% in dec form' execute 'here is a number 1234 in dec form' assert_output_json_eq '{"num": "1234"}' #check cases where parsing failure must occur execute 'here is a number 1234in dec form' assert_output_json_eq '{ "originalmsg": "here is a number 1234in dec form", "unparsed-data": "in dec form" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_number_maxval.sh000077500000000000000000000011321520037563000213600ustar00rootroot00000000000000#!/bin/bash # added 2017-10-02 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "number field with maxval" add_rule 'version=2' add_rule 'rule=:here is a number %{ "type":"number", "name":"num", "maxval":1000}% in dec form' execute 'here is a number 234 in dec form' assert_output_json_eq '{"num": "234"}' #check cases where parsing failure must occur execute 'here is a number 1234in dec form' assert_output_json_eq '{ "originalmsg": "here is a number 1234in dec form", "unparsed-data": "1234in dec form" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_op_quoted_string_escape.sh000077500000000000000000000011671520037563000234350ustar00rootroot00000000000000#!/bin/bash # added 2026-03-24 by AI agent # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "op-quoted-string escape handling" add_rule 'version=2' add_rule 'rule=:%{"name":"f", "type":"op-quoted-string", "escape":true}%' execute '"test with \" quote"' assert_output_json_eq '{ "f": "test with \" quote" }' execute '"test with \\ slash"' assert_output_json_eq '{ "f": "test with \\ slash" }' execute '"mixed \\ and \" escapes"' assert_output_json_eq '{ "f": "mixed \\ and \" escapes" }' execute 'word' assert_output_json_eq '{ "f": "word" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_op_quoted_string_escape_invalid.sh000077500000000000000000000011671520037563000251430ustar00rootroot00000000000000#!/bin/bash # added 2026-03-24 by AI agent # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "op-quoted-string invalid escape config" add_rule 'version=2' add_rule 'rule=:%{"name":"f", "type":"op-quoted-string", "escape":"yes"}%' err_file="$test_tmpdir/test.err" set +e printf '"value"\n' | $cmd $ln_opts -r "$(rulebase_file_name)" -e json >"$test_out" 2>"$err_file" rc=$? 
set -e if [ "$rc" -eq 0 ]; then echo "FAIL: invalid op-quoted-string escape config was accepted" exit 1 fi grep -F "op-quoted-string's 'escape' field should be boolean" "$err_file" cleanup_tmp_files liblognorm-2.1.0/tests/field_quoted_string.sh000077500000000000000000000010551520037563000214130ustar00rootroot00000000000000#!/bin/bash # added 2026-04-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 srcdir="${srcdir:-.}" # shellcheck disable=SC1091 . "$srcdir"/exec.sh test_def "$0" "quoted-string strips outer quotes" add_rule 'version=2' add_rule 'rule=:%f:quoted-string%' execute '"alpha beta"' assert_output_json_eq '{ "f": "alpha beta" }' execute '""' assert_output_json_eq '{ "f": "" }' execute '"unterminated' assert_output_json_eq '{ "originalmsg": "\"unterminated", "unparsed-data": "\"unterminated" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_recursive.sh000077500000000000000000000056771520037563000205510ustar00rootroot00000000000000#!/bin/bash # added 2014-11-26 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "recursive parsing field" #tail recursion with default tail field add_rule 'rule=:%word:word% %next:recursive%' add_rule 'rule=:%word:word%' execute '123 abc 456 def' assert_output_json_eq '{"word": "123", "next": {"word": "abc", "next": {"word": "456", "next" : {"word": "def"}}}}' #tail recursion with explicitly named 'tail' field reset_rules add_rule 'rule=:%word:word% %next:recursive:tail%' add_rule 'rule=:%word:word%' execute '123 abc 456 def' assert_output_json_eq '{"word": "123", "next": {"word": "abc", "next": {"word": "456", "next" : {"word": "def"}}}}' #tail recursion with tail field having arbitrary name reset_rules add_rule 'rule=:%word:word% %next:recursive:foo%' add_rule 'rule=:%word:word%' execute '123 abc 456 def' assert_output_json_eq '{"word": "123", "next": {"word": "abc", "next": {"word": "456", "next" : {"word": "def"}}}}' #non tail recursion with default tail field reset_rules add_rule 'rule=:blocked on %device:word% %net:recursive%at %tm:date-rfc5424%' add_rule 'rule=:%ip_addr:ipv4% %tail:rest%' add_rule 'rule=:%subnet_addr:ipv4%/%mask:number% %tail:rest%' execute 'blocked on gw-1 10.20.30.40 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"ip_addr": "10.20.30.40"}, "tm": "2014-12-08T08:53:33.05+05:30"}' execute 'blocked on gw-1 10.20.30.40/16 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"subnet_addr": "10.20.30.40", "mask": "16"}, "tm": "2014-12-08T08:53:33.05+05:30"}' #non tail recursion with tail field being explicitly named 'tail' reset_rules add_rule 'rule=:blocked on %device:word% %net:recursive:tail%at %tm:date-rfc5424%' add_rule 'rule=:%ip_addr:ipv4% %tail:rest%' add_rule 'rule=:%subnet_addr:ipv4%/%mask:number% %tail:rest%' execute 'blocked on gw-1 10.20.30.40 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"ip_addr": "10.20.30.40"}, "tm": "2014-12-08T08:53:33.05+05:30"}' execute 'blocked 
on gw-1 10.20.30.40/16 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"subnet_addr": "10.20.30.40", "mask": "16"}, "tm": "2014-12-08T08:53:33.05+05:30"}' #non tail recursion with tail field having arbitrary name reset_rules add_rule 'rule=:blocked on %device:word% %net:recursive:remaining%at %tm:date-rfc5424%' add_rule 'rule=:%ip_addr:ipv4% %remaining:rest%' add_rule 'rule=:%subnet_addr:ipv4%/%mask:number% %remaining:rest%' execute 'blocked on gw-1 10.20.30.40 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"ip_addr": "10.20.30.40"}, "tm": "2014-12-08T08:53:33.05+05:30"}' execute 'blocked on gw-1 10.20.30.40/16 at 2014-12-08T08:53:33.05+05:30' assert_output_json_eq '{"device": "gw-1", "net": {"subnet_addr": "10.20.30.40", "mask": "16"}, "tm": "2014-12-08T08:53:33.05+05:30"}' cleanup_tmp_files liblognorm-2.1.0/tests/field_regex_default_group_parse_and_return.sh000077500000000000000000000006551520037563000261760ustar00rootroot00000000000000#!/bin/bash # added 2014-11-14 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 export ln_opts='-oallowRegex' test_def $0 "type ERE for regex field" add_rule 'rule=:%first:regex:[a-z]+% %second:regex:\d+\x25\x3a[a-f0-9]+\x25%' execute 'foo 122%:7a%' assert_output_contains '"first": "foo"' assert_output_contains '"second": "122%:7a%"' cleanup_tmp_files liblognorm-2.1.0/tests/field_regex_invalid_args.sh000077500000000000000000000023051520037563000223570ustar00rootroot00000000000000#!/bin/bash # added 2014-11-14 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 export ln_opts='-oallowRegex' . 
$srcdir/exec.sh test_def $0 "invalid type for regex field with everything else defaulted" add_rule 'rule=:%first:regex:[a-z]+:Q%' execute 'foo' assert_output_contains '"originalmsg": "foo"' assert_output_contains '"unparsed-data": "foo"' reset_rules add_rule 'rule=:%first:regex:[a-z]+:%' execute 'foo' assert_output_contains '"originalmsg": "foo"' assert_output_contains '"unparsed-data": "foo"' reset_rules add_rule 'rule=:%first:regex:[a-z]+:0:%' execute 'foo' assert_output_contains '"originalmsg": "foo"' assert_output_contains '"unparsed-data": "foo"' reset_rules add_rule 'rule=:%first:regex:[a-z]+:0:0q%' execute 'foo' assert_output_contains '"originalmsg": "foo"' assert_output_contains '"unparsed-data": "foo"' reset_rules add_rule 'rule=:%first:regex:[a-z]+:0a:0%' execute 'foo' assert_output_contains '"originalmsg": "foo"' assert_output_contains '"unparsed-data": "foo"' reset_rules add_rule 'rule=:%first:regex:::::%%%' execute 'foo' assert_output_contains '"originalmsg": "foo"' assert_output_contains '"unparsed-data": "foo"' cleanup_tmp_files liblognorm-2.1.0/tests/field_regex_while_regex_support_is_disabled.sh000077500000000000000000000005541520037563000263410ustar00rootroot00000000000000#!/bin/bash # added 2014-11-14 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "field regex, while regex support is disabled" add_rule 'rule=:%first:regex:[a-z]+%' execute 'foo' assert_output_contains '"originalmsg": "foo"' assert_output_contains '"unparsed-data": "foo"' cleanup_tmp_files liblognorm-2.1.0/tests/field_regex_with_consume_group.sh000077500000000000000000000012461520037563000236400ustar00rootroot00000000000000#!/bin/bash # added 2014-11-14 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 export ln_opts='-oallowRegex' test_def $0 "regex field with consume-group" add_rule 'rule=:%first:regex:([a-z]{2}([a-f0-9]+,)+):0%%rest:rest%' execute 'ad1234abcd,4567ef12,8901abef' assert_output_contains '"first": "ad1234abcd,4567ef12,"' assert_output_contains '"rest": "8901abef"' reset_rules add_rule 'rule=:%first:regex:(([a-z]{2})([a-f0-9]+,)+):2%%rest:rest%' execute 'ad1234abcd,4567ef12,8901abef' assert_output_contains '"first": "ad"' assert_output_contains '"rest": "1234abcd,4567ef12,8901abef"' cleanup_tmp_files liblognorm-2.1.0/tests/field_regex_with_consume_group_and_return_group.sh000077500000000000000000000012761520037563000273000ustar00rootroot00000000000000#!/bin/bash # added 2014-11-14 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh export ln_opts='-oallowRegex' no_solaris10 test_def $0 "regex field with consume-group and return-group" set -x add_rule 'rule=:%first:regex:[a-z]{2}(([a-f0-9]+),)+:0:2%%rest:rest%' execute 'ad1234abcd,4567ef12,8901abef' assert_output_contains '"first": "4567ef12"' assert_output_contains '"rest": "8901abef"' reset_rules add_rule 'rule=:%first:regex:(([a-z]{2})(([a-f0-9]+),)+):2:4%%rest:rest%' execute 'ad1234abcd,4567ef12,8901abef' assert_output_contains '"first": "4567ef12"' assert_output_contains '"rest": "1234abcd,4567ef12,8901abef"' cleanup_tmp_files liblognorm-2.1.0/tests/field_regex_with_negation.sh000077500000000000000000000011741520037563000225570ustar00rootroot00000000000000#!/bin/bash # added 2014-11-17 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 export ln_opts='-oallowRegex' test_def $0 "regex field with negation" add_rule 'rule=:%text:regex:[^,]+%,%more:rest%' execute '123,abc' assert_output_contains '"text": "123"' assert_output_contains '"more": "abc"' reset_rules add_rule 'rule=:%text:regex:([^ ,|]+( |\||,)?)+%%more:rest%' execute '123 abc|456 789,def|ghi,jkl| and some more text' assert_output_contains '"text": "123 abc|456 789,def|ghi,jkl|"' assert_output_contains '"more": " and some more text"' cleanup_tmp_files liblognorm-2.1.0/tests/field_rest.sh000077500000000000000000000035741520037563000175110ustar00rootroot00000000000000#!/bin/bash # some more tests for the "rest" motif, especially to ensure that # "rest" will not interfere with more specific rules. # added 2015-04-27 # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "rest matches" #tail recursion with default tail field add_rule 'version=2' add_rule 'rule=:%iface:char-to:\x3a%\x3a%ip:ipv4%/%port:number% (%label2:char-to:)%)' add_rule 'rule=:%iface:char-to:\x3a%\x3a%ip:ipv4%/%port:number% (%label2:char-to:)%)%tail:rest%' add_rule 'rule=:%iface:char-to:\x3a%\x3a%ip:ipv4%/%port:number%' add_rule 'rule=:%iface:char-to:\x3a%\x3a%ip:ipv4%/%port:number%%tail:rest%' # real-world cisco samples execute 'Outside:10.20.30.40/35 (40.30.20.10/35)' assert_output_json_eq '{ "label2": "40.30.20.10\/35", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' execute 'Outside:10.20.30.40/35 (40.30.20.10/35) with rest' assert_output_json_eq '{ "tail": " with rest", "label2": "40.30.20.10\/35", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' execute 'Outside:10.20.30.40/35 (40.30.20.10/35 brace missing' assert_output_json_eq '{ "tail": " (40.30.20.10\/35 brace missing", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' execute 'Outside:10.20.30.40/35 40.30.20.10/35' assert_output_json_eq '{ "tail": " 40.30.20.10\/35", "port": "35", "ip": "10.20.30.40", "iface": 
"Outside" }' # # test expected mismatches # execute 'not at all!' assert_output_json_eq '{ "originalmsg": "not at all!", "unparsed-data": "not at all!" }' execute 'Outside 10.20.30.40/35 40.30.20.10/35' assert_output_json_eq '{ "originalmsg": "Outside 10.20.30.40\/35 40.30.20.10\/35", "unparsed-data": "Outside 10.20.30.40\/35 40.30.20.10\/35" }' execute 'Outside:10.20.30.40/aa 40.30.20.10/35' assert_output_json_eq '{ "originalmsg": "Outside:10.20.30.40\/aa 40.30.20.10\/35", "unparsed-data": "aa 40.30.20.10\/35" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_rest_jsoncnf.sh000077500000000000000000000044161520037563000212250ustar00rootroot00000000000000#!/bin/bash # some more tests for the "rest" motif, especially to ensure that # "rest" will not interfere with more specific rules. # added 2015-04-27 # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "rest matches" #tail recursion with default tail field add_rule 'version=2' add_rule 'rule=:%{"name":"iface", "type":"char-to", "extradata":":"}%:%{"name":"ip", "type":"ipv4"}%/%{"name":"port", "type":"number"}% (%{"name":"label2", "type":"char-to", "extradata":")"}%)' add_rule 'rule=:%{"name":"iface", "type":"char-to", "extradata":":"}%:%{"name":"ip", "type":"ipv4"}%/%{"name":"port", "type":"number"}% (%{"name":"label2", "type":"char-to", "extradata":")"}%)%{"name":"tail", "type":"rest"}%' add_rule 'rule=:%{"name":"iface", "type":"char-to", "extradata":":"}%:%{"name":"ip", "type":"ipv4"}%/%{"name":"port", "type":"number"}%' add_rule 'rule=:%{"name":"iface", "type":"char-to", "extradata":":"}%:%{"name":"ip", "type":"ipv4"}%/%{"name":"port", "type":"number"}%%{"name":"tail", "type":"rest"}%' # real-world cisco samples execute 'Outside:10.20.30.40/35 (40.30.20.10/35)' assert_output_json_eq '{ "label2": "40.30.20.10\/35", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' execute 'Outside:10.20.30.40/35 (40.30.20.10/35) with rest' assert_output_json_eq '{ "tail": " 
with rest", "label2": "40.30.20.10\/35", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' execute 'Outside:10.20.30.40/35 (40.30.20.10/35 brace missing' assert_output_json_eq '{ "tail": " (40.30.20.10\/35 brace missing", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' execute 'Outside:10.20.30.40/35 40.30.20.10/35' assert_output_json_eq '{ "tail": " 40.30.20.10\/35", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' # # test expected mismatches # execute 'not at all!' assert_output_json_eq '{ "originalmsg": "not at all!", "unparsed-data": "not at all!" }' execute 'Outside 10.20.30.40/35 40.30.20.10/35' assert_output_json_eq '{ "originalmsg": "Outside 10.20.30.40\/35 40.30.20.10\/35", "unparsed-data": "Outside 10.20.30.40\/35 40.30.20.10\/35" }' execute 'Outside:10.20.30.40/aa 40.30.20.10/35' assert_output_json_eq '{ "originalmsg": "Outside:10.20.30.40\/aa 40.30.20.10\/35", "unparsed-data": "aa 40.30.20.10\/35" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_rest_v1.sh000077500000000000000000000035641520037563000201160ustar00rootroot00000000000000#!/bin/bash # some more tests for the "rest" motif, especially to ensure that # "rest" will not interfere with more specific rules. # added 2015-04-27 # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "rest matches" #tail recursion with default tail field add_rule 'rule=:%iface:char-to:\x3a%\x3a%ip:ipv4%/%port:number% (%label2:char-to:)%)' add_rule 'rule=:%iface:char-to:\x3a%\x3a%ip:ipv4%/%port:number% (%label2:char-to:)%)%tail:rest%' add_rule 'rule=:%iface:char-to:\x3a%\x3a%ip:ipv4%/%port:number%' add_rule 'rule=:%iface:char-to:\x3a%\x3a%ip:ipv4%/%port:number%%tail:rest%' # real-world cisco samples execute 'Outside:10.20.30.40/35 (40.30.20.10/35)' assert_output_json_eq '{ "label2": "40.30.20.10\/35", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' execute 'Outside:10.20.30.40/35 (40.30.20.10/35) with rest' assert_output_json_eq '{ "tail": " with rest", "label2": "40.30.20.10\/35", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' execute 'Outside:10.20.30.40/35 (40.30.20.10/35 brace missing' assert_output_json_eq '{ "tail": " (40.30.20.10\/35 brace missing", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' execute 'Outside:10.20.30.40/35 40.30.20.10/35' assert_output_json_eq '{ "tail": " 40.30.20.10\/35", "port": "35", "ip": "10.20.30.40", "iface": "Outside" }' # # test expected mismatches # execute 'not at all!' assert_output_json_eq '{ "originalmsg": "not at all!", "unparsed-data": "not at all!" }' execute 'Outside 10.20.30.40/35 40.30.20.10/35' assert_output_json_eq '{ "originalmsg": "Outside 10.20.30.40\/35 40.30.20.10\/35", "unparsed-data": "Outside 10.20.30.40\/35 40.30.20.10\/35" }' execute 'Outside:10.20.30.40/aa 40.30.20.10/35' assert_output_json_eq '{ "originalmsg": "Outside:10.20.30.40\/aa 40.30.20.10\/35", "unparsed-data": "aa 40.30.20.10\/35" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_rfc5424timestamp-fmt_timestamp-unix-ms.sh000077500000000000000000000027651520037563000260170ustar00rootroot00000000000000#!/bin/bash # added 2017-10-02 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "RFC5424 timestamp in timestamp-unix format" add_rule 'version=2' add_rule 'rule=:here is a timestamp %{ "type":"date-rfc5424", "name":"num", "format":"timestamp-unix-ms"}% in RFC5424 format' execute 'here is a timestamp 2000-03-11T14:15:16+01:00 in RFC5424 format' assert_output_json_eq '{ "num": 952780516000}' # with milliseconds (too-low precision) execute 'here is a timestamp 2000-03-11T14:15:16.1+01:00 in RFC5424 format' assert_output_json_eq '{ "num": 952780516100 }' execute 'here is a timestamp 2000-03-11T14:15:16.12+01:00 in RFC5424 format' assert_output_json_eq '{ "num": 952780516120 }' # with milliseconds (exactly right precision) execute 'here is a timestamp 2000-03-11T14:15:16.123+01:00 in RFC5424 format' assert_output_json_eq '{ "num": 952780516123 }' # with overdone precision execute 'here is a timestamp 2000-03-11T14:15:16.1234+01:00 in RFC5424 format' assert_output_json_eq '{ "num": 952780516123 }' execute 'here is a timestamp 2000-03-11T14:15:16.123456789+01:00 in RFC5424 format' assert_output_json_eq '{ "num": 952780516123 }' #check cases where parsing failure must occur execute 'here is a timestamp 2000-03-11T14:15:16+01:00in RFC5424 format' assert_output_json_eq '{ "originalmsg": "here is a timestamp 2000-03-11T14:15:16+01:00in RFC5424 format", "unparsed-data": "2000-03-11T14:15:16+01:00in RFC5424 format" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_rfc5424timestamp-fmt_timestamp-unix.sh000077500000000000000000000016631520037563000253760ustar00rootroot00000000000000#!/bin/bash # added 2017-10-02 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "RFC5424 timestamp in timestamp-unix format" add_rule 'version=2' add_rule 'rule=:here is a timestamp %{ "type":"date-rfc5424", "name":"num", "format":"timestamp-unix"}% in RFC5424 format' execute 'here is a timestamp 2000-03-11T14:15:16+01:00 in RFC5424 format' assert_output_json_eq '{"num": 952780516}' # with milliseconds (must be ignored with this format!) execute 'here is a timestamp 2000-03-11T14:15:16.321+01:00 in RFC5424 format' assert_output_json_eq '{"num": 952780516}' #check cases where parsing failure must occur execute 'here is a timestamp 2000-03-11T14:15:16+01:00in RFC5424 format' assert_output_json_eq '{ "originalmsg": "here is a timestamp 2000-03-11T14:15:16+01:00in RFC5424 format", "unparsed-data": "2000-03-11T14:15:16+01:00in RFC5424 format" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_string.sh000077500000000000000000000024031520037563000200300ustar00rootroot00000000000000#!/bin/bash # added 2015-09-02 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "string syntax" reset_rules add_rule 'version=2' add_rule 'rule=:a %f:string% b' execute 'a test b' assert_output_json_eq '{"f": "test"}' execute 'a "test" b' assert_output_json_eq '{"f": "test"}' execute 'a "test with space" b' assert_output_json_eq '{"f": "test with space"}' execute 'a "test with "" double escape" b' assert_output_json_eq '{ "f": "test with \" double escape" }' execute 'a "test with \" backslash escape" b' assert_output_json_eq '{ "f": "test with \" backslash escape" }' echo test quoting.mode reset_rules add_rule 'version=2' add_rule 'rule=:a %f:string{"quoting.mode":"none"}% b' execute 'a test b' assert_output_json_eq '{"f": "test"}' execute 'a "test" b' assert_output_json_eq '{"f": "\"test\""}' echo "test quoting.char.*" reset_rules add_rule 'version=2' add_rule 'rule=:a %f:string{"quoting.char.begin":"[", "quoting.char.end":"]"}% b' execute 'a test b' assert_output_json_eq '{"f": "test"}' execute 'a [test] b' assert_output_json_eq '{"f": "test"}' execute 'a [test test2] b' assert_output_json_eq '{"f": "test test2"}' # things that need to NOT match cleanup_tmp_files liblognorm-2.1.0/tests/field_string_dashIsEmpty.sh000077500000000000000000000010741520037563000223450ustar00rootroot00000000000000#!/bin/bash # added 2021-06-07 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 srcdir="${srcdir:-.}" # shellcheck disable=SC1091 . 
"$srcdir"/exec.sh no_solaris10 test_def "$0" "quoted string with dash" add_rule 'version=2' add_rule 'rule=:% {"type":"string", "name":"str", "option.dashIsEmpty":True} %' execute '"-"' assert_output_json_eq '{ "str": ""}' reset_rules add_rule 'version=2' add_rule 'rule=:% {"type":"quoted-string", "name":"str"} %' execute '"-"' assert_output_json_eq '{ "str": "-"}' cleanup_tmp_files liblognorm-2.1.0/tests/field_string_doc_sample_lazy.sh000077500000000000000000000006561520037563000232650ustar00rootroot00000000000000#!/bin/bash # added 2018-06-26 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "string syntax" reset_rules add_rule 'version=2' add_rule 'rule=:%f:string{"matching.permitted":[ {"class":"digit"} ], "matching.mode":"lazy"} %%r:rest%' execute '12:34 56' assert_output_json_eq '{ "r": ":34 56", "f": "12" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_string_lazy_matching.sh000077500000000000000000000016361520037563000227500ustar00rootroot00000000000000#!/bin/bash # added 2018-06-26 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "string syntax" reset_rules add_rule 'version=2' add_rule 'rule=:Rule-ID:%-:whitespace%% f:string{"matching.permitted":[ {"class":"digit"}, {"chars":"abcdefghijklmnopqrstuvwxyz"}, {"chars":"ABCDEFGHIJKLMNOPQRSTUVWXYZ"}, {"chars":"-"}, ], "quoting.escape.mode":"none", "matching.mode":"lazy"}%%resta:rest%' execute 'Rule-ID: XY7azl704-84a39894783423467a33f5b48bccd23c-a0n63i2\r\nQNas: ' assert_output_json_eq '{ "resta": "\\r\\nQNas: ", "f": "XY7azl704-84a39894783423467a33f5b48bccd23c-a0n63i2" }' execute 'Rule-ID: XY7azl704-84a39894783423467a33f5b48bccd23c-a0n63i2 LWL' assert_output_json_eq '{ "resta": " LWL", "f": "XY7azl704-84a39894783423467a33f5b48bccd23c-a0n63i2" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_string_perm_chars.sh000077500000000000000000000035401520037563000222360ustar00rootroot00000000000000#!/bin/bash # added 2015-09-02 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "string type with permitted chars" reset_rules add_rule 'version=2' add_rule 'rule=:a %f:string{"matching.permitted":"abc"}% b' execute 'a abc b' assert_output_json_eq '{"f": "abc"}' execute 'a abcd b' assert_output_json_eq '{"originalmsg": "a abcd b", "unparsed-data": "abcd b" }' execute 'a abbbbbcccbaaaa b' assert_output_json_eq '{"f": "abbbbbcccbaaaa"}' execute 'a "abc" b' assert_output_json_eq '{"f": "abc"}' echo "param array" reset_rules add_rule 'version=2' add_rule 'rule=:a %f:string{"matching.permitted":[ {"chars":"ab"}, {"chars":"c"} ]}% b' execute 'a abc b' assert_output_json_eq '{"f": "abc"}' reset_rules add_rule 'version=2' add_rule 'rule=:a %f:string{"matching.permitted":[ {"class":"digit"}, {"chars":"x"} ]}% b' execute 'a 12x3 b' assert_output_json_eq '{"f": "12x3"}' echo alpha reset_rules add_rule 'version=2' add_rule 'rule=:a %f:string{"matching.permitted":[ {"class":"alpha"} ]}% b' execute 'a abcdefghijklmnopqrstuvwxyZ b' assert_output_json_eq 
'{"f": "abcdefghijklmnopqrstuvwxyZ"}' execute 'a abcd1 b' assert_output_json_eq '{"originalmsg": "a abcd1 b", "unparsed-data": "abcd1 b" }' echo alnum reset_rules add_rule 'version=2' add_rule 'rule=:a %f:string{"matching.permitted":[ {"class":"alnum"} ]}% b' execute 'a abcdefghijklmnopqrstuvwxyZ b' assert_output_json_eq '{"f": "abcdefghijklmnopqrstuvwxyZ"}' execute 'a abcd1 b' assert_output_json_eq '{"f": "abcd1" }' execute 'a abcd1_ b' assert_output_json_eq '{ "originalmsg": "a abcd1_ b", "unparsed-data": "abcd1_ b" } ' cleanup_tmp_files liblognorm-2.1.0/tests/field_suffixed.sh000077500000000000000000000054231520037563000203440ustar00rootroot00000000000000#!/bin/bash # added 2015-02-25 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "field with one of many possible suffixes" add_rule 'rule=:gc reclaimed %eden_free:suffixed:,:b,kb,mb,gb:number% eden [surviver: %surviver_used:suffixed:;:kb;mb;gb;b:number%/%surviver_size:suffixed:|:b|kb|mb|gb:float%]' execute 'gc reclaimed 559mb eden [surviver: 95b/30.2mb]' assert_output_json_eq '{"eden_free": {"value": "559", "suffix":"mb"}, "surviver_used": {"value": "95", "suffix": "b"}, "surviver_size": {"value": "30.2", "suffix": "mb"}}' reset_rules add_rule 'rule=:gc reclaimed %eden_free:named_suffixed:size:unit:,:b,kb,mb,gb:number% eden [surviver: %surviver_used:named_suffixed:sz:u:;:kb;mb;gb;b:number%/%surviver_size:suffixed:|:b|kb|mb|gb:float%]' execute 'gc reclaimed 559mb eden [surviver: 95b/30.2mb]' assert_output_json_eq '{"eden_free": {"size": "559", "unit":"mb"}, "surviver_used": {"sz": "95", "u": "b"}, "surviver_size": {"value": "30.2", "suffix": "mb"}}' reset_rules add_rule 'rule=:gc reclaimed %eden_free:named_suffixed:size:unit:,:b,kb,mb,gb:interpret:int:number% from eden' execute 'gc reclaimed 559mb from eden' assert_output_json_eq '{"eden_free": {"size": 559, "unit":"mb"}}' reset_rules add_rule 'rule=:disk free: 
%free:named_suffixed:size:unit:,:\x25,gb:interpret:int:number%' execute 'disk free: 12%' assert_output_json_eq '{"free": {"size": 12, "unit":"%"}}' execute 'disk free: 130gb' assert_output_json_eq '{"free": {"size": 130, "unit":"gb"}}' reset_rules add_rule 'rule=:disk free: %free:named_suffixed:size:unit:\x3a:gb\x3a\x25:interpret:int:number%' execute 'disk free: 12%' assert_output_json_eq '{"free": {"size": 12, "unit":"%"}}' execute 'disk free: 130gb' assert_output_json_eq '{"free": {"size": 130, "unit":"gb"}}' reset_rules add_rule 'rule=:eden,surviver,old-gen available-capacity: %available_memory:tokenized:,:named_suffixed:size:unit:,:mb,gb:interpret:int:number%' execute 'eden,surviver,old-gen available-capacity: 400mb,40mb,1gb' assert_output_json_eq '{"available_memory": [{"size": 400, "unit":"mb"}, {"size": 40, "unit":"mb"}, {"size": 1, "unit":"gb"}]}' reset_rules add_rule 'rule=:eden,surviver,old-gen available-capacity: %available_memory:named_suffixed:size:unit:,:mb,gb:tokenized:,:interpret:int:number%' execute 'eden,surviver,old-gen available-capacity: 400,40,1024mb' assert_output_json_eq '{"available_memory": {"size": [400, 40, 1024], "unit":"mb"}}' reset_rules add_rule 'rule=:eden:surviver:old-gen available-capacity: %available_memory:named_suffixed:size:unit:,:mb,gb:tokenized:\x3a:interpret:int:number%' execute 'eden:surviver:old-gen available-capacity: 400:40:1024mb' assert_output_json_eq '{"available_memory": {"size": [400, 40, 1024], "unit":"mb"}}' cleanup_tmp_files liblognorm-2.1.0/tests/field_suffixed_with_invalid_ruledef.sh000077500000000000000000000043131520037563000246100ustar00rootroot00000000000000#!/bin/bash # added 2015-02-26 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "field with one of many possible suffixes, but invalid ruledef" add_rule 'rule=:reclaimed %eden_free:suffixe:,:b,kb,mb,gb:number% eden' execute 'reclaimed 559mb eden' assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' reset_rules add_rule 'rule=:reclaimed %eden_free:suffixed% eden' execute 'reclaimed 559mb eden' assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' reset_rules add_rule 'rule=:reclaimed %eden_free:suffixed:% eden' execute 'reclaimed 559mb eden' assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' reset_rules add_rule 'rule=:reclaimed %eden_free:suffixed:kb,mb% eden' execute 'reclaimed 559mb eden' assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' reset_rules add_rule 'rule=:reclaimed %eden_free:suffixed:kb,mb% eden' execute 'reclaimed 559mb eden' assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' reset_rules add_rule 'rule=:reclaimed %eden_free:suffixed:,:% eden' execute 'reclaimed 559mb eden' assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' reset_rules add_rule 'rule=:reclaimed %eden_free:suffixed:,:kb,mb% eden' execute 'reclaimed 559mb eden' assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' reset_rules add_rule 'rule=:reclaimed %eden_free:suffixed:,:kb,mb:% eden' execute 'reclaimed 559mb eden' assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' reset_rules add_rule 'rule=:reclaimed %eden_free:suffixed:,:kb,mb:floa% eden' execute 'reclaimed 559mb eden' assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' reset_rules add_rule 'rule=:reclaimed %eden_free:suffixed:,:kb,m:b:floa% eden' execute 'reclaimed 559mb eden' 
assert_output_json_eq '{ "originalmsg": "reclaimed 559mb eden", "unparsed-data": "559mb eden" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_tokenized.sh000077500000000000000000000023401520037563000205160ustar00rootroot00000000000000#!/bin/bash # added 2014-11-17 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "tokenized field" add_rule 'rule=:%arr:tokenized: , :word% %more:rest%' execute '123 , abc , 456 , def ijk789' assert_output_contains '"arr": [ "123", "abc", "456", "def" ]' assert_output_contains '"more": "ijk789"' reset_rules add_rule 'rule=:%ips:tokenized:, :ipv4% %text:rest%' execute '10.20.30.40, 50.60.70.80, 90.100.110.120 are blocked' assert_output_contains '"text": "are blocked"' assert_output_contains '"ips": [ "10.20.30.40", "50.60.70.80", "90.100.110.120" ]' reset_rules add_rule 'rule=:comma separated list of colon separated list of # separated numbers: %some_nos:tokenized:, :tokenized: \x3a :tokenized:#:number%' execute 'comma separated list of colon separated list of # separated numbers: 10, 20 : 30#40#50 : 60#70#80, 90 : 100' assert_output_contains '"some_nos": [ [ [ "10" ] ], [ [ "20" ], [ "30", "40", "50" ], [ "60", "70", "80" ] ], [ [ "90" ], [ "100" ] ] ]' reset_rules add_rule 'rule=:%arr:tokenized:\x3a:number% %more:rest%' execute '123:456:789 ijk789' assert_output_json_eq '{"arr": [ "123", "456", "789" ], "more": "ijk789"}' cleanup_tmp_files liblognorm-2.1.0/tests/field_tokenized_recursive.sh000077500000000000000000000051431520037563000226110ustar00rootroot00000000000000#!/bin/bash # added 2014-12-08 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "tokenized field with recursive field matching tokens" #recursive field inside tokenized field with default tail field add_rule 'rule=:%subnet_addr:ipv4%/%subnet_mask:number%%tail:rest%' add_rule 'rule=:%ip_addr:ipv4%%tail:rest%' add_rule 'rule=:blocked inbound via: %via_ip:ipv4% from: %addresses:tokenized:, :recursive% to %server_ip:ipv4%' execute 'blocked inbound via: 192.168.1.1 from: 1.2.3.4, 5.6.16.0/12, 8.9.10.11, 12.13.14.15, 16.17.18.0/8, 19.20.21.24/3 to 192.168.1.5' assert_output_json_eq '{ "addresses": [ {"ip_addr": "1.2.3.4"}, {"subnet_addr": "5.6.16.0", "subnet_mask": "12"}, {"ip_addr": "8.9.10.11"}, {"ip_addr": "12.13.14.15"}, {"subnet_addr": "16.17.18.0", "subnet_mask": "8"}, {"subnet_addr": "19.20.21.24", "subnet_mask": "3"}], "server_ip": "192.168.1.5", "via_ip": "192.168.1.1"}' reset_rules #recursive field inside tokenized field with default tail field reset_rules add_rule 'rule=:%subnet_addr:ipv4%/%subnet_mask:number%%remains:rest%' add_rule 'rule=:%ip_addr:ipv4%%remains:rest%' add_rule 'rule=:blocked inbound via: %via_ip:ipv4% from: %addresses:tokenized:, :recursive:remains% to %server_ip:ipv4%' execute 'blocked inbound via: 192.168.1.1 from: 1.2.3.4, 5.6.16.0/12, 8.9.10.11, 12.13.14.15, 16.17.18.0/8, 19.20.21.24/3 to 192.168.1.5' assert_output_json_eq '{ "addresses": [ {"ip_addr": "1.2.3.4"}, {"subnet_addr": "5.6.16.0", "subnet_mask": "12"}, {"ip_addr": "8.9.10.11"}, {"ip_addr": "12.13.14.15"}, {"subnet_addr": "16.17.18.0", "subnet_mask": "8"}, {"subnet_addr": "19.20.21.24", "subnet_mask": "3"}], "server_ip": "192.168.1.5", "via_ip": "192.168.1.1"}' #recursive field inside tokenized field with default tail field reset_rules 'net' add_rule 'rule=:%subnet_addr:ipv4%/%subnet_mask:number%%remains:rest%' 'net' add_rule 'rule=:%ip_addr:ipv4%%remains:rest%' 'net' reset_rules add_rule 'rule=:blocked inbound via: %via_ip:ipv4% from: %addresses:tokenized:, :descent:./net.rulebase:remains% to %server_ip:ipv4%' 
execute 'blocked inbound via: 192.168.1.1 from: 1.2.3.4, 5.6.16.0/12, 8.9.10.11, 12.13.14.15, 16.17.18.0/8, 19.20.21.24/3 to 192.168.1.5' assert_output_json_eq '{ "addresses": [ {"ip_addr": "1.2.3.4"}, {"subnet_addr": "5.6.16.0", "subnet_mask": "12"}, {"ip_addr": "8.9.10.11"}, {"ip_addr": "12.13.14.15"}, {"subnet_addr": "16.17.18.0", "subnet_mask": "8"}, {"subnet_addr": "19.20.21.24", "subnet_mask": "3"}], "server_ip": "192.168.1.5", "via_ip": "192.168.1.1"}' cleanup_tmp_files liblognorm-2.1.0/tests/field_tokenized_with_invalid_ruledef.sh000077500000000000000000000026121520037563000247670ustar00rootroot00000000000000#!/bin/bash # added 2014-11-18 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "tokenized field with invalid rule definition" add_rule 'rule=:%arr:tokenized%' execute '123 abc 456 def' assert_output_contains '"unparsed-data": "123 abc 456 def"' assert_output_contains '"originalmsg": "123 abc 456 def"' reset_rules add_rule 'rule=:%arr:tokenized: %' execute '123 abc 456 def' assert_output_contains '"unparsed-data": "123 abc 456 def"' assert_output_contains '"originalmsg": "123 abc 456 def"' reset_rules add_rule 'rule=:%arr:tokenized:quux:%' execute '123 abc 456 def' assert_output_contains '"unparsed-data": "123 abc 456 def"' assert_output_contains '"originalmsg": "123 abc 456 def"' reset_rules add_rule 'rule=:%arr:tokenized:quux:some_non_existent_type%' execute '123 abc 456 def' assert_output_contains '"unparsed-data": "123 abc 456 def"' assert_output_contains '"originalmsg": "123 abc 456 def"' reset_rules add_rule 'rule=:%arr:tokenized:quux:some_non_existent_type:%' execute '123 abc 456 def' assert_output_contains '"unparsed-data": "123 abc 456 def"' assert_output_contains '"originalmsg": "123 abc 456 def"' reset_rules add_rule 'rule=:%arr:tokenized::::%%%%' execute '123 abc 456 def' assert_output_contains '"unparsed-data": "123 abc 456 def"' assert_output_contains '"originalmsg": "123 
abc 456 def"' cleanup_tmp_files liblognorm-2.1.0/tests/field_tokenized_with_regex.sh000077500000000000000000000015701520037563000227470ustar00rootroot00000000000000#!/bin/bash # added 2014-11-17 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 #test that tokenized disabled regex if parent context has it disabled . $srcdir/exec.sh no_solaris10 test_def $0 "tokenized field with regex based field" add_rule 'rule=:%parts:tokenized:,:regex:[^, ]+% %text:rest%' execute '123,abc,456,def foo bar' assert_output_contains '"unparsed-data": "123,abc,456,def foo bar"' assert_output_contains '"originalmsg": "123,abc,456,def foo bar"' #and then enables it when parent context has it enabled export ln_opts='-oallowRegex' . $srcdir/exec.sh test_def $0 "tokenized field with regex based field" add_rule 'rule=:%parts:tokenized:,:regex:[^, ]+% %text:rest%' execute '123,abc,456,def foo bar' assert_output_contains '"parts": [ "123", "abc", "456", "def" ]' assert_output_contains '"text": "foo bar"' cleanup_tmp_files liblognorm-2.1.0/tests/field_v2-iptables.sh000077500000000000000000000047721520037563000206650ustar00rootroot00000000000000#!/bin/bash # added 2015-04-30 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "v2-iptables field" add_rule 'version=2' add_rule 'rule=:iptables output denied: %field:v2-iptables%' # first, a real-world case execute 'iptables output denied: IN= OUT=eth0 SRC=176.9.56.141 DST=168.192.14.3 LEN=32 TOS=0x00 PREC=0x00 TTL=64 ID=39110 DF PROTO=UDP SPT=49564 DPT=2010 LEN=12' assert_output_json_eq '{ "field": { "IN": "", "OUT": "eth0", "SRC": "176.9.56.141", "DST": "168.192.14.3", "LEN": "12", "TOS": "0x00", "PREC": "0x00", "TTL": "64", "ID": "39110", "DF": null, "PROTO": "UDP", "SPT": "49564", "DPT": "2010" } }' # now some more "fabricated" cases for better readable test reset_rules add_rule 'rule=:iptables: %field:v2-iptables%' execute 'iptables: IN=value SECOND=test' assert_output_json_eq '{ "field": { "IN": "value", "SECOND": "test" }} }' execute 'iptables: IN= SECOND=test' assert_output_json_eq '{ "field": { "IN": ""} }' execute 'iptables: IN SECOND=test' assert_output_json_eq '{ "field": { "IN": null} }' execute 'iptables: IN=invalue OUT=outvalue' assert_output_json_eq '{ "field": { "IN": "invalue", "OUT": "outvalue" } }' execute 'iptables: IN= OUT=outvalue' assert_output_json_eq '{ "field": { "IN": "", "OUT": "outvalue" } }' execute 'iptables: IN OUT=outvalue' assert_output_json_eq '{ "field": { "IN": null, "OUT": "outvalue" } }' # #check cases where parsing failure must occur # echo verify failure cases # lower case is not permitted execute 'iptables: in=value' assert_output_json_eq '{ "originalmsg": "iptables: in=value", "unparsed-data": "in=value" }' execute 'iptables: in=' assert_output_json_eq '{ "originalmsg": "iptables: in=", "unparsed-data": "in=" }' execute 'iptables: in' assert_output_json_eq '{ "originalmsg": "iptables: in", "unparsed-data": "in" }' execute 'iptables: IN' # single field is NOT permitted! 
assert_output_json_eq '{ "originalmsg": "iptables: IN", "unparsed-data": "IN" }' # multiple spaces between n=v pairs are not permitted execute 'iptables: IN=invalue OUT=outvalue' assert_output_json_eq '{ "originalmsg": "iptables: IN=invalue OUT=outvalue", "unparsed-data": "IN=invalue OUT=outvalue" }' execute 'iptables: IN= OUT=outvalue' assert_output_json_eq '{ "originalmsg": "iptables: IN= OUT=outvalue", "unparsed-data": "IN= OUT=outvalue" }' execute 'iptables: IN OUT=outvalue' assert_output_json_eq '{ "originalmsg": "iptables: IN OUT=outvalue", "unparsed-data": "IN OUT=outvalue" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_v2-iptables_jsoncnf.sh000077500000000000000000000050441520037563000223760ustar00rootroot00000000000000#!/bin/bash # added 2015-04-30 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "v2-iptables field" add_rule 'version=2' add_rule 'rule=:iptables output denied: %{"name":"field", "type":"v2-iptables"}%' # first, a real-world case execute 'iptables output denied: IN= OUT=eth0 SRC=176.9.56.141 DST=168.192.14.3 LEN=32 TOS=0x00 PREC=0x00 TTL=64 ID=39110 DF PROTO=UDP SPT=49564 DPT=2010 LEN=12' assert_output_json_eq '{ "field": { "IN": "", "OUT": "eth0", "SRC": "176.9.56.141", "DST": "168.192.14.3", "LEN": "12", "TOS": "0x00", "PREC": "0x00", "TTL": "64", "ID": "39110", "DF": null, "PROTO": "UDP", "SPT": "49564", "DPT": "2010" } }' # now some more "fabricated" cases for better readable test reset_rules add_rule 'version=2' add_rule 'rule=:iptables: %field:v2-iptables%' execute 'iptables: IN=value SECOND=test' assert_output_json_eq '{ "field": { "IN": "value", "SECOND": "test" }} }' execute 'iptables: IN= SECOND=test' assert_output_json_eq '{ "field": { "IN": ""} }' execute 'iptables: IN SECOND=test' assert_output_json_eq '{ "field": { "IN": null} }' execute 'iptables: IN=invalue OUT=outvalue' assert_output_json_eq '{ "field": { "IN": "invalue", "OUT": "outvalue" } }' execute 
'iptables: IN= OUT=outvalue' assert_output_json_eq '{ "field": { "IN": "", "OUT": "outvalue" } }' execute 'iptables: IN OUT=outvalue' assert_output_json_eq '{ "field": { "IN": null, "OUT": "outvalue" } }' # #check cases where parsing failure must occur # echo verify failure cases # lower case is not permitted execute 'iptables: in=value' assert_output_json_eq '{ "originalmsg": "iptables: in=value", "unparsed-data": "in=value" }' execute 'iptables: in=' assert_output_json_eq '{ "originalmsg": "iptables: in=", "unparsed-data": "in=" }' execute 'iptables: in' assert_output_json_eq '{ "originalmsg": "iptables: in", "unparsed-data": "in" }' execute 'iptables: IN' # single field is NOT permitted! assert_output_json_eq '{ "originalmsg": "iptables: IN", "unparsed-data": "IN" }' # multiple spaces between n=v pairs are not permitted execute 'iptables: IN=invalue OUT=outvalue' assert_output_json_eq '{ "originalmsg": "iptables: IN=invalue OUT=outvalue", "unparsed-data": "IN=invalue OUT=outvalue" }' execute 'iptables: IN= OUT=outvalue' assert_output_json_eq '{ "originalmsg": "iptables: IN= OUT=outvalue", "unparsed-data": "IN= OUT=outvalue" }' execute 'iptables: IN OUT=outvalue' assert_output_json_eq '{ "originalmsg": "iptables: IN OUT=outvalue", "unparsed-data": "IN OUT=outvalue" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_v2-iptables_v1.sh000077500000000000000000000047451520037563000212730ustar00rootroot00000000000000#!/bin/bash # added 2015-04-30 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "v2-iptables field" add_rule 'rule=:iptables output denied: %field:v2-iptables%' # first, a real-world case execute 'iptables output denied: IN= OUT=eth0 SRC=176.9.56.141 DST=168.192.14.3 LEN=32 TOS=0x00 PREC=0x00 TTL=64 ID=39110 DF PROTO=UDP SPT=49564 DPT=2010 LEN=12' assert_output_json_eq '{ "field": { "IN": "", "OUT": "eth0", "SRC": "176.9.56.141", "DST": "168.192.14.3", "LEN": "12", "TOS": "0x00", "PREC": "0x00", "TTL": "64", "ID": "39110", "DF": null, "PROTO": "UDP", "SPT": "49564", "DPT": "2010" } }' # now some more "fabricated" cases for better readable test reset_rules add_rule 'rule=:iptables: %field:v2-iptables%' execute 'iptables: IN=value SECOND=test' assert_output_json_eq '{ "field": { "IN": "value", "SECOND": "test" }} }' execute 'iptables: IN= SECOND=test' assert_output_json_eq '{ "field": { "IN": ""} }' execute 'iptables: IN SECOND=test' assert_output_json_eq '{ "field": { "IN": null} }' execute 'iptables: IN=invalue OUT=outvalue' assert_output_json_eq '{ "field": { "IN": "invalue", "OUT": "outvalue" } }' execute 'iptables: IN= OUT=outvalue' assert_output_json_eq '{ "field": { "IN": "", "OUT": "outvalue" } }' execute 'iptables: IN OUT=outvalue' assert_output_json_eq '{ "field": { "IN": null, "OUT": "outvalue" } }' # #check cases where parsing failure must occur # echo verify failure cases # lower case is not permitted execute 'iptables: in=value' assert_output_json_eq '{ "originalmsg": "iptables: in=value", "unparsed-data": "in=value" }' execute 'iptables: in=' assert_output_json_eq '{ "originalmsg": "iptables: in=", "unparsed-data": "in=" }' execute 'iptables: in' assert_output_json_eq '{ "originalmsg": "iptables: in", "unparsed-data": "in" }' execute 'iptables: IN' # single field is NOT permitted! 
assert_output_json_eq '{ "originalmsg": "iptables: IN", "unparsed-data": "IN" }' # multiple spaces between n=v pairs are not permitted execute 'iptables: IN=invalue OUT=outvalue' assert_output_json_eq '{ "originalmsg": "iptables: IN=invalue OUT=outvalue", "unparsed-data": "IN=invalue OUT=outvalue" }' execute 'iptables: IN= OUT=outvalue' assert_output_json_eq '{ "originalmsg": "iptables: IN= OUT=outvalue", "unparsed-data": "IN= OUT=outvalue" }' execute 'iptables: IN OUT=outvalue' assert_output_json_eq '{ "originalmsg": "iptables: IN OUT=outvalue", "unparsed-data": "IN OUT=outvalue" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_whitespace.sh000077500000000000000000000021551520037563000206620ustar00rootroot00000000000000#!/bin/bash # added 2015-03-12 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "whitespace parser" # the "word" parser unfortunately treats everything except # a SP as being in the word. So a HT inside a word is # permitted, which does not work well with what we # want to test here. to solve this problem, we use op-quoted-string. # However, we must actually quote the samples with HT, because # that parser also treats HT as being part of the word. But thanks # to the quotes, we can force it to not do that. 
# rgerhards, 2015-04-30 add_rule 'version=2' add_rule 'rule=:%a:op-quoted-string%%-:whitespace%%b:op-quoted-string%' execute 'word1 word2' # multiple spaces assert_output_json_eq '{ "b": "word2", "a": "word1" }' execute 'word1 word2' # single space assert_output_json_eq '{ "b": "word2", "a": "word1" }' execute '"word1" "word2"' # tab (US-ASCII HT) assert_output_json_eq '{ "b": "word2", "a": "word1" }' execute '"word1" "word2"' # mix of tab and spaces assert_output_json_eq '{ "b": "word2", "a": "word1" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_whitespace_jsoncnf.sh000077500000000000000000000022541520037563000224020ustar00rootroot00000000000000#!/bin/bash # added 2015-03-12 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "whitespace parser" # the "word" parser unfortunately treats everything except # a SP as being in the word. So a HT inside a word is # permitted, which does not work well with what we # want to test here. to solve this problem, we use op-quoted-string. # However, we must actually quote the samples with HT, because # that parser also treats HT as being part of the word. But thanks # to the quotes, we can force it to not do that. 
# rgerhards, 2015-04-30 add_rule 'version=2' add_rule 'rule=:%{"name":"a", "type":"op-quoted-string"}%%{"name":"-", "type":"whitespace"}%%{"name":"b", "type":"op-quoted-string"}%' execute 'word1 word2' # multiple spaces assert_output_json_eq '{ "b": "word2", "a": "word1" }' execute 'word1 word2' # single space assert_output_json_eq '{ "b": "word2", "a": "word1" }' execute '"word1" "word2"' # tab (US-ASCII HT) assert_output_json_eq '{ "b": "word2", "a": "word1" }' execute '"word1" "word2"' # mix of tab and spaces assert_output_json_eq '{ "b": "word2", "a": "word1" }' cleanup_tmp_files liblognorm-2.1.0/tests/field_whitespace_v1.sh000077500000000000000000000021301520037563000212610ustar00rootroot00000000000000#!/bin/bash # added 2015-03-12 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "whitespace parser" # the "word" parser unfortunately treats everything except # a SP as being in the word. So a HT inside a word is # permitted, which does not work well with what we # want to test here. to solve this problem, we use op-quoted-string. # However, we must actually quote the samples with HT, because # that parser also treats HT as being part of the word. But thanks # to the quotes, we can force it to not do that. 
# rgerhards, 2015-04-30 add_rule 'rule=:%a:op-quoted-string%%-:whitespace%%b:op-quoted-string%' execute 'word1 word2' # multiple spaces assert_output_json_eq '{ "b": "word2", "a": "word1" }' execute 'word1 word2' # single space assert_output_json_eq '{ "b": "word2", "a": "word1" }' execute '"word1" "word2"' # tab (US-ASCII HT) assert_output_json_eq '{ "b": "word2", "a": "word1" }' execute '"word1" "word2"' # mix of tab and spaces assert_output_json_eq '{ "b": "word2", "a": "word1" }' cleanup_tmp_files liblognorm-2.1.0/tests/include.sh000077500000000000000000000007661520037563000170140ustar00rootroot00000000000000#!/bin/bash # added 2015-08-28 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "include (success case)" reset_rules add_rule 'version=2' add_rule 'include=inc.rulebase' reset_rules inc add_rule 'version=2' inc add_rule 'rule=:%field:mac48%' inc execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"field": "f0:f6:1c:5f:cc:a2"}' # single test is sufficient, because that only works if the include # worked ;) cleanup_tmp_files liblognorm-2.1.0/tests/include_RULEBASES.sh000077500000000000000000000013541520037563000204130ustar00rootroot00000000000000#!/bin/bash # added 2015-08-28 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "include (via LIBLOGNORM_RULEBASES directory)" reset_rules add_rule 'version=2' add_rule 'include=inc.rulebase' reset_rules inc add_rule 'version=2' inc add_rule 'rule=:%field:mac48%' inc RULEBASE_DIR="$(mktemp -d "$test_tmpdir/liblognorm-rulebases.XXXXXX")" mv "$(rulebase_file_name inc)" "$RULEBASE_DIR/inc.rulebase" export LIBLOGNORM_RULEBASES="$RULEBASE_DIR" execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"field": "f0:f6:1c:5f:cc:a2"}' export LIBLOGNORM_RULEBASES="$RULEBASE_DIR/" execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"field": "f0:f6:1c:5f:cc:a2"}' cleanup_tmp_files liblognorm-2.1.0/tests/json_eq.c000066400000000000000000000053451520037563000166320ustar00rootroot00000000000000#include "config.h" #include #include #include #include #include #include "internal.h" typedef struct json_object obj; static int eq(obj* expected, obj* actual); static int obj_eq(obj* expected, obj* actual) { int eql = 1; struct json_object_iterator it = json_object_iter_begin(expected); struct json_object_iterator itEnd = json_object_iter_end(expected); while (!json_object_iter_equal(&it, &itEnd)) { obj *actual_val; json_object_object_get_ex(actual, json_object_iter_peek_name(&it), &actual_val); eql &= eq(json_object_iter_peek_value(&it), actual_val); json_object_iter_next(&it); } return eql; } static int arr_eq(obj* expected, obj* actual) { int eql = 1; int expected_len = json_object_array_length(expected); int actual_len = json_object_array_length(actual); if (expected_len != actual_len) return 0; for (int i = 0; i < expected_len; i++) { obj* _exp = json_object_array_get_idx(expected, i); obj* act = json_object_array_get_idx(actual, i); eql &= eq(_exp, act); } return eql; } static int str_eq(obj* expected, obj* actual) { const char* exp_str = json_object_to_json_string(expected); const char* act_str = json_object_to_json_string(actual); return strcmp(exp_str, act_str) == 0; } static int eq(obj* expected, obj* actual) { if (expected == NULL && 
actual == NULL) { return 1; } else if (expected == NULL) { return 0; } else if (actual == NULL) { return 0; } enum json_type expected_type = json_object_get_type(expected); enum json_type actual_type = json_object_get_type(actual); if (expected_type != actual_type) return 0; switch(expected_type) { case json_type_null: return 1; case json_type_boolean: return json_object_get_boolean(expected) == json_object_get_boolean(actual); case json_type_double: return (fabs(json_object_get_double(expected) - json_object_get_double(actual)) < 0.001); case json_type_int: return json_object_get_int64(expected) == json_object_get_int64(actual); case json_type_object: return obj_eq(expected, actual); case json_type_array: return arr_eq(expected, actual); case json_type_string: return str_eq(expected, actual); default: fprintf(stderr, "unexpected type in %s:%d\n", __FILE__, __LINE__); abort(); } return 0; } int main(int argc, char** argv) { if (argc != 3) { fprintf(stderr, "expected and actual json not given, number of args was: %d\n", argc); exit(100); } obj* expected = json_tokener_parse(argv[1]); obj* actual = json_tokener_parse(argv[2]); int result = eq(expected, actual) ? 0 : 1; json_object_put(expected); json_object_put(actual); if (result != 0) { printf("JSONs weren't equal. \n\tExpected: \n\t\t%s\n\tActual: \n\t\t%s\n", argv[1], argv[2]); } return result; } liblognorm-2.1.0/tests/literal.sh000077500000000000000000000016461520037563000170230ustar00rootroot00000000000000#!/bin/bash # added 2016-12-21 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "using names with literal" add_rule 'version=2' add_rule 'rule=:%{"type":"literal", "text":"a", "name":"var"}%' execute 'a' assert_output_json_eq '{ "var": "a" }' reset_rules add_rule 'version=2' add_rule 'rule=:Test %{"type":"literal", "text":"a", "name":"var"}%' execute 'Test a' assert_output_json_eq '{ "var": "a" }' reset_rules add_rule 'version=2' add_rule 'rule=:Test %{"type":"literal", "text":"a", "name":"var"}% End' execute 'Test a End' assert_output_json_eq '{ "var": "a" }' reset_rules add_rule 'version=2' add_rule 'rule=:a %[{"name":"num", "type":"number"}, {"name":"colon", "type":"literal", "text":":"}, {"name":"hex", "type":"hexnumber"}]% b' execute 'a 4711:0x4712 b' assert_output_json_eq '{ "hex": "0x4712", "colon": ":", "num": "4711" }' cleanup_tmp_files liblognorm-2.1.0/tests/lognormalizer-invld-call.sh000077500000000000000000000005201520037563000222640ustar00rootroot00000000000000#!/bin/bash # This file is part of the liblognorm project, released under ASL 2.0 echo running test $0 if ../src/lognormalizer ; then echo "FAIL: loganalyzer did not detect missing rulebase" exit 1 fi if ../src/lognormalizer -r test -R test ; then echo "FAIL: loganalyzer did not detect both -r and -R given" exit 1 fi liblognorm-2.1.0/tests/missing_line_ending.sh000077500000000000000000000013251520037563000213650ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "missing line ending" reset_rules add_rule 'version=2' add_rule_no_LF 'rule=:%field:mac48%' execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"field": "f0:f6:1c:5f:cc:a2"}' execute 'f0-f6-1c-5f-cc-a2' assert_output_json_eq '{"field": "f0-f6-1c-5f-cc-a2"}' # things that need to NOT match execute 'f0-f6:1c:5f:cc-a2' assert_output_json_eq '{ "originalmsg": "f0-f6:1c:5f:cc-a2", "unparsed-data": "f0-f6:1c:5f:cc-a2" }' execute 'f0:f6:1c:xf:cc:a2' assert_output_json_eq '{ "originalmsg": "f0:f6:1c:xf:cc:a2", "unparsed-data": "f0:f6:1c:xf:cc:a2" }' cleanup_tmp_files liblognorm-2.1.0/tests/missing_line_ending_v1.sh000077500000000000000000000013041520037563000217700ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "missing line ending (v1)" reset_rules add_rule_no_LF 'rule=:%field:mac48%' execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"field": "f0:f6:1c:5f:cc:a2"}' execute 'f0-f6-1c-5f-cc-a2' assert_output_json_eq '{"field": "f0-f6-1c-5f-cc-a2"}' # things that need to NOT match execute 'f0-f6:1c:5f:cc-a2' assert_output_json_eq '{ "originalmsg": "f0-f6:1c:5f:cc-a2", "unparsed-data": "f0-f6:1c:5f:cc-a2" }' execute 'f0:f6:1c:xf:cc:a2' assert_output_json_eq '{ "originalmsg": "f0:f6:1c:xf:cc:a2", "unparsed-data": "f0:f6:1c:xf:cc:a2" }' cleanup_tmp_files liblognorm-2.1.0/tests/names.sh000077500000000000000000000022371520037563000164670ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "using names with literal" add_rule 'version=2' add_rule 'rule=:a %[{"name":"num", "type":"number"}, {"type":"literal", "text":":"}, {"name":"hex", "type":"hexnumber"}]% b' execute 'a 4711:0x4712 b' assert_output_json_eq '{ "hex": "0x4712", "num": "4711" }' reset_rules add_rule 'version=2' add_rule 'rule=:a %[{"name":"num", "type":"number"}, {"name":"literal", "type":"literal", "text":":"}, {"name":"hex", "type":"hexnumber"}]% b' execute 'a 4711:0x4712 b' assert_output_json_eq '{ "hex": "0x4712", "num": "4711" }' # check that "-" is still discarded reset_rules add_rule 'version=2' add_rule 'rule=:a %[{"name":"num", "type":"number"}, {"name":"-", "type":"literal", "text":":"}, {"name":"hex", "type":"hexnumber"}]% b' execute 'a 4711:0x4712 b' assert_output_json_eq '{ "hex": "0x4712", "num": "4711" }' # now let's check old style. Here we need "-". reset_rules add_rule 'version=2' add_rule 'rule=:a %-:number%:%hex:hexnumber% b' execute 'a 4711:0x4712 b' assert_output_json_eq '{ "hex": "0x4712" }' cleanup_tmp_files liblognorm-2.1.0/tests/options.sh.in000066400000000000000000000003311520037563000174520ustar00rootroot00000000000000use_valgrind=@VALGRIND@ echo "Using valgrind: $use_valgrind" if [ $use_valgrind == "yes" ]; then cmd="valgrind --error-exitcode=191 --malloc-fill=ff --free-fill=fe --leak-check=full --trace-children=yes $cmd" fi liblognorm-2.1.0/tests/parser_LF.sh000077500000000000000000000012451520037563000172370ustar00rootroot00000000000000#!/bin/bash # added 2015-07-15 by Rainer Gerhards # This checks if whitespace inside parser definitions is properly treated # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh no_solaris10 test_def $0 "LF in parser definition" add_rule 'rule=:here is a number % num:hexnumber % in hex form' execute 'here is a number 0x1234 in hex form' assert_output_json_eq '{"num": "0x1234"}' #check cases where parsing failure must occur execute 'here is a number 0x1234in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0x1234in hex form", "unparsed-data": "0x1234in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/parser_LF_jsoncnf.sh000077500000000000000000000013031520037563000207520ustar00rootroot00000000000000#!/bin/bash # added 2015-07-15 by Rainer Gerhards # This checks if whitespace inside parser definitions is properly treated # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "LF in parser definition" add_rule 'version=2' add_rule 'rule=:here is a number %{ "name":"num", "type":"hexnumber" }% in hex form' execute 'here is a number 0x1234 in hex form' assert_output_json_eq '{"num": "0x1234"}' #check cases where parsing failure must occur execute 'here is a number 0x1234in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0x1234in hex form", "unparsed-data": "0x1234in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/parser_eof_hardening.sh000077500000000000000000000022331520037563000215240ustar00rootroot00000000000000#!/bin/bash # added 2026-05-06 by AI agent # This file is part of the liblognorm project, released under ASL 2.0 # shellcheck source=tests/exec.sh disable=SC1091,SC2154 . 
"$srcdir"/exec.sh test_def "$0" "v2 parsers handle EOF safely" add_rule 'version=2' add_rule 'rule=:%f:string%' execute 'abc' assert_output_json_eq '{ "f": "abc" }' reset_rules add_rule 'version=2' add_rule 'rule=:pre%f:json%' execute 'pre' assert_output_json_eq '{ "originalmsg": "pre", "unparsed-data": "" }' reset_rules add_rule 'version=2' add_rule 'rule=:%f:json%' execute '[1,2]' assert_output_json_eq '{ "f": [ 1, 2 ] }' reset_rules add_rule 'version=2' add_rule 'rule=:pre%f:op-quoted-string%' execute 'pre' assert_output_json_eq '{ "originalmsg": "pre", "unparsed-data": "" }' reset_rules add_rule 'version=2' add_rule 'rule=:%f:checkpoint-lea%' execute 'src: ' assert_output_json_eq '{ "originalmsg": "src: ", "unparsed-data": "src: " }' execute 'src: "unterminated' assert_output_json_eq '{ "originalmsg": "src: \"unterminated", "unparsed-data": "src: \"unterminated" }' execute 'src::' assert_output_json_eq '{ "originalmsg": "src::", "unparsed-data": "src::" }' cleanup_tmp_files liblognorm-2.1.0/tests/parser_eof_hardening_v1.sh000077500000000000000000000013571520037563000221400ustar00rootroot00000000000000#!/bin/bash # added 2026-05-06 by AI agent # This file is part of the liblognorm project, released under ASL 2.0 # shellcheck source=tests/exec.sh disable=SC1091,SC2154 . 
"$srcdir"/exec.sh test_def "$0" "v1 parsers handle EOF safely" add_rule 'rule=:pre%f:json%' execute 'pre' assert_output_json_eq '{ "originalmsg": "pre", "unparsed-data": "" }' reset_rules add_rule 'rule=:%f:json%' execute '[1,2]' assert_output_json_eq '{ "f": [ 1, 2 ] }' reset_rules add_rule 'rule=:pre%f:op-quoted-string%' execute 'pre' assert_output_json_eq '{ "originalmsg": "pre", "unparsed-data": "" }' reset_rules add_rule 'rule=:%f:checkpoint-lea%' execute 'src: ' assert_output_json_eq '{ "originalmsg": "src: ", "unparsed-data": "src: " }' cleanup_tmp_files liblognorm-2.1.0/tests/parser_prios.sh000077500000000000000000000021331520037563000200670ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "parser priorities, simple case" add_rule 'version=2' add_rule 'rule=:%{"name":"field", "type":"mac48"}%' add_rule 'rule=:%{"name":"rest", "type":"rest"}%' execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"field": "f0:f6:1c:5f:cc:a2"}' execute 'f0-f6-1c-5f-cc-a2' assert_output_json_eq '{"field": "f0-f6-1c-5f-cc-a2"}' # things that need to match rest execute 'f0-f6:1c:5f:cc-a2' assert_output_json_eq '{ "rest": "f0-f6:1c:5f:cc-a2" }' # now the same with inverted priorities. We should now always have # rest matches. 
reset_rules add_rule 'version=2' add_rule 'rule=:%{"name":"field", "type":"mac48", "priority":100}%' add_rule 'rule=:%{"name":"rest", "type":"rest", "priority":10}%' execute 'f0:f6:1c:5f:cc:a2' assert_output_json_eq '{"rest": "f0:f6:1c:5f:cc:a2"}' execute 'f0-f6-1c-5f-cc-a2' assert_output_json_eq '{"rest": "f0-f6-1c-5f-cc-a2"}' execute 'f0-f6:1c:5f:cc-a2' assert_output_json_eq '{ "rest": "f0-f6:1c:5f:cc-a2" }' cleanup_tmp_files liblognorm-2.1.0/tests/parser_whitespace.sh000077500000000000000000000012211520037563000210640ustar00rootroot00000000000000#!/bin/bash # added 2015-07-15 by Rainer Gerhards # This checks if whitespace inside parser definitions is properly treated # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh no_solaris10 test_def $0 "whitespace in parser definition" add_rule 'rule=:here is a number % num:hexnumber % in hex form' execute 'here is a number 0x1234 in hex form' assert_output_json_eq '{"num": "0x1234"}' #check cases where parsing failure must occur execute 'here is a number 0x1234in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0x1234in hex form", "unparsed-data": "0x1234in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/parser_whitespace_jsoncnf.sh000077500000000000000000000012571520037563000226150ustar00rootroot00000000000000#!/bin/bash # added 2015-07-15 by Rainer Gerhards # This checks if whitespace inside parser definitions is properly treated # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "whitespace in parser definition" add_rule 'version=2' add_rule 'rule=:here is a number % {"name":"num", "type":"hexnumber"} % in hex form' execute 'here is a number 0x1234 in hex form' assert_output_json_eq '{"num": "0x1234"}' #check cases where parsing failure must occur execute 'here is a number 0x1234in hex form' assert_output_json_eq '{ "originalmsg": "here is a number 0x1234in hex form", "unparsed-data": "0x1234in hex form" }' cleanup_tmp_files liblognorm-2.1.0/tests/repeat_alternative_nested.sh000077500000000000000000000024021520037563000225760ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "simple alternative syntax" add_rule 'version=2' add_rule 'rule=:a %{"name":"numbers", "type":"repeat", "parser": { "type":"alternative", "parser": [ [ {"type":"number", "name":"n1"}, {"type":"literal", "text":":"}, {"type":"number", "name":"n2"}, ], {"type":"hexnumber", "name":"hex"} ] }, "while":[ {"type":"literal", "text":", "} ] }% b' execute 'a 1:2, 3:4, 5:6, 7:8 b' assert_output_json_eq '{ "numbers": [ { "n2": "2", "n1": "1" }, { "n2": "4", "n1": "3" }, { "n2": "6", "n1": "5" }, { "n2": "8", "n1": "7" } ] }' execute 'a 0x4711 b' assert_output_json_eq '{ "numbers": [ { "hex": "0x4711" } ] }' # note: 0x4711, 1:2 does not work because hexnumber expects a SP after # the number! Thus we use the reverse. We could add this case once # we have added an option for more relaxed matching to hexnumber. execute 'a 1:2, 0x4711 b' assert_output_json_eq '{ "numbers": [ { "n2": "2", "n1": "1" }, { "hex": "0x4711" } ] }' cleanup_tmp_files liblognorm-2.1.0/tests/repeat_fail_on_duplicate.sh000077500000000000000000000011671520037563000223660ustar00rootroot00000000000000#!/bin/bash # added 2026-03-25 by Codex # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "repeat failOnDuplicate rejects duplicate fields in one element" add_rule 'version=2' add_rule 'rule=:a %{"name":"numbers", "type":"repeat", "option.failOnDuplicate": true, "parser":[ {"name":"n", "type":"number"}, {"type":"literal", "text":":"}, {"name":"n", "type":"number"} ], "while": {"type":"literal", "text":", "} }% b' execute 'a 1:2 b' assert_output_json_eq '{ "originalmsg": "a 1:2 b", "unparsed-data": "1:2 b" }' cleanup_tmp_files liblognorm-2.1.0/tests/repeat_mismatch_in_while.sh000077500000000000000000000051321520037563000224040ustar00rootroot00000000000000#!/bin/bash # added 2015-08-26 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 # This is based on a practical support case, see # https://github.com/rsyslog/liblognorm/issues/130 . $srcdir/exec.sh test_def $0 "repeat with mismatch in parser part" reset_rules add_rule 'version=2' add_rule 'prefix=%timestamp:date-rfc3164% %hostname:word%' add_rule 'rule=cisco,fwblock: \x25ASA-6-106015\x3a Deny %proto:word% (no connection) from %source:cisco-interface-spec% to %dest:cisco-interface-spec% flags %flags:repeat{ "parser": {"type":"word", "name":"."}, "while":{"type":"literal", "text":" "} }% on interface %srciface:word%' echo step 1 execute 'Aug 18 13:18:45 192.168.99.2 %ASA-6-106015: Deny TCP (no connection) from 173.252.88.66/443 to 76.79.249.222/52746 flags RST on interface outside' assert_output_json_eq '{ "originalmsg": "Aug 18 13:18:45 192.168.99.2 %ASA-6-106015: Deny TCP (no connection) from 173.252.88.66\/443 to 76.79.249.222\/52746 flags RST on interface outside", "unparsed-data": "RST on interface outside" }' # now check case where we permit a mismatch inside the parser part and still # accept this as valid. This is needed for some use cases. See github # issue mentioned above for more details. 
# Note: there is something odd with the testbench driver: I cannot use two # consecutive spaces reset_rules add_rule 'version=2' add_rule 'prefix=%timestamp:date-rfc3164% %hostname:word%' add_rule 'rule=cisco,fwblock: \x25ASA-6-106015\x3a Deny %proto:word% (no connection) from %source:cisco-interface-spec% to %dest:cisco-interface-spec% flags %flags:repeat{ "option.permitMismatchInParser":true, "parser": {"type":"word", "name":"."}, "while":{"type":"literal", "text":" "} }%\x20 on interface %srciface:word%' echo step 2 execute 'Aug 18 13:18:45 192.168.99.2 %ASA-6-106015: Deny TCP (no connection) from 173.252.88.66/443 to 76.79.249.222/52746 flags RST on interface outside' assert_output_json_eq '{ "srciface": "outside", "flags": [ "RST" ], "dest": { "ip": "76.79.249.222", "port": "52746" }, "source": { "ip": "173.252.88.66", "port": "443" }, "proto": "TCP", "hostname": "192.168.99.2", "timestamp": "Aug 18 13:18:45" }' echo step 3 execute 'Aug 18 13:18:45 192.168.99.2 %ASA-6-106015: Deny TCP (no connection) from 173.252.88.66/443 to 76.79.249.222/52746 flags RST XST on interface outside' assert_output_json_eq '{ "srciface": "outside", "flags": [ "RST", "XST" ], "dest": { "ip": "76.79.249.222", "port": "52746" }, "source": { "ip": "173.252.88.66", "port": "443" }, "proto": "TCP", "hostname": "192.168.99.2", "timestamp": "Aug 18 13:18:45" }' cleanup_tmp_files liblognorm-2.1.0/tests/repeat_name_dot.sh000077500000000000000000000012121520037563000205020ustar00rootroot00000000000000#!/bin/bash # added 2023-02-14 by Kevin Guillemot # This file is part of the liblognorm project, released under ASL 2.0 srcdir="${srcdir:-.}" # shellcheck disable=SC1091 . 
"$srcdir"/exec.sh test_def "$0" "Repeat with one parser named dot" add_rule 'version=2' add_rule 'rule=:a %{"name":"numbers", "type":"repeat", "parser":[ {"type":"number"}, {"type":"literal", "text":":"}, {"name":".", "type":"number"} ], "while":[ {"type":"literal", "text":", "} ] }% b %w:word% ' execute 'a 1:2, 3:4, 5:6, 7:8 b test' assert_output_json_eq '{ "w": "test", "numbers": [ "2", "4", "6", "8" ] }' cleanup_tmp_files liblognorm-2.1.0/tests/repeat_named_while_segfault.sh000077500000000000000000000012161520037563000230660ustar00rootroot00000000000000#!/bin/bash # added 2026-05-05 by AI agent # This file is part of the liblognorm project, released under ASL 2.0 # shellcheck source=tests/exec.sh disable=SC1091,SC2154 . "$srcdir"/exec.sh test_def "$0" "repeat while clause with named parser does not segfault" add_rule 'version=2' add_rule 'rule=:a % {"name":"numbers", "type":"repeat", "parser": {"type":"number", "name":"n"}, "while": {"type":"char-to", "name":"host", "extradata":":"} }% b' execute 'a 1 : 2 b' assert_output_json_eq '{ "originalmsg": "a 1 : 2 b", "unparsed-data": "1 : 2 b" }' cleanup_tmp_files liblognorm-2.1.0/tests/repeat_simple.sh000077500000000000000000000012521520037563000202110ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "simple repeat syntax" add_rule 'version=2' add_rule 'rule=:a %{"name":"numbers", "type":"repeat", "parser":[ {"name":"n1", "type":"number"}, {"type":"literal", "text":":"}, {"name":"n2", "type":"number"} ], "while":[ {"type":"literal", "text":", "} ] }% b %w:word% ' execute 'a 1:2, 3:4, 5:6, 7:8 b test' assert_output_json_eq '{ "w": "test", "numbers": [ { "n2": "2", "n1": "1" }, { "n2": "4", "n1": "3" }, { "n2": "6", "n1": "5" }, { "n2": "8", "n1": "7" } ] }' cleanup_tmp_files liblognorm-2.1.0/tests/repeat_very_simple.sh000077500000000000000000000010361520037563000212560ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "very simple repeat syntax" add_rule 'version=2' add_rule 'rule=:a %{"name":"numbers", "type":"repeat", "parser": {"name":"n", "type":"number"}, "while": {"type":"literal", "text":", "} }% b %w:word% ' execute 'a 1, 2, 3, 4 b test' assert_output_json_eq '{ "w": "test", "numbers": [ { "n": "1" }, { "n": "2" }, { "n": "3" }, { "n": "4" } ] }' cleanup_tmp_files liblognorm-2.1.0/tests/repeat_while_alternative.sh000077500000000000000000000014531520037563000224310ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "repeat syntax with alternative terminators" add_rule 'version=2' add_rule 'rule=:a %{"name":"numbers", "type":"repeat", "parser":[ {"name":"n1", "type":"number"}, {"type":"literal", "text":":"}, {"name":"n2", "type":"number"} ], "while": { "type":"alternative", "parser": [ {"type":"literal", "text":", "}, {"type":"literal", "text":","} ] } }% b %w:word% ' execute 'a 1:2, 3:4,5:6, 7:8 b test' assert_output_json_eq '{ "w": "test", "numbers": [ { "n2": "2", "n1": "1" }, { "n2": "4", "n1": "3" }, { "n2": "6", "n1": "5" }, { "n2": "8", "n1": "7" } ] }' cleanup_tmp_files liblognorm-2.1.0/tests/rule_empty_tag_segfault.sh000077500000000000000000000005731520037563000222770ustar00rootroot00000000000000#!/bin/bash # added 2026-05-05 by AI agent # This file is part of the liblognorm project, released under ASL 2.0 # shellcheck source=tests/exec.sh disable=SC1091,SC2154 . "$srcdir"/exec.sh test_def "$0" "empty v1 tag list entry is handled without segfault" add_rule 'rule=,' execute 'x' assert_output_json_eq '{ "originalmsg": "x", "unparsed-data": "x" }' cleanup_tmp_files liblognorm-2.1.0/tests/rule_last_str_long.sh000077500000000000000000000012511520037563000212600ustar00rootroot00000000000000#!/bin/bash . 
$srcdir/exec.sh no_solaris10 test_def $0 "multiple formats including string (see also: rule_last_str_short.sh)" add_rule 'version=2' add_rule 'rule=:%string:string%' add_rule 'rule=:before %string:string%' add_rule 'rule=:%string:string% after' add_rule 'rule=:before %string:string% after' add_rule 'rule=:before %string:string% middle %string:string%' execute 'string' execute 'before string' execute 'string after' execute 'before string after' execute 'before string middle string' assert_output_json_eq '{"string": "string" }' '{"string": "string" }''{"string": "string" }''{"string": "string" }''{"string": "string", "string": "string" }' cleanup_tmp_files liblognorm-2.1.0/tests/rule_last_str_short.sh000077500000000000000000000003671520037563000214670ustar00rootroot00000000000000#!/bin/bash . $srcdir/exec.sh test_def $0 "string being last in a rule (see also: rule_last_str_long.sh)" add_rule 'version=2' add_rule 'rule=:%string:string%' execute 'string' assert_output_json_eq '{"string": "string" }' cleanup_tmp_files liblognorm-2.1.0/tests/runaway_rule.sh000077500000000000000000000011741520037563000201000ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 # Note that this test produces an error message, as it encounters the # runaway rule. This is OK and actually must happen. The prime point # of the test is that it correctly loads the second rule, which # would otherwise be consumed by the runaway rule. . 
$srcdir/exec.sh test_def $0 "runaway rule (unmatched percent signs)" reset_rules add_rule 'version=2' add_rule 'rule=:test %f1:word unmatched percent' add_rule 'rule=:%field:word%' execute 'data' assert_output_json_eq '{"field": "data"}' cleanup_tmp_files liblognorm-2.1.0/tests/runaway_rule_comment.sh000077500000000000000000000012321520037563000216150ustar00rootroot00000000000000#!/bin/bash # added 2015-09-16 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 # Note that this test produces an error message, as it encounters the # runaway rule. This is OK and actually must happen. The prime point # of the test is that it correctly loads the second rule, which # would otherwise be consumed by the runaway rule. . $srcdir/exec.sh test_def $0 "runaway rule with comment lines (v2)" reset_rules add_rule 'version=2' add_rule 'rule=:test %f1:word unmatched percent' add_rule '' add_rule '#comment' add_rule 'rule=:%field:word%' execute 'data' assert_output_json_eq '{"field": "data"}' cleanup_tmp_files liblognorm-2.1.0/tests/runaway_rule_comment_v1.sh000077500000000000000000000012051520037563000222230ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 # Note that this test produces an error message, as it encounters the # runaway rule. This is OK and actually must happen. The prime point # of the test is that it correctly loads the second rule, which # would otherwise be consumed by the runaway rule. . 
$srcdir/exec.sh test_def $0 "runaway rule with comment lines (v1)" reset_rules add_rule 'rule=:test %f1:word unmatched percent' add_rule '' add_rule '#comment' add_rule 'rule=:%field:word%' execute 'data' assert_output_json_eq '{"field": "data"}' cleanup_tmp_files liblognorm-2.1.0/tests/runaway_rule_v1.sh000077500000000000000000000011621520037563000205030ustar00rootroot00000000000000#!/bin/bash # added 2015-05-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 # Note that this test produces an error message, as it encounters the # runaway rule. This is OK and actually must happen. The prime point # of the test is that it correctly loads the second rule, which # would otherwise be consumed by the runaway rule. . $srcdir/exec.sh test_def $0 "runaway rule (unmatched percent signs) v1 version" reset_rules add_rule 'rule=:test %f1:word unmatched percent' add_rule 'rule=:%field:word%' execute 'data' assert_output_json_eq '{"field": "data"}' cleanup_tmp_files liblognorm-2.1.0/tests/seq_simple.sh000077500000000000000000000006771520037563000175330ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "simple sequence (array) syntax" add_rule 'version=2' add_rule 'rule=:a %[{"name":"num", "type":"number"}, {"name":"-", "type":"literal", "text":":"}, {"name":"hex", "type":"hexnumber"}]% b' execute 'a 4711:0x4712 b' assert_output_json_eq '{ "hex": "0x4712", "num": "4711" }' cleanup_tmp_files liblognorm-2.1.0/tests/strict_prefix_actual_sample1.sh000077500000000000000000000016211520037563000232200ustar00rootroot00000000000000#!/bin/bash # added 2015-11-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "one rule is strict prefix of a longer one" add_rule 'version=2' add_rule 'prefix=%timestamp:date-rfc3164% %hostname:word% BL-WLC01: *%programname:char-to:\x3a%: %timestamp:date-rfc3164%.%fracsec:number%:' add_rule 'rule=wifi: #LOG-3-Q_IND: webauth_redirect.c:1238 read error on server socket, errno=131[...It occurred %count:number% times.!]' add_rule 'rule=wifi: #LOG-3-Q_IND: webauth_redirect.c:1238 read error on server socket, errno=131' execute 'Sep 28 23:53:19 192.168.123.99 BL-WLC01: *dtlArpTask: Sep 28 23:53:19.614: #LOG-3-Q_IND: webauth_redirect.c:1238 read error on server socket, errno=131' assert_output_json_eq '{ "fracsec": "614", "timestamp": "Sep 28 23:53:19", "programname": "dtlArpTask", "hostname": "192.168.123.99" }' cleanup_tmp_files liblognorm-2.1.0/tests/strict_prefix_matching_1.sh000077500000000000000000000007331520037563000223420ustar00rootroot00000000000000#!/bin/bash # added 2015-11-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "one rule is strict prefix of a longer one" add_rule 'version=2' add_rule 'rule=:a word %w1:word%' add_rule 'rule=:a word %w1:word% another word %w2:word%' execute 'a word w1 another word w2' assert_output_json_eq '{ "w2": "w2", "w1": "w1" }' execute 'a word w1' assert_output_json_eq '{ "w1": "w1" }' cleanup_tmp_files liblognorm-2.1.0/tests/strict_prefix_matching_2.sh000077500000000000000000000011261520037563000223400ustar00rootroot00000000000000#!/bin/bash # added 2015-11-05 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "named literal compaction" add_rule 'version=2' add_rule 'rule=:a word %w1:word% %l1:literal{"text":"l"}% b' add_rule 'rule=:a word %w1:word% %l2:literal{"text":"l2"}% b' add_rule 'rule=:a word %w1:word% l3 b' execute 'a word w1 l b' assert_output_json_eq '{ "l1": "l", "w1": "w1" }' execute 'a word w1 l2 b' assert_output_json_eq '{ "l2": "l2", "w1": "w1" }' execute 'a word w1 l3 b' assert_output_json_eq '{ "w1": "w1" }' cleanup_tmp_files liblognorm-2.1.0/tests/string_rb_simple.sh000077500000000000000000000003661520037563000207270ustar00rootroot00000000000000#!/bin/bash # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "simple rulebase via string" execute_with_string 'rule=:%w:word%' 'test' assert_output_json_eq '{ "w": "test" }' cleanup_tmp_files liblognorm-2.1.0/tests/string_rb_simple_2_lines.sh000077500000000000000000000011751520037563000223410ustar00rootroot00000000000000#!/bin/bash # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "simple rulebase via string" execute_with_string 'rule=:%w:word% rule=:%n:number%' 'test' assert_output_json_eq '{ "w": "test" }' execute_with_string 'rule=:%w:word% rule=:%n:number%' '2' assert_output_json_eq '{ "n": "2" }' #This is a correct word... execute_with_string 'rule=:%w:word% rule=:%n:number%' '2.3' assert_output_json_eq '{ "w": "2.3" }' #check error case execute_with_string 'rule=:%w:word% rule=:%n:number%' '2 3' assert_output_json_eq '{ "originalmsg": "2 3", "unparsed-data": " 3" }' cleanup_tmp_files liblognorm-2.1.0/tests/turbo_smoke.sh000077500000000000000000000007671520037563000177230ustar00rootroot00000000000000#!/bin/bash # added 2026-04-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 srcdir="${srcdir:-.}" # shellcheck disable=SC1091 . 
"$srcdir"/exec.sh export ln_opts='-oturbo' test_def "$0" "TurboVM end-to-end smoke path" add_rule 'version=2' add_rule 'rule=:%source.ip:ipv4% count=%count:number% msg=%msg:word%' execute '192.0.2.5 count=42 msg=ready' assert_output_json_eq '{ "source": { "ip": "192.0.2.5" }, "count": 42, "msg": "ready" }' cleanup_tmp_files liblognorm-2.1.0/tests/turbo_test_arena.c000066400000000000000000000706401520037563000205340ustar00rootroot00000000000000/** * @file turbo_test_arena.c * @brief Comprehensive test suite for turbo_arena * * Coverage: * - Lifecycle: init (default, sized, static), destroy (idempotent, NULL-safe) * - Allocation: basic, alignment (8/16/32/64), zero-size, exhaustion, calloc * - Invalid alignments: 0, non-power-of-2, excessive * - String operations: strdup, strndup (with embedded NUL), memdup * - Reset and backtracking: reset, mark/restore, nested marks * - Query functions: capacity, used, available, peak, has_space * - Statistics: get_stats, NULL safety * - Stress: many small allocations, alignment stress * - Macros: LN_ARENA_NEW, LN_ARENA_ARRAY, LN_ARENA_NEW_ALIGNED * - Peak tracking: preserved across reset, monotonic * - Configuration constants: LN_ARENA_DEFAULT_CAPACITY, MIN, MAX * * @author Jeremie Jourdin / Advens * @copyright 2026 Advens. Released under ASL 2.0. 
*/ #include "config.h" #pragma GCC diagnostic ignored "-Wdeclaration-after-statement" #ifdef ENABLE_TURBO #include "turbo_arena.h" #include #include #include #include /*============================================================================ * Test Framework *============================================================================*/ static int tests_run = 0; static int tests_passed = 0; static int tests_failed = 0; #define TEST_ASSERT(cond, msg) do { \ if (!(cond)) { \ fprintf(stderr, " FAIL: %s (line %d)\n", msg, __LINE__); \ return 0; \ } \ } while(0) #define TEST_ASSERT_EQ(a, b, msg) do { \ if ((a) != (b)) { \ fprintf(stderr, " FAIL: %s - got %lld, expected %lld (line %d)\n", \ msg, (long long)(a), (long long)(b), __LINE__); \ return 0; \ } \ } while(0) #define RUN_TEST(test_func) do { \ tests_run++; \ printf(" Running %s... ", #test_func); \ fflush(stdout); \ if (test_func()) { \ tests_passed++; \ printf("OK\n"); \ } else { \ tests_failed++; \ printf("FAILED\n"); \ } \ } while(0) /*============================================================================ * Configuration Constants Tests *============================================================================*/ static int test_constants(void) { TEST_ASSERT(LN_ARENA_DEFAULT_CAPACITY == 16384, "default capacity should be 16KB"); TEST_ASSERT(LN_ARENA_MIN_CAPACITY == 64, "min capacity should be 64 bytes"); TEST_ASSERT(LN_ARENA_MAX_CAPACITY == (16 * 1024 * 1024), "max capacity should be 16MB"); TEST_ASSERT(LN_ARENA_DEFAULT_ALIGN == 8, "default alignment should be 8 bytes"); TEST_ASSERT(LN_ARENA_CACHE_LINE == 64, "cache line should be 64 bytes"); return 1; } /*============================================================================ * Basic Lifecycle Tests *============================================================================*/ static int test_init_default(void) { ln_arena_t arena; int r = ln_arena_init(&arena); TEST_ASSERT_EQ(r, LN_ARENA_OK, "init should succeed"); TEST_ASSERT(arena.base != 
NULL, "base should be allocated"); TEST_ASSERT_EQ((long long)arena.capacity, LN_ARENA_DEFAULT_CAPACITY, "capacity should be default"); TEST_ASSERT_EQ((long long)arena.used, 0, "used should be 0"); TEST_ASSERT_EQ((long long)arena.peak, 0, "peak should be 0"); TEST_ASSERT(arena.flags & LN_ARENA_FLAG_OWNED, "should own memory"); ln_arena_destroy(&arena); TEST_ASSERT(arena.base == NULL, "base should be NULL after destroy"); return 1; } static int test_init_sized(void) { ln_arena_t arena; /* Test with various sizes */ int r = ln_arena_init_sized(&arena, 4096); TEST_ASSERT_EQ(r, LN_ARENA_OK, "init_sized should succeed"); TEST_ASSERT(arena.capacity >= 4096, "capacity should be at least 4096"); ln_arena_destroy(&arena); /* Test minimum clamping */ r = ln_arena_init_sized(&arena, 1); TEST_ASSERT_EQ(r, LN_ARENA_OK, "init_sized with small value should succeed"); TEST_ASSERT(arena.capacity >= LN_ARENA_MIN_CAPACITY, "capacity should be clamped to minimum"); ln_arena_destroy(&arena); /* Test maximum clamping */ r = ln_arena_init_sized(&arena, SIZE_MAX); TEST_ASSERT_EQ(r, LN_ARENA_OK, "init_sized with huge value should succeed"); TEST_ASSERT(arena.capacity <= LN_ARENA_MAX_CAPACITY, "capacity should be clamped to maximum"); ln_arena_destroy(&arena); return 1; } static int test_init_static(void) { ln_arena_t arena; uint8_t buffer[1024]; int r = ln_arena_init_static(&arena, buffer, sizeof(buffer)); TEST_ASSERT_EQ(r, LN_ARENA_OK, "init_static should succeed"); TEST_ASSERT(arena.base == buffer, "base should point to provided buffer"); TEST_ASSERT_EQ((long long)arena.capacity, (long long)sizeof(buffer), "capacity should match buffer size"); TEST_ASSERT(arena.flags & LN_ARENA_FLAG_STATIC, "should be marked static"); TEST_ASSERT(!(arena.flags & LN_ARENA_FLAG_OWNED), "should not own memory"); /* Destroy should not crash or free the static buffer */ ln_arena_destroy(&arena); TEST_ASSERT(arena.base == NULL, "base should be NULL after destroy"); /* Buffer should still be usable (not freed) 
*/ buffer[0] = 42; TEST_ASSERT_EQ(buffer[0], 42, "static buffer should still be valid"); return 1; } static int test_init_errors(void) { int r; ln_arena_t arena; /* NULL arena */ r = ln_arena_init(NULL); TEST_ASSERT_EQ(r, LN_ARENA_EINVAL, "init with NULL should fail"); r = ln_arena_init_sized(NULL, 1024); TEST_ASSERT_EQ(r, LN_ARENA_EINVAL, "init_sized with NULL should fail"); /* Static buffer errors */ r = ln_arena_init_static(NULL, (void *)1, 1024); TEST_ASSERT_EQ(r, LN_ARENA_EINVAL, "init_static with NULL arena should fail"); r = ln_arena_init_static(&arena, NULL, 1024); TEST_ASSERT_EQ(r, LN_ARENA_EINVAL, "init_static with NULL buffer should fail"); r = ln_arena_init_static(&arena, (void *)1, 1); TEST_ASSERT_EQ(r, LN_ARENA_EINVAL, "init_static with tiny buffer should fail"); return 1; } static int test_destroy_idempotent(void) { ln_arena_t arena; /* Destroy NULL should be safe */ ln_arena_destroy(NULL); /* Double destroy should be safe */ ln_arena_init(&arena); ln_arena_destroy(&arena); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Allocation Tests *============================================================================*/ static int test_alloc_basic(void) { ln_arena_t arena; ln_arena_init(&arena); void *p1 = ln_arena_alloc(&arena, 100); TEST_ASSERT(p1 != NULL, "first alloc should succeed"); void *p2 = ln_arena_alloc(&arena, 200); TEST_ASSERT(p2 != NULL, "second alloc should succeed"); TEST_ASSERT(p2 != p1, "allocations should be distinct"); TEST_ASSERT((uintptr_t)p2 > (uintptr_t)p1, "allocations should grow upward"); TEST_ASSERT_EQ(arena.alloc_count, 2, "alloc_count should be 2"); TEST_ASSERT(arena.used >= 300, "used should be at least 300"); ln_arena_destroy(&arena); return 1; } static int test_alloc_alignment(void) { ln_arena_t arena; ln_arena_init(&arena); /* Default alignment should be 8 bytes */ void *p1 = ln_arena_alloc(&arena, 1); TEST_ASSERT(((uintptr_t)p1 % 
LN_ARENA_DEFAULT_ALIGN) == 0, "default alloc should be aligned"); /* Explicit alignments */ void *p16 = ln_arena_alloc_aligned(&arena, 32, 16); TEST_ASSERT(p16 != NULL, "16-byte aligned alloc should succeed"); TEST_ASSERT(((uintptr_t)p16 % 16) == 0, "should be 16-byte aligned"); void *p32 = ln_arena_alloc_aligned(&arena, 64, 32); TEST_ASSERT(p32 != NULL, "32-byte aligned alloc should succeed"); TEST_ASSERT(((uintptr_t)p32 % 32) == 0, "should be 32-byte aligned"); void *p64 = ln_arena_alloc_aligned(&arena, 128, 64); TEST_ASSERT(p64 != NULL, "64-byte aligned alloc should succeed"); TEST_ASSERT(((uintptr_t)p64 % 64) == 0, "should be 64-byte aligned"); /* Invalid alignments */ void *bad1 = ln_arena_alloc_aligned(&arena, 32, 0); TEST_ASSERT(bad1 == NULL, "0 alignment should fail"); void *bad2 = ln_arena_alloc_aligned(&arena, 32, 3); TEST_ASSERT(bad2 == NULL, "non-power-of-2 alignment should fail"); void *bad3 = ln_arena_alloc_aligned(&arena, 32, 8192); TEST_ASSERT(bad3 == NULL, "excessive alignment should fail"); ln_arena_destroy(&arena); return 1; } static int test_alloc_zero_size(void) { ln_arena_t arena; ln_arena_init(&arena); /* Zero-size allocation should return valid unique pointer */ void *p1 = ln_arena_alloc(&arena, 0); TEST_ASSERT(p1 != NULL, "zero-size alloc should succeed"); void *p2 = ln_arena_alloc(&arena, 0); TEST_ASSERT(p2 != NULL, "second zero-size alloc should succeed"); TEST_ASSERT(p2 != p1, "zero-size allocations should be distinct"); ln_arena_destroy(&arena); return 1; } static int test_alloc_exhaustion(void) { ln_arena_t arena; uint8_t buffer[256]; ln_arena_init_static(&arena, buffer, sizeof(buffer)); /* Fill most of the arena */ void *p1 = ln_arena_alloc(&arena, 200); TEST_ASSERT(p1 != NULL, "first large alloc should succeed"); /* This should fail - not enough space */ void *p2 = ln_arena_alloc(&arena, 100); TEST_ASSERT(p2 == NULL, "alloc beyond capacity should fail"); /* Smaller alloc might still succeed depending on alignment padding */ void *p3 
= ln_arena_alloc(&arena, 10); (void)p3; /* May or may not be NULL depending on alignment */ ln_arena_destroy(&arena); return 1; } static int test_calloc(void) { ln_arena_t arena; ln_arena_init(&arena); /* Allocate and verify zeroed */ uint8_t *p = (uint8_t *)ln_arena_calloc(&arena, 256); TEST_ASSERT(p != NULL, "calloc should succeed"); int all_zero = 1; for (int i = 0; i < 256; i++) { if (p[i] != 0) { all_zero = 0; break; } } TEST_ASSERT(all_zero, "calloc memory should be zeroed"); ln_arena_destroy(&arena); return 1; } static int test_alloc_write_read(void) { ln_arena_t arena; ln_arena_init(&arena); /* Allocate and write data, then verify it's still there */ char *p1 = (char *)ln_arena_alloc(&arena, 32); TEST_ASSERT(p1 != NULL, "alloc should succeed"); memcpy(p1, "hello world", 12); int *p2 = (int *)ln_arena_alloc(&arena, sizeof(int) * 10); TEST_ASSERT(p2 != NULL, "alloc should succeed"); for (int i = 0; i < 10; i++) p2[i] = i * 100; /* Verify first allocation is still intact */ TEST_ASSERT(memcmp(p1, "hello world", 12) == 0, "first allocation data should be intact"); /* Verify second allocation */ for (int i = 0; i < 10; i++) { TEST_ASSERT_EQ(p2[i], i * 100, "array data should be intact"); } ln_arena_destroy(&arena); return 1; } /*============================================================================ * String Operations Tests *============================================================================*/ static int test_strdup(void) { ln_arena_t arena; ln_arena_init(&arena); const char *original = "Hello, World!"; char *copy = ln_arena_strdup(&arena, original); TEST_ASSERT(copy != NULL, "strdup should succeed"); TEST_ASSERT(copy != original, "copy should be different pointer"); TEST_ASSERT(strcmp(copy, original) == 0, "copy should match original"); /* NULL string */ char *null_copy = ln_arena_strdup(&arena, NULL); TEST_ASSERT(null_copy == NULL, "strdup of NULL should return NULL"); /* Empty string */ char *empty = ln_arena_strdup(&arena, ""); 
TEST_ASSERT(empty != NULL, "strdup of empty string should succeed"); TEST_ASSERT(empty[0] == '\0', "empty string copy should be empty"); ln_arena_destroy(&arena); return 1; } static int test_strndup(void) { ln_arena_t arena; ln_arena_init(&arena); const char *original = "Hello, World!"; /* Copy partial string */ char *partial = ln_arena_strndup(&arena, original, 5); TEST_ASSERT(partial != NULL, "strndup should succeed"); TEST_ASSERT(strcmp(partial, "Hello") == 0, "partial copy should match"); TEST_ASSERT_EQ((long long)strlen(partial), 5, "partial copy length should be 5"); /* Copy full string by specifying exact length */ size_t orig_len = strlen(original); char *full = ln_arena_strndup(&arena, original, orig_len); TEST_ASSERT(full != NULL, "strndup with exact len should succeed"); TEST_ASSERT(strcmp(full, original) == 0, "full copy should match original"); /* Copy binary data (contains null byte) */ const char binary[] = "AB\0CD"; char *bin_copy = ln_arena_strndup(&arena, binary, 5); TEST_ASSERT(bin_copy != NULL, "strndup of binary should succeed"); TEST_ASSERT(bin_copy[0] == 'A', "first byte should be A"); TEST_ASSERT(bin_copy[2] == '\0', "embedded null should be preserved"); TEST_ASSERT(bin_copy[3] == 'C', "byte after null should be C"); TEST_ASSERT(bin_copy[5] == '\0', "should be null-terminated"); ln_arena_destroy(&arena); return 1; } static int test_memdup(void) { ln_arena_t arena; ln_arena_init(&arena); uint8_t data[] = {1, 2, 3, 4, 5, 0, 7, 8}; uint8_t *copy = (uint8_t *)ln_arena_memdup(&arena, data, sizeof(data)); TEST_ASSERT(copy != NULL, "memdup should succeed"); TEST_ASSERT(memcmp(copy, data, sizeof(data)) == 0, "memdup should match"); /* NULL source */ void *null_copy = ln_arena_memdup(&arena, NULL, 10); TEST_ASSERT(null_copy == NULL, "memdup of NULL should return NULL"); /* Zero size */ void *zero_copy = ln_arena_memdup(&arena, data, 0); TEST_ASSERT(zero_copy == NULL, "memdup of size 0 should return NULL"); ln_arena_destroy(&arena); return 1; } 
/*============================================================================ * Reset and Backtracking Tests *============================================================================*/ static int test_reset(void) { ln_arena_t arena; ln_arena_init(&arena); /* Allocate some memory */ ln_arena_alloc(&arena, 100); ln_arena_alloc(&arena, 200); ln_arena_alloc(&arena, 300); size_t used_before = arena.used; size_t peak_before = arena.peak; TEST_ASSERT(used_before > 0, "should have used some memory"); TEST_ASSERT_EQ(arena.alloc_count, 3, "should have 3 allocations"); /* Reset */ ln_arena_reset(&arena); TEST_ASSERT_EQ((long long)arena.used, 0, "used should be 0 after reset"); TEST_ASSERT_EQ(arena.alloc_count, 0, "alloc_count should be 0 after reset"); TEST_ASSERT_EQ((long long)arena.peak, (long long)peak_before, "peak should be preserved after reset"); /* Should be able to allocate again */ void *p = ln_arena_alloc(&arena, 100); TEST_ASSERT(p != NULL, "should be able to alloc after reset"); ln_arena_destroy(&arena); return 1; } static int test_mark_restore(void) { ln_arena_t arena; ln_arena_init(&arena); /* Allocate initial data */ char *p1 = ln_arena_strdup(&arena, "first"); TEST_ASSERT(p1 != NULL, "first strdup should succeed"); /* Save mark */ ln_arena_mark_t mark; ln_arena_save(&arena, &mark); size_t used_at_mark = arena.used; /* Allocate more data */ char *p2 = ln_arena_strdup(&arena, "second"); char *p3 = ln_arena_strdup(&arena, "third"); TEST_ASSERT(p2 != NULL && p3 != NULL, "additional allocations should succeed"); TEST_ASSERT(arena.used > used_at_mark, "should have used more memory"); /* Restore to mark */ ln_arena_restore(&arena, &mark); TEST_ASSERT_EQ((long long)arena.used, (long long)used_at_mark, "used should be restored"); /* p2 and p3 are now invalid, but p1 should still be valid */ TEST_ASSERT(strcmp(p1, "first") == 0, "p1 should still be valid"); /* New allocation should reuse space */ char *p4 = ln_arena_strdup(&arena, "new"); TEST_ASSERT(p4 != NULL, 
"allocation after restore should succeed"); ln_arena_destroy(&arena); return 1; } static int test_nested_marks(void) { ln_arena_t arena; ln_arena_init(&arena); ln_arena_strdup(&arena, "level0"); ln_arena_mark_t mark1; ln_arena_save(&arena, &mark1); size_t used1 = arena.used; ln_arena_strdup(&arena, "level1"); ln_arena_mark_t mark2; ln_arena_save(&arena, &mark2); size_t used2 = arena.used; ln_arena_strdup(&arena, "level2"); size_t used3 = arena.used; TEST_ASSERT(used1 < used2 && used2 < used3, "usage should increase"); /* Restore to mark2 */ ln_arena_restore(&arena, &mark2); TEST_ASSERT_EQ((long long)arena.used, (long long)used2, "should restore to mark2"); /* Restore to mark1 */ ln_arena_restore(&arena, &mark1); TEST_ASSERT_EQ((long long)arena.used, (long long)used1, "should restore to mark1"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Query Function Tests *============================================================================*/ static int test_query_functions(void) { ln_arena_t arena; ln_arena_init_sized(&arena, 1024); TEST_ASSERT(ln_arena_capacity(&arena) >= 1024, "capacity should be at least 1024"); TEST_ASSERT_EQ((long long)ln_arena_used(&arena), 0, "initial used should be 0"); TEST_ASSERT_EQ((long long)ln_arena_available(&arena), (long long)ln_arena_capacity(&arena), "initial available should equal capacity"); TEST_ASSERT_EQ((long long)ln_arena_peak(&arena), 0, "initial peak should be 0"); /* Allocate some memory */ ln_arena_alloc(&arena, 100); TEST_ASSERT(ln_arena_used(&arena) >= 100, "used should be at least 100"); TEST_ASSERT(ln_arena_available(&arena) < ln_arena_capacity(&arena), "available should decrease"); TEST_ASSERT_EQ((long long)ln_arena_peak(&arena), (long long)ln_arena_used(&arena), "peak should equal used"); TEST_ASSERT(ln_arena_has_space(&arena, 100), "should have space for 100"); TEST_ASSERT(!ln_arena_has_space(&arena, 100000), "should not have space for 100000"); 
ln_arena_destroy(&arena); /* Query on NULL should return 0/safe values */ TEST_ASSERT_EQ((long long)ln_arena_capacity(NULL), 0, "NULL capacity should be 0"); TEST_ASSERT_EQ((long long)ln_arena_used(NULL), 0, "NULL used should be 0"); TEST_ASSERT_EQ((long long)ln_arena_available(NULL), 0, "NULL available should be 0"); TEST_ASSERT(!ln_arena_has_space(NULL, 1), "NULL should not have space"); return 1; } static int test_stats(void) { ln_arena_t arena; ln_arena_init_sized(&arena, 4096); ln_arena_alloc(&arena, 100); ln_arena_alloc(&arena, 200); ln_arena_alloc(&arena, 300); ln_arena_stats_t stats; ln_arena_get_stats(&arena, &stats); TEST_ASSERT(stats.capacity >= 4096, "stats capacity should match"); TEST_ASSERT(stats.used >= 600, "stats used should be at least 600"); TEST_ASSERT_EQ((long long)stats.peak, (long long)stats.used, "stats peak should equal used"); TEST_ASSERT_EQ((long long)stats.available, (long long)(stats.capacity - stats.used), "stats available should be correct"); TEST_ASSERT_EQ(stats.alloc_count, 3, "stats alloc_count should be 3"); TEST_ASSERT(stats.utilization > 0.0 && stats.utilization < 1.0, "utilization should be between 0 and 1"); /* Stats on NULL */ ln_arena_stats_t null_stats; memset(&null_stats, 0xFF, sizeof(null_stats)); ln_arena_get_stats(NULL, &null_stats); TEST_ASSERT_EQ((long long)null_stats.capacity, 0, "NULL arena stats should be zeroed"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Peak Tracking Tests *============================================================================*/ static int test_peak_tracking(void) { ln_arena_t arena; ln_arena_init(&arena); /* Allocate some, note peak */ ln_arena_alloc(&arena, 500); size_t peak1 = ln_arena_peak(&arena); TEST_ASSERT(peak1 >= 500, "peak should be >= 500"); /* Allocate more */ ln_arena_alloc(&arena, 300); size_t peak2 = ln_arena_peak(&arena); TEST_ASSERT(peak2 >= peak1, "peak should be monotonically increasing"); /* 
Reset and re-allocate less */ ln_arena_reset(&arena); ln_arena_alloc(&arena, 100); size_t peak3 = ln_arena_peak(&arena); TEST_ASSERT_EQ((long long)peak3, (long long)peak2, "peak should survive reset (high water mark)"); /* Allocate past previous peak */ ln_arena_alloc(&arena, 1000); size_t peak4 = ln_arena_peak(&arena); TEST_ASSERT(peak4 > peak2, "peak should increase when exceeding HWM"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Stress Tests *============================================================================*/ static int test_many_allocations(void) { ln_arena_t arena; ln_arena_init_sized(&arena, 1024 * 1024); /* 1MB */ /* Many small allocations */ for (int i = 0; i < 10000; i++) { void *p = ln_arena_alloc(&arena, 16); if (!p) { TEST_ASSERT(0, "allocation should not fail with 1MB arena"); } } TEST_ASSERT_EQ(arena.alloc_count, 10000, "should have 10000 allocations"); /* Reset and do it again */ ln_arena_reset(&arena); TEST_ASSERT_EQ(arena.alloc_count, 0, "alloc_count should reset"); for (int i = 0; i < 10000; i++) { void *p = ln_arena_alloc(&arena, 16); TEST_ASSERT(p != NULL, "allocation after reset should succeed"); } ln_arena_destroy(&arena); return 1; } static int test_alignment_stress(void) { ln_arena_t arena; ln_arena_init_sized(&arena, 1024 * 1024); /* Mix of different alignments */ size_t alignments[] = {1, 2, 4, 8, 16, 32, 64}; int num_alignments = sizeof(alignments) / sizeof(alignments[0]); for (int i = 0; i < 1000; i++) { size_t align = alignments[i % num_alignments]; size_t size = (size_t)(i % 100) + 1; void *p = ln_arena_alloc_aligned(&arena, size, align); TEST_ASSERT(p != NULL, "aligned alloc should succeed"); TEST_ASSERT(((uintptr_t)p % align) == 0, "alignment should be correct"); } ln_arena_destroy(&arena); return 1; } /*============================================================================ * Macro Tests 
*============================================================================*/ static int test_macros(void) { ln_arena_t arena; ln_arena_init(&arena); /* Test LN_ARENA_NEW */ typedef struct { int x; double y; char z[32]; } test_struct_t; test_struct_t *obj = LN_ARENA_NEW(&arena, test_struct_t); TEST_ASSERT(obj != NULL, "LN_ARENA_NEW should succeed"); obj->x = 42; obj->y = 3.14; strcpy(obj->z, "test"); /* Test LN_ARENA_ARRAY */ int *arr = LN_ARENA_ARRAY(&arena, int, 100); TEST_ASSERT(arr != NULL, "LN_ARENA_ARRAY should succeed"); for (int i = 0; i < 100; i++) { arr[i] = i; } /* Verify data integrity after more allocations */ TEST_ASSERT_EQ(obj->x, 42, "struct data should be intact"); TEST_ASSERT_EQ(arr[50], 50, "array data should be intact"); /* Test LN_ARENA_NEW_ALIGNED */ typedef struct { uint8_t data[64]; } aligned_struct_t; aligned_struct_t *aligned_obj = LN_ARENA_NEW_ALIGNED(&arena, aligned_struct_t, 64); TEST_ASSERT(aligned_obj != NULL, "LN_ARENA_NEW_ALIGNED should succeed"); TEST_ASSERT(((uintptr_t)aligned_obj % 64) == 0, "should be 64-byte aligned"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Error Code Tests *============================================================================*/ static int test_error_codes(void) { /* Verify error code values are distinct and negative */ TEST_ASSERT_EQ(LN_ARENA_OK, 0, "OK should be 0"); TEST_ASSERT(LN_ARENA_EINVAL < 0, "EINVAL should be negative"); TEST_ASSERT(LN_ARENA_ENOMEM < 0, "ENOMEM should be negative"); TEST_ASSERT(LN_ARENA_EOVERFLOW < 0, "EOVERFLOW should be negative"); TEST_ASSERT(LN_ARENA_EALIGN < 0, "EALIGN should be negative"); /* All error codes should be distinct */ TEST_ASSERT(LN_ARENA_EINVAL != LN_ARENA_ENOMEM, "EINVAL != ENOMEM"); TEST_ASSERT(LN_ARENA_EINVAL != LN_ARENA_EOVERFLOW, "EINVAL != EOVERFLOW"); TEST_ASSERT(LN_ARENA_EINVAL != LN_ARENA_EALIGN, "EINVAL != EALIGN"); TEST_ASSERT(LN_ARENA_ENOMEM != LN_ARENA_EOVERFLOW, 
"ENOMEM != EOVERFLOW"); TEST_ASSERT(LN_ARENA_ENOMEM != LN_ARENA_EALIGN, "ENOMEM != EALIGN"); TEST_ASSERT(LN_ARENA_EOVERFLOW != LN_ARENA_EALIGN, "EOVERFLOW != EALIGN"); return 1; } /*============================================================================ * Per-Message Reuse Pattern *============================================================================*/ static int test_per_message_reuse(void) { ln_arena_t arena; ln_arena_init(&arena); /* Simulate processing 100 log messages with arena reuse */ for (int msg = 0; msg < 100; msg++) { ln_arena_reset(&arena); /* Simulate parse result allocations */ char *host = ln_arena_strdup(&arena, "fw01.prod.example.com"); char *ip = ln_arena_strndup(&arena, "192.168.1.100", 13); int *port = LN_ARENA_NEW(&arena, int); *port = 443 + msg; /* Verify data */ TEST_ASSERT(host != NULL, "host alloc should succeed"); TEST_ASSERT(ip != NULL, "ip alloc should succeed"); TEST_ASSERT_EQ(*port, 443 + msg, "port should be correct"); } /* Peak should reflect single-message usage, not accumulated */ TEST_ASSERT(ln_arena_peak(&arena) < 1024, "peak should be small (single message worth)"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Main *============================================================================*/ int main(void) { printf("=== ln_arena Comprehensive Test Suite ===\n\n"); /* Constants */ printf("Configuration constants:\n"); RUN_TEST(test_constants); RUN_TEST(test_error_codes); printf("\n"); /* Lifecycle tests */ printf("Lifecycle tests:\n"); RUN_TEST(test_init_default); RUN_TEST(test_init_sized); RUN_TEST(test_init_static); RUN_TEST(test_init_errors); RUN_TEST(test_destroy_idempotent); printf("\n"); /* Allocation tests */ printf("Allocation tests:\n"); RUN_TEST(test_alloc_basic); RUN_TEST(test_alloc_alignment); RUN_TEST(test_alloc_zero_size); RUN_TEST(test_alloc_exhaustion); RUN_TEST(test_calloc); RUN_TEST(test_alloc_write_read); printf("\n"); /* 
String tests */
    printf("String operation tests:\n");
    RUN_TEST(test_strdup);
    RUN_TEST(test_strndup);
    RUN_TEST(test_memdup);
    printf("\n");

    /* Reset and backtracking tests */
    printf("Reset and backtracking tests:\n");
    RUN_TEST(test_reset);
    RUN_TEST(test_mark_restore);
    RUN_TEST(test_nested_marks);
    printf("\n");

    /* Query tests */
    printf("Query function tests:\n");
    RUN_TEST(test_query_functions);
    RUN_TEST(test_stats);
    printf("\n");

    /* Peak tracking */
    printf("Peak tracking tests:\n");
    RUN_TEST(test_peak_tracking);
    printf("\n");

    /* Stress tests */
    printf("Stress tests:\n");
    RUN_TEST(test_many_allocations);
    RUN_TEST(test_alignment_stress);
    printf("\n");

    /* Macro tests */
    printf("Macro tests:\n");
    RUN_TEST(test_macros);
    printf("\n");

    /* Usage pattern tests */
    printf("Usage pattern tests:\n");
    RUN_TEST(test_per_message_reuse);
    printf("\n");

    /* Summary: print the run / passed / failed counters */
    printf("=== Summary ===\n");
    printf("Tests run: %d\n", tests_run);
    printf("Tests passed: %d\n", tests_passed);
    printf("Tests failed: %d\n", tests_failed);

    /* Process exit status: 1 when at least one test failed, otherwise 0 */
    return tests_failed > 0 ?
1 : 0; } #else /* !ENABLE_TURBO */ int main(void) { printf("Turbo mode not enabled, skipping tests.\n"); return 0; } #endif /* ENABLE_TURBO */ liblognorm-2.1.0/tests/turbo_test_json.c000066400000000000000000001024541520037563000204160ustar00rootroot00000000000000/** * @file turbo_test_json.c * @brief Comprehensive test suite for fast JSON serialization * * Coverage: * - Empty result * - String fields (inline + external): basic, special chars, embedded NUL * - Integer fields: positive, negative, zero, extremes * - Double fields: positive, negative, zero * - Boolean fields (via type) * - Null fields (via type) * - Multiple fields * - Nested objects: one-level (source.ip), two-level (user.group.name), * mixed flat+nested, sibling nesting (source.ip + source.port) * - Tag serialization: single, multiple, empty * - JSON escaping: quotes, backslash, newline, tab, carriage return, * backspace, formfeed, control chars (\u00XX) * - Buffer overflow: small buffer rejection * - Estimate function: ln_fast_json_estimate accuracy * - Allocating version: ln_fast_to_json_alloc * - Large result: many fields in single output * * @author Jeremie Jourdin / Advens * @copyright 2026 Advens. Released under ASL 2.0. 
*/

#include "config.h"
#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"

#ifdef ENABLE_TURBO

#include "turbo_result_fast.h"
#include "turbo_arena.h"
/* NOTE(review): the header names of the next three directives are missing
 * in this copy of the file (they look like angle-bracket system headers
 * lost in extraction). The code below uses printf/fprintf, strstr/strcmp/
 * memset and malloc/free - restore the names from the upstream source. */
#include
#include
#include

/*============================================================================
 * Test Framework
 *============================================================================*/

/* Counters updated by RUN_TEST and reported by main(). */
static int tests_run = 0;
static int tests_passed = 0;
static int tests_failed = 0;

/*
 * Fails the enclosing test when cond is false: prints msg and the current
 * line number to stderr, then makes the ENCLOSING function return 0.
 * Only usable inside functions that return int.
 */
#define TEST_ASSERT(cond, msg) do { \
    if (!(cond)) { \
        fprintf(stderr, " FAIL: %s (line %d)\n", msg, __LINE__); \
        return 0; \
    } \
} while(0)

/*
 * Like TEST_ASSERT, but compares a against b with != and reports both.
 * Note that the reported values are cast to int, so operands wider than
 * int (e.g. size_t) are compared at full width but printed truncated.
 * Both operands are evaluated twice on failure.
 */
#define TEST_ASSERT_EQ(a, b, msg) do { \
    if ((a) != (b)) { \
        fprintf(stderr, " FAIL: %s - got %d, expected %d (line %d)\n", \
                msg, (int)(a), (int)(b), __LINE__); \
        return 0; \
    } \
} while(0)

/*
 * Runs one test function: bumps tests_run, prints the function name, and
 * counts the test as passed when it returns non-zero, failed otherwise.
 */
#define RUN_TEST(test_func) do { \
    tests_run++; \
    printf(" Running %s... ", #test_func); \
    fflush(stdout); \
    if (test_func()) { \
        tests_passed++; \
        printf("OK\n"); \
    } else { \
        tests_failed++; \
        printf("FAILED\n"); \
    } \
} while(0)

/*============================================================================
 * Helper Functions
 *============================================================================*/

/**
 * Substring probe used by the assertions below.
 *
 * @param json   NUL-terminated serializer output to search in.
 * @param substr NUL-terminated text to look for.
 * @return non-zero when substr occurs anywhere in json (plain strstr
 *         match, no JSON parsing), 0 otherwise.
 */
static int json_contains(const char *json, const char *substr)
{
    return strstr(json, substr) != NULL;
}

/**
 * Verify JSON starts with { and ends with }.
 *
 * This is a shape check on the first and last byte only; the content in
 * between is not validated.
 *
 * @param json buffer holding the serialized text.
 * @param len  number of bytes in json (without the terminating NUL);
 *             values below 2 yield 0.
 * @return non-zero when json[0] is '{' and json[len - 1] is '}'.
 */
static int json_is_object(const char *json, size_t len)
{
    return len >= 2 && json[0] == '{' && json[len - 1] == '}';
}

/*============================================================================
 * Empty / Minimal Tests
 *============================================================================*/

static int test_json_empty(void)
{
    ln_arena_t arena;
    ln_fast_result_t result;
    char buf[256];
    size_t len;

    ln_arena_init(&arena);
    ln_fast_result_init(&result, &arena);

    int r = ln_fast_to_json(&result, buf, sizeof(buf), &len);
    TEST_ASSERT_EQ(r, 0, "empty result should succeed");
    TEST_ASSERT(strcmp(buf, "{}") == 0, "empty result should be {}");
TEST_ASSERT_EQ((int)len, 2, "length should be 2"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * String Field Tests *============================================================================*/ static int test_json_string_field(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "host", 4, "server01", 8); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_is_object(buf, len), "should be valid JSON object"); TEST_ASSERT(json_contains(buf, "\"host\""), "should contain host field"); TEST_ASSERT(json_contains(buf, "\"server01\""), "should contain quoted host value"); ln_arena_destroy(&arena); return 1; } static int test_json_string_external(void) { ln_arena_t arena; ln_fast_result_t result; char buf[512]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Long string (external storage path) */ char long_val[100]; memset(long_val, 'z', 99); long_val[99] = '\0'; ln_fast_add_string_static(&result, "data", 4, long_val, 99); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"data\""), "should contain data field"); /* Value should have 99 z's */ TEST_ASSERT(json_contains(buf, "zzzzz"), "should contain z's"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Integer Field Tests *============================================================================*/ static int test_json_int_field(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_int_static(&result, "status", 6, 200); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 
0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"status\""), "should contain status field"); TEST_ASSERT(json_contains(buf, "200"), "should contain 200"); /* Integer should NOT be quoted */ TEST_ASSERT(!json_contains(buf, "\"200\""), "integer should not be quoted"); ln_arena_destroy(&arena); return 1; } static int test_json_int_negative(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_int_static(&result, "offset", 6, -42); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "-42"), "should contain -42"); ln_arena_destroy(&arena); return 1; } static int test_json_int_zero(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_int_static(&result, "count", 5, 0); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, ":0"), "should contain :0"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Double Field Tests *============================================================================*/ static int test_json_double_field(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_double_static(&result, "latency", 7, 1.50); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"latency\""), "should contain latency field"); TEST_ASSERT(json_contains(buf, "1.50"), "should contain 1.50"); /* Double should NOT be quoted */ TEST_ASSERT(!json_contains(buf, "\"1.50\""), "double should not be quoted"); ln_arena_destroy(&arena); return 1; } static int test_json_double_negative(void) { 
ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_double_static(&result, "temp", 4, -273.15); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "-273"), "should contain -273"); ln_arena_destroy(&arena); return 1; } static int test_json_double_zero(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_double_static(&result, "val", 3, 0.0); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "0.00"), "should contain 0.00"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Multiple Fields Test *============================================================================*/ static int test_json_multiple_fields(void) { ln_arena_t arena; ln_fast_result_t result; char buf[512]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "host", 4, "server01", 8); ln_fast_add_int_static(&result, "status", 6, 200); ln_fast_add_double_static(&result, "latency", 7, 0.42); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_is_object(buf, len), "should be valid JSON object"); TEST_ASSERT(json_contains(buf, "\"host\""), "should contain host"); TEST_ASSERT(json_contains(buf, "\"status\""), "should contain status"); TEST_ASSERT(json_contains(buf, "\"latency\""), "should contain latency"); /* Fields should be separated by commas */ int comma_count = 0; for (size_t i = 0; i < len; i++) { if (buf[i] == ',') comma_count++; } TEST_ASSERT(comma_count >= 2, "should have at least 2 commas for 3 fields"); ln_arena_destroy(&arena); return 1; } 
/*============================================================================ * Nested Object Tests *============================================================================*/ static int test_json_nested_one_level(void) { ln_arena_t arena; ln_fast_result_t result; char buf[512]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* "source.ip" should produce {"source":{"ip":"1.2.3.4"}} */ ln_fast_add_string_static(&result, "source.ip", 9, "1.2.3.4", 7); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"source\""), "should have source key"); TEST_ASSERT(json_contains(buf, "\"ip\""), "should have ip key"); TEST_ASSERT(json_contains(buf, "\"1.2.3.4\""), "should have ip value"); /* Should have nested structure */ TEST_ASSERT(json_contains(buf, "\"source\":{"), "should open source object"); ln_arena_destroy(&arena); return 1; } static int test_json_nested_siblings(void) { ln_arena_t arena; ln_fast_result_t result; char buf[1024]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Two fields under same prefix -> same object */ ln_fast_add_string_static(&result, "source.ip", 9, "10.0.0.1", 8); ln_fast_add_int_static(&result, "source.port", 11, 443); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"source\":{"), "should open source object"); TEST_ASSERT(json_contains(buf, "\"ip\""), "should have ip field"); TEST_ASSERT(json_contains(buf, "\"port\""), "should have port field"); TEST_ASSERT(json_contains(buf, "443"), "should have port value"); ln_arena_destroy(&arena); return 1; } static int test_json_nested_two_levels(void) { ln_arena_t arena; ln_fast_result_t result; char buf[1024]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* "user.group.name" -> {"user":{"group":{"name":"admins"}}} */ ln_fast_add_string_static(&result, 
"user.group.name", 15, "admins", 6); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"user\":{"), "should open user object"); TEST_ASSERT(json_contains(buf, "\"group\":{"), "should open group object"); TEST_ASSERT(json_contains(buf, "\"name\""), "should have name key"); TEST_ASSERT(json_contains(buf, "\"admins\""), "should have name value"); ln_arena_destroy(&arena); return 1; } static int test_json_nested_mixed_flat(void) { ln_arena_t arena; ln_fast_result_t result; char buf[1024]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Mix of flat and nested fields */ ln_fast_add_string_static(&result, "host", 4, "srv1", 4); ln_fast_add_string_static(&result, "source.ip", 9, "10.0.0.1", 8); ln_fast_add_int_static(&result, "status", 6, 200); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_is_object(buf, len), "should be valid JSON object"); TEST_ASSERT(json_contains(buf, "\"host\""), "should have flat host"); TEST_ASSERT(json_contains(buf, "\"source\":{"), "should have nested source"); TEST_ASSERT(json_contains(buf, "\"status\""), "should have flat status"); ln_arena_destroy(&arena); return 1; } static int test_json_nested_different_prefixes(void) { ln_arena_t arena; ln_fast_result_t result; char buf[2048]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Multiple nested prefixes — tests closing one object and opening another */ ln_fast_add_string_static(&result, "source.ip", 9, "10.0.0.1", 8); ln_fast_add_int_static(&result, "source.port", 11, 443); ln_fast_add_string_static(&result, "user.name", 9, "admin", 5); ln_fast_add_string_static(&result, "user.group.name", 15, "admins", 6); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"source\":{"), "should have source object"); 
TEST_ASSERT(json_contains(buf, "\"user\":{"), "should have user object"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Tag Serialization Tests *============================================================================*/ static int test_json_single_tag(void) { ln_arena_t arena; ln_fast_result_t result; char buf[512]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "host", 4, "srv1", 4); ln_fast_add_tag(&result, "web"); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"tags\""), "should have tags key"); TEST_ASSERT(json_contains(buf, "["), "should have array open"); TEST_ASSERT(json_contains(buf, "]"), "should have array close"); TEST_ASSERT(json_contains(buf, "\"web\""), "should contain tag value"); ln_arena_destroy(&arena); return 1; } static int test_json_multiple_tags(void) { ln_arena_t arena; ln_fast_result_t result; char buf[512]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_tag(&result, "web"); ln_fast_add_tag(&result, "http"); ln_fast_add_tag(&result, "firewall"); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"web\""), "should contain 'web' tag"); TEST_ASSERT(json_contains(buf, "\"http\""), "should contain 'http' tag"); TEST_ASSERT(json_contains(buf, "\"firewall\""), "should contain 'firewall' tag"); ln_arena_destroy(&arena); return 1; } static int test_json_tags_only(void) { ln_arena_t arena; ln_fast_result_t result; char buf[512]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Only tags, no fields */ ln_fast_add_tag(&result, "syslog"); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_is_object(buf, len), "should be valid JSON 
object"); TEST_ASSERT(json_contains(buf, "\"tags\":[\"syslog\"]"), "should have tags array with syslog"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * JSON Escaping Tests *============================================================================*/ static int test_json_escape_quotes(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "msg", 3, "say \"hi\"", 8); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\\\"hi\\\""), "should escape quotes"); ln_arena_destroy(&arena); return 1; } static int test_json_escape_backslash(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "path", 4, "C:\\Users\\test", 13); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "C:\\\\Users\\\\test"), "should escape backslashes in JSON output"); ln_arena_destroy(&arena); return 1; } static int test_json_escape_newline(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "msg", 3, "line1\nline2", 11); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\\n"), "should escape newline"); TEST_ASSERT(!json_contains(buf, "\n"), "should not contain literal newline"); ln_arena_destroy(&arena); return 1; } static int test_json_escape_tab(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "msg", 3, 
"col1\tcol2", 9); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\\t"), "should escape tab"); ln_arena_destroy(&arena); return 1; } static int test_json_escape_carriage_return(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "msg", 3, "line1\r\nline2", 12); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\\r"), "should escape carriage return"); TEST_ASSERT(json_contains(buf, "\\n"), "should escape newline"); ln_arena_destroy(&arena); return 1; } static int test_json_escape_control_chars(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* 0x01 control char should produce \u0001 */ char ctrl_str[4] = {'A', 0x01, 'B', '\0'}; ln_fast_add_string_static(&result, "msg", 3, ctrl_str, 3); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\\u0001"), "should escape control char as \\u00XX"); ln_arena_destroy(&arena); return 1; } static int test_json_escape_backspace_formfeed(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* \b = 0x08, \f = 0x0C */ char special[3] = {'\b', '\f', '\0'}; ln_fast_add_string_static(&result, "msg", 3, special, 2); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\\b"), "should escape backspace"); TEST_ASSERT(json_contains(buf, "\\f"), "should escape formfeed"); ln_arena_destroy(&arena); return 1; } static int test_json_clean_ascii(void) { ln_arena_t arena; ln_fast_result_t result; char buf[256]; size_t len; 
ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Clean ASCII string - no escaping needed (fast path) */ ln_fast_add_string_static(&result, "msg", 3, "Hello World 123!", 16); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "should succeed"); TEST_ASSERT(json_contains(buf, "\"Hello World 123!\""), "clean ASCII should pass through unchanged"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Buffer Overflow Tests *============================================================================*/ static int test_json_buffer_too_small(void) { ln_arena_t arena; ln_fast_result_t result; char buf[4]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "field", 5, "value", 5); int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, -1, "should fail with small buffer"); ln_arena_destroy(&arena); return 1; } static int test_json_buffer_minimal(void) { ln_arena_t arena; ln_fast_result_t result; char buf[3]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Empty result needs exactly 3 bytes: "{}\0" */ int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, 0, "empty result in 3-byte buffer should succeed"); TEST_ASSERT(strcmp(buf, "{}") == 0, "should be {}"); ln_arena_destroy(&arena); return 1; } static int test_json_buffer_too_small_for_empty(void) { ln_arena_t arena; ln_fast_result_t result; char buf[2]; size_t len; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* 2 bytes is too small even for "{}\0" */ int r = ln_fast_to_json(&result, buf, sizeof(buf), &len); TEST_ASSERT_EQ(r, -1, "2-byte buffer should fail even for empty"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Estimate Tests 
*============================================================================*/ static int test_json_estimate(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "host", 4, "server01", 8); ln_fast_add_int_static(&result, "status", 6, 200); size_t est = ln_fast_json_estimate(&result); TEST_ASSERT(est > 20, "estimate should be reasonable (> 20)"); char *buf = malloc(est); TEST_ASSERT(buf != NULL, "malloc should succeed"); size_t len; int r = ln_fast_to_json(&result, buf, est, &len); TEST_ASSERT_EQ(r, 0, "estimate should be sufficient"); TEST_ASSERT(len < est, "actual should be less than estimate"); free(buf); ln_arena_destroy(&arena); return 1; } static int test_json_estimate_with_tags(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "host", 4, "srv1", 4); ln_fast_add_tag(&result, "web"); ln_fast_add_tag(&result, "http"); size_t est = ln_fast_json_estimate(&result); TEST_ASSERT(est > 40, "estimate with tags should be reasonable"); char *buf = malloc(est); size_t len; int r = ln_fast_to_json(&result, buf, est, &len); TEST_ASSERT_EQ(r, 0, "estimate should be sufficient for tagged result"); TEST_ASSERT(len < est, "actual should be less than estimate"); free(buf); ln_arena_destroy(&arena); return 1; } static int test_json_estimate_nested(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "user.group.name", 15, "admins", 6); ln_fast_add_string_static(&result, "source.ip", 9, "10.0.0.1", 8); size_t est = ln_fast_json_estimate(&result); TEST_ASSERT(est > 60, "estimate with nested fields should be sufficient"); char *buf = malloc(est); size_t len; int r = ln_fast_to_json(&result, buf, est, &len); TEST_ASSERT_EQ(r, 0, "estimate should be sufficient for nested result"); free(buf); 
ln_arena_destroy(&arena); return 1; } /*============================================================================ * Allocating Version Tests *============================================================================*/ static int test_json_alloc_basic(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "host", 4, "server01", 8); ln_fast_add_int_static(&result, "status", 6, 200); char *json_str = NULL; size_t json_len = 0; int r = ln_fast_to_json_alloc(&result, &json_str, &json_len); TEST_ASSERT_EQ(r, 0, "alloc version should succeed"); TEST_ASSERT(json_str != NULL, "should allocate buffer"); TEST_ASSERT(json_len > 0, "should have non-zero length"); TEST_ASSERT(json_is_object(json_str, json_len), "should be valid JSON object"); TEST_ASSERT(json_contains(json_str, "\"host\""), "should contain host"); TEST_ASSERT(json_contains(json_str, "\"status\""), "should contain status"); free(json_str); ln_arena_destroy(&arena); return 1; } static int test_json_alloc_empty(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); char *json_str = NULL; size_t json_len = 0; int r = ln_fast_to_json_alloc(&result, &json_str, &json_len); TEST_ASSERT_EQ(r, 0, "alloc with empty result should succeed"); TEST_ASSERT(json_str != NULL, "should allocate buffer"); TEST_ASSERT(strcmp(json_str, "{}") == 0, "empty result should be {}"); free(json_str); ln_arena_destroy(&arena); return 1; } static int test_json_alloc_null_len(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "f", 1, "v", 1); char *json_str = NULL; /* json_len can be NULL (optional) */ int r = ln_fast_to_json_alloc(&result, &json_str, NULL); TEST_ASSERT_EQ(r, 0, "alloc with NULL len should succeed"); TEST_ASSERT(json_str != NULL, "should allocate buffer"); free(json_str); 
ln_arena_destroy(&arena); return 1; } /*============================================================================ * Large Result Test *============================================================================*/ static int test_json_many_fields(void) { ln_arena_t arena; ln_fast_result_t result; char name[32]; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Add 20 fields of mixed types. * Names must be arena-allocated (static API stores pointer, not copy) */ for (int i = 0; i < 20; i++) { snprintf(name, sizeof(name), "field_%02d", i); const char *aname = ln_arena_strndup(&arena, name, strlen(name)); uint16_t nlen = (uint16_t)strlen(name); if (i % 3 == 0) { ln_fast_add_string_static(&result, aname, nlen, "value", 5); } else if (i % 3 == 1) { ln_fast_add_int_static(&result, aname, nlen, i * 100); } else { ln_fast_add_double_static(&result, aname, nlen, i * 0.5); } } char *json_str = NULL; size_t json_len = 0; int r = ln_fast_to_json_alloc(&result, &json_str, &json_len); TEST_ASSERT_EQ(r, 0, "should succeed with 20 fields"); TEST_ASSERT(json_str != NULL, "should allocate buffer"); TEST_ASSERT(json_is_object(json_str, json_len), "should be valid JSON object"); /* Verify all fields present */ for (int i = 0; i < 20; i++) { snprintf(name, sizeof(name), "\"field_%02d\"", i); TEST_ASSERT(json_contains(json_str, name), "should contain all fields"); } free(json_str); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Realistic Syslog-Like Output *============================================================================*/ static int test_json_syslog_like(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Typical syslog parse result */ ln_fast_add_string_static(&result, "timestamp", 9, "2026-01-15T10:30:00Z", 20); ln_fast_add_string_static(&result, "host", 4, "fw01.prod.example.com", 21); ln_fast_add_int_static(&result, 
"facility", 8, 1); ln_fast_add_int_static(&result, "severity", 8, 6); ln_fast_add_string_static(&result, "source.ip", 9, "192.168.1.100", 13); ln_fast_add_int_static(&result, "source.port", 11, 52340); ln_fast_add_string_static(&result, "destination.ip", 14, "10.0.0.1", 8); ln_fast_add_int_static(&result, "destination.port", 16, 443); ln_fast_add_string_static(&result, "event.action", 12, "allow", 5); ln_fast_set_rule_id(&result, "rule_fw_001"); ln_fast_add_tag(&result, "firewall"); ln_fast_add_tag(&result, "network"); char *json_str = NULL; size_t json_len = 0; int r = ln_fast_to_json_alloc(&result, &json_str, &json_len); TEST_ASSERT_EQ(r, 0, "syslog-like result should succeed"); TEST_ASSERT(json_str != NULL, "should allocate buffer"); /* Verify nested objects */ TEST_ASSERT(json_contains(json_str, "\"destination\":{"), "should have destination object"); TEST_ASSERT(json_contains(json_str, "\"source\":{"), "should have source object"); /* Verify tags */ TEST_ASSERT(json_contains(json_str, "\"tags\""), "should have tags"); TEST_ASSERT(json_contains(json_str, "\"firewall\""), "should contain firewall tag"); TEST_ASSERT(json_contains(json_str, "\"network\""), "should contain network tag"); free(json_str); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Main *============================================================================*/ int main(void) { printf("=== Fast JSON Comprehensive Test Suite ===\n\n"); /* Empty / minimal */ printf("Empty / minimal tests:\n"); RUN_TEST(test_json_empty); printf("\n"); /* String fields */ printf("String field tests:\n"); RUN_TEST(test_json_string_field); RUN_TEST(test_json_string_external); printf("\n"); /* Integer fields */ printf("Integer field tests:\n"); RUN_TEST(test_json_int_field); RUN_TEST(test_json_int_negative); RUN_TEST(test_json_int_zero); printf("\n"); /* Double fields */ printf("Double field tests:\n"); RUN_TEST(test_json_double_field); 
RUN_TEST(test_json_double_negative); RUN_TEST(test_json_double_zero); printf("\n"); /* Multiple fields */ printf("Multiple field tests:\n"); RUN_TEST(test_json_multiple_fields); printf("\n"); /* Nested objects */ printf("Nested object tests:\n"); RUN_TEST(test_json_nested_one_level); RUN_TEST(test_json_nested_siblings); RUN_TEST(test_json_nested_two_levels); RUN_TEST(test_json_nested_mixed_flat); RUN_TEST(test_json_nested_different_prefixes); printf("\n"); /* Tag serialization */ printf("Tag serialization tests:\n"); RUN_TEST(test_json_single_tag); RUN_TEST(test_json_multiple_tags); RUN_TEST(test_json_tags_only); printf("\n"); /* JSON escaping */ printf("JSON escaping tests:\n"); RUN_TEST(test_json_escape_quotes); RUN_TEST(test_json_escape_backslash); RUN_TEST(test_json_escape_newline); RUN_TEST(test_json_escape_tab); RUN_TEST(test_json_escape_carriage_return); RUN_TEST(test_json_escape_control_chars); RUN_TEST(test_json_escape_backspace_formfeed); RUN_TEST(test_json_clean_ascii); printf("\n"); /* Buffer overflow */ printf("Buffer overflow tests:\n"); RUN_TEST(test_json_buffer_too_small); RUN_TEST(test_json_buffer_minimal); RUN_TEST(test_json_buffer_too_small_for_empty); printf("\n"); /* Estimate */ printf("Estimate tests:\n"); RUN_TEST(test_json_estimate); RUN_TEST(test_json_estimate_with_tags); RUN_TEST(test_json_estimate_nested); printf("\n"); /* Allocating version */ printf("Allocating version tests:\n"); RUN_TEST(test_json_alloc_basic); RUN_TEST(test_json_alloc_empty); RUN_TEST(test_json_alloc_null_len); printf("\n"); /* Large result */ printf("Large result tests:\n"); RUN_TEST(test_json_many_fields); printf("\n"); /* Realistic */ printf("Realistic tests:\n"); RUN_TEST(test_json_syslog_like); printf("\n"); /* Summary */ printf("=== Summary ===\n"); printf("Tests run: %d\n", tests_run); printf("Tests passed: %d\n", tests_passed); printf("Tests failed: %d\n", tests_failed); return tests_failed > 0 ? 
1 : 0; } #else /* !ENABLE_TURBO */ int main(void) { printf("Turbo mode not enabled, skipping tests.\n"); return 0; } #endif /* ENABLE_TURBO */ liblognorm-2.1.0/tests/turbo_test_result.c000066400000000000000000001016261520037563000207630ustar00rootroot00000000000000/** * @file turbo_test_result.c * @brief Comprehensive test suite for ln_fast_result_t and snapshot * * Coverage: * - Lifecycle: init, clear, re-init * - String fields: inline (< 48B), external (>= 48B), boundary (47/48) * - Integer fields: positive, negative, zero, INT64_MIN, INT64_MAX * - Double fields: positive, negative, zero, fractional * - Tag management: add, dedup (same pointer), has_tag, overflow * - Capacity: max fields, overflow rejection * - Metadata: rule_id, original message, flags * - Nested field detection: dotted names, no dots * - Hash functions: ln_fast_hash, ln_fast_hash_n, consistency * - Snapshot: create, get, free, pointer rebasing, NULL safety * - Field flags: STATIC_NAME, STATIC_VAL, NESTED * * @author Jeremie Jourdin / Advens * @copyright 2026 Advens. Released under ASL 2.0. 
*/ #include "config.h" #pragma GCC diagnostic ignored "-Wdeclaration-after-statement" #ifdef ENABLE_TURBO #include "turbo_result_fast.h" #include "turbo_arena.h" #include "turbo_snapshot.h" #include #include #include #include /*============================================================================ * Test Framework *============================================================================*/ static int tests_run = 0; static int tests_passed = 0; static int tests_failed = 0; #define TEST_ASSERT(cond, msg) do { \ if (!(cond)) { \ fprintf(stderr, " FAIL: %s (line %d)\n", msg, __LINE__); \ return 0; \ } \ } while(0) #define TEST_ASSERT_EQ(a, b, msg) do { \ if ((a) != (b)) { \ fprintf(stderr, " FAIL: %s - got %lld, expected %lld (line %d)\n", \ msg, (long long)(a), (long long)(b), __LINE__); \ return 0; \ } \ } while(0) #define TEST_ASSERT_STR_EQ(a, b, msg) do { \ if (strcmp((a), (b)) != 0) { \ fprintf(stderr, " FAIL: %s - got \"%s\", expected \"%s\" (line %d)\n", \ msg, (a), (b), __LINE__); \ return 0; \ } \ } while(0) #define RUN_TEST(test_func) do { \ tests_run++; \ printf(" Running %s... 
", #test_func); \ fflush(stdout); \ if (test_func()) { \ tests_passed++; \ printf("OK\n"); \ } else { \ tests_failed++; \ printf("FAILED\n"); \ } \ } while(0) /*============================================================================ * Lifecycle Tests *============================================================================*/ static int test_init(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); TEST_ASSERT_EQ(result.n_fields, 0, "fields should be 0"); TEST_ASSERT_EQ(result.n_tags, 0, "tags should be 0"); TEST_ASSERT_EQ(result.flags, 0, "flags should be 0"); TEST_ASSERT(result.rule_id == NULL, "rule_id should be NULL"); TEST_ASSERT(result.original == NULL, "original should be NULL"); TEST_ASSERT_EQ(result.original_len, 0, "original_len should be 0"); TEST_ASSERT(result.arena == &arena, "arena pointer should be set"); ln_arena_destroy(&arena); return 1; } static int test_clear(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Populate with data */ ln_fast_add_string_static(&result, "field1", 6, "value1", 6); ln_fast_add_tag(&result, "tag1"); ln_fast_set_rule_id(&result, "rule123"); ln_fast_set_original(&result, "test msg", 8); TEST_ASSERT_EQ(result.n_fields, 1, "should have 1 field"); TEST_ASSERT_EQ(result.n_tags, 1, "should have 1 tag"); TEST_ASSERT(result.flags != 0, "flags should be set"); /* Clear */ ln_fast_result_clear(&result); TEST_ASSERT_EQ(result.n_fields, 0, "fields should be 0 after clear"); TEST_ASSERT_EQ(result.n_tags, 0, "tags should be 0 after clear"); TEST_ASSERT_EQ(result.flags, 0, "flags should be 0 after clear"); TEST_ASSERT(result.rule_id == NULL, "rule_id should be NULL after clear"); TEST_ASSERT(result.original == NULL, "original should be NULL after clear"); TEST_ASSERT_EQ(result.original_len, 0, "original_len should be 0 after clear"); /* Arena pointer should still be valid */ TEST_ASSERT(result.arena == 
&arena, "arena should survive clear"); ln_arena_destroy(&arena); return 1; } static int test_clear_and_reuse(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* First message */ ln_fast_add_string_static(&result, "host", 4, "srv1", 4); ln_fast_set_rule_id(&result, "rule_a"); TEST_ASSERT_EQ(result.n_fields, 1, "first msg: 1 field"); /* Clear for next message */ ln_fast_result_clear(&result); /* Second message */ ln_fast_add_int_static(&result, "status", 6, 200); ln_fast_add_int_static(&result, "bytes", 5, 4096); ln_fast_set_rule_id(&result, "rule_b"); TEST_ASSERT_EQ(result.n_fields, 2, "second msg: 2 fields"); TEST_ASSERT_STR_EQ(result.rule_id, "rule_b", "rule_id should be rule_b"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * String Field Tests *============================================================================*/ static int test_add_string_inline(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Short string (8 < 48) should be stored inline */ int r = ln_fast_add_string_static(&result, "host", 4, "server01", 8); TEST_ASSERT_EQ(r, 0, "add_string should succeed"); TEST_ASSERT_EQ(result.n_fields, 1, "should have 1 field"); const ln_fast_field_t *f = &result.fields[0]; TEST_ASSERT_EQ(f->type, LN_FTYPE_STRING_INLINE, "short string should be inline"); TEST_ASSERT(memcmp(f->v.inl, "server01", 8) == 0, "value should match"); TEST_ASSERT_EQ(f->v.inl[8], '\0', "should be null-terminated"); TEST_ASSERT_STR_EQ(f->name, "host", "name should be 'host'"); TEST_ASSERT_EQ(f->name_len, 4, "name_len should be 4"); TEST_ASSERT(f->flags & LN_FFIELD_STATIC_NAME, "should have STATIC_NAME flag"); TEST_ASSERT(f->flags & LN_FFIELD_STATIC_VAL, "should have STATIC_VAL flag"); TEST_ASSERT(!(f->flags & LN_FFIELD_NESTED), "should not have NESTED flag"); ln_arena_destroy(&arena); return 
1; } static int test_add_string_external(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* String >= 48 bytes should use external storage */ char long_value[100]; memset(long_value, 'x', sizeof(long_value)); int r = ln_fast_add_string_static(&result, "data", 4, long_value, sizeof(long_value)); TEST_ASSERT_EQ(r, 0, "add_string should succeed"); const ln_fast_field_t *f = &result.fields[0]; TEST_ASSERT_EQ(f->type, LN_FTYPE_STRING, "long string should be external"); TEST_ASSERT_EQ(f->v.str.len, sizeof(long_value), "length should match"); TEST_ASSERT(f->v.str.ptr == long_value, "pointer should reference original"); ln_arena_destroy(&arena); return 1; } static int test_add_string_boundary_47(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* 47 bytes: just under LN_FAST_INLINE_SIZE (48), should be inline */ char val47[47]; memset(val47, 'A', 47); int r = ln_fast_add_string_static(&result, "f47", 3, val47, 47); TEST_ASSERT_EQ(r, 0, "add should succeed"); const ln_fast_field_t *f = &result.fields[0]; TEST_ASSERT_EQ(f->type, LN_FTYPE_STRING_INLINE, "47-byte string should be inline"); TEST_ASSERT(memcmp(f->v.inl, val47, 47) == 0, "value should match"); TEST_ASSERT_EQ(f->v.inl[47], '\0', "should be null-terminated"); ln_arena_destroy(&arena); return 1; } static int test_add_string_boundary_48(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* 48 bytes: exactly LN_FAST_INLINE_SIZE, should be external */ char val48[48]; memset(val48, 'B', 48); int r = ln_fast_add_string_static(&result, "f48", 3, val48, 48); TEST_ASSERT_EQ(r, 0, "add should succeed"); const ln_fast_field_t *f = &result.fields[0]; TEST_ASSERT_EQ(f->type, LN_FTYPE_STRING, "48-byte string should be external"); TEST_ASSERT_EQ(f->v.str.len, 48, "length should be 48"); ln_arena_destroy(&arena); return 1; } static int 
test_add_string_empty(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Empty string should be inline */ int r = ln_fast_add_string_static(&result, "empty", 5, "", 0); TEST_ASSERT_EQ(r, 0, "add empty string should succeed"); const ln_fast_field_t *f = &result.fields[0]; TEST_ASSERT_EQ(f->type, LN_FTYPE_STRING_INLINE, "empty string should be inline"); TEST_ASSERT_EQ(f->v.inl[0], '\0', "should be null-terminated"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Integer Field Tests *============================================================================*/ static int test_add_int_positive(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); int r = ln_fast_add_int_static(&result, "status", 6, 200); TEST_ASSERT_EQ(r, 0, "add_int should succeed"); TEST_ASSERT_EQ(result.n_fields, 1, "should have 1 field"); const ln_fast_field_t *f = &result.fields[0]; TEST_ASSERT_EQ(f->type, LN_FTYPE_INT, "type should be int"); TEST_ASSERT_EQ(f->v.i, 200, "value should be 200"); TEST_ASSERT(f->flags & LN_FFIELD_STATIC_NAME, "should have STATIC_NAME flag"); TEST_ASSERT(!(f->flags & LN_FFIELD_STATIC_VAL), "int should not have STATIC_VAL"); ln_arena_destroy(&arena); return 1; } static int test_add_int_negative(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); int r = ln_fast_add_int_static(&result, "offset", 6, -42); TEST_ASSERT_EQ(r, 0, "add_int should succeed"); TEST_ASSERT_EQ(result.fields[0].v.i, -42, "value should be -42"); ln_arena_destroy(&arena); return 1; } static int test_add_int_zero(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); int r = ln_fast_add_int_static(&result, "count", 5, 0); TEST_ASSERT_EQ(r, 0, "add_int should succeed"); 
TEST_ASSERT_EQ(result.fields[0].v.i, 0, "value should be 0"); ln_arena_destroy(&arena); return 1; } static int test_add_int_extremes(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); int r = ln_fast_add_int_static(&result, "max", 3, INT64_MAX); TEST_ASSERT_EQ(r, 0, "add INT64_MAX should succeed"); TEST_ASSERT_EQ(result.fields[0].v.i, INT64_MAX, "should store INT64_MAX"); r = ln_fast_add_int_static(&result, "min", 3, INT64_MIN); TEST_ASSERT_EQ(r, 0, "add INT64_MIN should succeed"); TEST_ASSERT_EQ(result.fields[1].v.i, INT64_MIN, "should store INT64_MIN"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Double Field Tests *============================================================================*/ static int test_add_double_positive(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); int r = ln_fast_add_double_static(&result, "latency", 7, 1.5); TEST_ASSERT_EQ(r, 0, "add_double should succeed"); const ln_fast_field_t *f = &result.fields[0]; TEST_ASSERT_EQ(f->type, LN_FTYPE_DOUBLE, "type should be double"); TEST_ASSERT(f->v.d > 1.4 && f->v.d < 1.6, "value should be ~1.5"); TEST_ASSERT(f->flags & LN_FFIELD_STATIC_NAME, "should have STATIC_NAME flag"); ln_arena_destroy(&arena); return 1; } static int test_add_double_negative(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); int r = ln_fast_add_double_static(&result, "temp", 4, -273.15); TEST_ASSERT_EQ(r, 0, "add_double should succeed"); TEST_ASSERT(result.fields[0].v.d < -273.0, "value should be < -273.0"); ln_arena_destroy(&arena); return 1; } static int test_add_double_zero(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); int r = ln_fast_add_double_static(&result, "rate", 4, 0.0); TEST_ASSERT_EQ(r, 0, 
"add_double should succeed"); TEST_ASSERT(result.fields[0].v.d == 0.0, "value should be 0.0"); ln_arena_destroy(&arena); return 1; } static int test_add_double_precise(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Test with a value that has many decimal places */ int r = ln_fast_add_double_static(&result, "pi", 2, 3.14159265358979); TEST_ASSERT_EQ(r, 0, "add_double should succeed"); TEST_ASSERT(result.fields[0].v.d > 3.141 && result.fields[0].v.d < 3.142, "value should be ~3.14159"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Tag Tests *============================================================================*/ static int test_add_tag(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); int r = ln_fast_add_tag(&result, "web"); TEST_ASSERT_EQ(r, 0, "add_tag should succeed"); TEST_ASSERT_EQ(result.n_tags, 1, "should have 1 tag"); TEST_ASSERT_STR_EQ(result.tags[0].tag, "web", "tag should be 'web'"); TEST_ASSERT(result.tags[0].hash != 0, "hash should be computed"); r = ln_fast_add_tag(&result, "http"); TEST_ASSERT_EQ(r, 0, "add second tag should succeed"); TEST_ASSERT_EQ(result.n_tags, 2, "should have 2 tags"); ln_arena_destroy(&arena); return 1; } static int test_add_tag_duplicate_same_pointer(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Same string literal (same pointer) should be deduped */ static const char *tag = "syslog"; ln_fast_add_tag(&result, tag); ln_fast_add_tag(&result, tag); /* Pointer equality check means same pointer is deduped */ TEST_ASSERT_EQ(result.n_tags, 1, "same-pointer duplicate should be deduped"); ln_arena_destroy(&arena); return 1; } static int test_has_tag(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); 
ln_fast_add_tag(&result, "web"); ln_fast_add_tag(&result, "http"); ln_fast_add_tag(&result, "firewall"); TEST_ASSERT(ln_fast_has_tag(&result, "web") == 1, "should have 'web' tag"); TEST_ASSERT(ln_fast_has_tag(&result, "http") == 1, "should have 'http' tag"); TEST_ASSERT(ln_fast_has_tag(&result, "firewall") == 1, "should have 'firewall' tag"); TEST_ASSERT(ln_fast_has_tag(&result, "ftp") == 0, "should not have 'ftp' tag"); TEST_ASSERT(ln_fast_has_tag(&result, "") == 0, "should not have empty tag"); ln_arena_destroy(&arena); return 1; } static int test_has_tag_null_safety(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); TEST_ASSERT(ln_fast_has_tag(NULL, "web") == 0, "NULL result should return 0"); TEST_ASSERT(ln_fast_has_tag(&result, NULL) == 0, "NULL tag should return 0"); ln_arena_destroy(&arena); return 1; } static int test_tag_overflow(void) { ln_arena_t arena; ln_fast_result_t result; char tag_name[32]; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Fill to max */ for (int i = 0; i < LN_FAST_MAX_TAGS; i++) { snprintf(tag_name, sizeof(tag_name), "tag_%d", i); int r = ln_fast_add_tag(&result, tag_name); TEST_ASSERT_EQ(r, 0, "adding tag should succeed"); } TEST_ASSERT_EQ(result.n_tags, LN_FAST_MAX_TAGS, "should have max tags"); /* One more should fail */ int r = ln_fast_add_tag(&result, "overflow_tag"); TEST_ASSERT_EQ(r, -1, "overflow should fail"); TEST_ASSERT_EQ(result.n_tags, LN_FAST_MAX_TAGS, "count should stay at max"); ln_arena_destroy(&arena); return 1; } static int test_tag_hash_bitmap(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* After adding a tag, the hash bitmap should be marked */ ln_fast_add_tag(&result, "test_tag"); uint32_t h = ln_fast_hash("test_tag"); uint8_t slot = h & (LN_FAST_TAG_HASH_SIZE - 1); TEST_ASSERT(result.tag_hash[slot] == 1, "hash bitmap should be set"); /* Clear should reset the bitmap */ 
ln_fast_result_clear(&result); TEST_ASSERT(result.tag_hash[slot] == 0, "hash bitmap should be cleared"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Capacity Tests *============================================================================*/ static int test_max_fields(void) { ln_arena_t arena; ln_fast_result_t result; char name[32]; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Fill to max */ for (int i = 0; i < LN_FAST_MAX_FIELDS; i++) { snprintf(name, sizeof(name), "field%d", i); int r = ln_fast_add_int_static(&result, name, (uint16_t)strlen(name), i); TEST_ASSERT_EQ(r, 0, "adding field should succeed"); } TEST_ASSERT_EQ(result.n_fields, LN_FAST_MAX_FIELDS, "should reach max"); /* One more should fail */ int r = ln_fast_add_int_static(&result, "overflow", 8, 999); TEST_ASSERT_EQ(r, -1, "overflow should fail"); TEST_ASSERT_EQ(result.n_fields, LN_FAST_MAX_FIELDS, "count should stay at max"); ln_arena_destroy(&arena); return 1; } static int test_field_overflow_all_types(void) { ln_arena_t arena; ln_fast_result_t result; char name[32]; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Fill to max with mixed types */ for (int i = 0; i < LN_FAST_MAX_FIELDS; i++) { snprintf(name, sizeof(name), "f%d", i); ln_fast_add_string_static(&result, name, (uint16_t)strlen(name), "v", 1); } /* All add functions should reject when full */ int r1 = ln_fast_add_string_static(&result, "s", 1, "v", 1); int r2 = ln_fast_add_int_static(&result, "i", 1, 42); int r3 = ln_fast_add_double_static(&result, "d", 1, 1.0); TEST_ASSERT_EQ(r1, -1, "string overflow should fail"); TEST_ASSERT_EQ(r2, -1, "int overflow should fail"); TEST_ASSERT_EQ(r3, -1, "double overflow should fail"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Metadata Tests 
*============================================================================*/ static int test_rule_id(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); TEST_ASSERT(result.rule_id == NULL, "initial rule_id should be NULL"); TEST_ASSERT(!(result.flags & LN_FRESULT_MATCHED), "initially not matched"); ln_fast_set_rule_id(&result, "rule_web_001"); TEST_ASSERT(result.rule_id != NULL, "rule_id should be set"); TEST_ASSERT_STR_EQ(result.rule_id, "rule_web_001", "rule_id should match"); TEST_ASSERT(result.flags & LN_FRESULT_MATCHED, "MATCHED flag should be set"); ln_arena_destroy(&arena); return 1; } static int test_original_message(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); TEST_ASSERT(!(result.flags & LN_FRESULT_HAS_ORIG), "initially no original"); const char *msg = "Jan 1 00:00:00 host test[123]: hello world"; ln_fast_set_original(&result, msg, (uint32_t)strlen(msg)); TEST_ASSERT(result.original != NULL, "original should be set"); TEST_ASSERT_EQ(result.original_len, (uint32_t)strlen(msg), "length should match"); TEST_ASSERT(memcmp(result.original, msg, strlen(msg)) == 0, "content should match"); TEST_ASSERT(result.flags & LN_FRESULT_HAS_ORIG, "HAS_ORIG flag should be set"); ln_arena_destroy(&arena); return 1; } static int test_flags_combination(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_set_rule_id(&result, "rule1"); ln_fast_set_original(&result, "msg", 3); TEST_ASSERT(result.flags & LN_FRESULT_MATCHED, "MATCHED should be set"); TEST_ASSERT(result.flags & LN_FRESULT_HAS_ORIG, "HAS_ORIG should be set"); TEST_ASSERT((result.flags & (LN_FRESULT_MATCHED | LN_FRESULT_HAS_ORIG)) == (LN_FRESULT_MATCHED | LN_FRESULT_HAS_ORIG), "both flags should coexist"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * 
Nested Field Detection Tests *============================================================================*/ static int test_nested_field_detection(void) { /* Test the ln_ffield_detect_nested inline function */ TEST_ASSERT_EQ(ln_ffield_detect_nested("host", 4), 0, "no dot -> not nested"); TEST_ASSERT_EQ(ln_ffield_detect_nested("source.ip", 9), LN_FFIELD_NESTED, "one dot -> nested"); TEST_ASSERT_EQ(ln_ffield_detect_nested("user.group.name", 15), LN_FFIELD_NESTED, "two dots -> nested"); TEST_ASSERT_EQ(ln_ffield_detect_nested(".", 1), LN_FFIELD_NESTED, "lone dot -> nested"); TEST_ASSERT_EQ(ln_ffield_detect_nested("", 0), 0, "empty -> not nested"); return 1; } static int test_nested_flag_on_add(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Flat field */ ln_fast_add_string_static(&result, "host", 4, "srv1", 4); TEST_ASSERT(!(result.fields[0].flags & LN_FFIELD_NESTED), "flat field should not have NESTED"); /* Dotted field */ ln_fast_add_string_static(&result, "source.ip", 9, "1.2.3.4", 7); TEST_ASSERT(result.fields[1].flags & LN_FFIELD_NESTED, "dotted field should have NESTED"); /* Deeply nested */ ln_fast_add_int_static(&result, "user.group.id", 13, 42); TEST_ASSERT(result.fields[2].flags & LN_FFIELD_NESTED, "deeply dotted field should have NESTED"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Hash Function Tests *============================================================================*/ static int test_hash_functions(void) { /* ln_fast_hash (null-terminated) */ uint32_t h1 = ln_fast_hash("hello"); uint32_t h2 = ln_fast_hash("hello"); TEST_ASSERT_EQ(h1, h2, "same string should give same hash"); uint32_t h3 = ln_fast_hash("world"); TEST_ASSERT(h1 != h3, "different strings should give different hash (probabilistic)"); /* ln_fast_hash_n (length-based) */ uint32_t h4 = ln_fast_hash_n("hello", 5); TEST_ASSERT_EQ(h1, h4, "hash and 
hash_n should agree for same input"); /* Partial hash */ uint32_t h5 = ln_fast_hash_n("helloworld", 5); TEST_ASSERT_EQ(h4, h5, "hash_n should only hash first n bytes"); /* Empty string */ uint32_t h6 = ln_fast_hash(""); uint32_t h7 = ln_fast_hash_n("anything", 0); TEST_ASSERT_EQ(h6, h7, "empty hash should be same as zero-length hash_n"); /* Known FNV-1a basis value for empty input */ TEST_ASSERT_EQ(h6, 2166136261u, "empty hash should be FNV offset basis"); return 1; } /*============================================================================ * Field Size Static Assert (compile-time check, just verify it compiled) *============================================================================*/ static int test_field_size(void) { /* This is primarily a compile-time check (_Static_assert in header). * Just verify the sizeof at runtime too. */ TEST_ASSERT_EQ(sizeof(ln_fast_field_t), 64, "field struct should be 64 bytes"); return 1; } /*============================================================================ * Multiple Field Types in One Result *============================================================================*/ static int test_mixed_field_types(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Add all supported types */ ln_fast_add_string_static(&result, "host", 4, "srv1", 4); ln_fast_add_int_static(&result, "status", 6, 200); ln_fast_add_double_static(&result, "latency", 7, 0.42); ln_fast_set_rule_id(&result, "mixed_rule"); ln_fast_add_tag(&result, "web"); ln_fast_set_original(&result, "test msg", 8); TEST_ASSERT_EQ(result.n_fields, 3, "should have 3 fields"); TEST_ASSERT_EQ(result.fields[0].type, LN_FTYPE_STRING_INLINE, "field 0 = string"); TEST_ASSERT_EQ(result.fields[1].type, LN_FTYPE_INT, "field 1 = int"); TEST_ASSERT_EQ(result.fields[2].type, LN_FTYPE_DOUBLE, "field 2 = double"); TEST_ASSERT_EQ(result.n_tags, 1, "should have 1 tag"); TEST_ASSERT(result.flags & LN_FRESULT_MATCHED, 
"should be matched"); TEST_ASSERT(result.flags & LN_FRESULT_HAS_ORIG, "should have original"); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Snapshot Tests *============================================================================*/ static int test_snapshot_create_basic(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Populate result */ ln_fast_add_string_static(&result, "host", 4, "server01", 8); ln_fast_add_int_static(&result, "status", 6, 200); ln_fast_set_rule_id(&result, "rule_001"); ln_fast_add_tag(&result, "web"); /* Create snapshot */ ln_fast_result_snapshot_t *snap = ln_fast_result_snapshot_create(&result, &arena); TEST_ASSERT(snap != NULL, "snapshot should be created"); /* Get result from snapshot */ const ln_fast_result_t *sr = ln_fast_result_snapshot_get(snap); TEST_ASSERT(sr != NULL, "get should return non-NULL"); TEST_ASSERT_EQ(sr->n_fields, 2, "snapshot should have 2 fields"); TEST_ASSERT_EQ(sr->n_tags, 1, "snapshot should have 1 tag"); TEST_ASSERT_STR_EQ(sr->rule_id, "rule_001", "rule_id should match"); /* Verify field values survive snapshot */ TEST_ASSERT_EQ(sr->fields[0].type, LN_FTYPE_STRING_INLINE, "field 0 type"); TEST_ASSERT(memcmp(sr->fields[0].v.inl, "server01", 8) == 0, "field 0 value"); TEST_ASSERT_EQ(sr->fields[1].type, LN_FTYPE_INT, "field 1 type"); TEST_ASSERT_EQ(sr->fields[1].v.i, 200, "field 1 value"); /* Arena pointer should be NULL in snapshot (self-contained) */ TEST_ASSERT(sr->arena == NULL, "snapshot arena should be NULL"); ln_fast_result_snapshot_free(snap); ln_arena_destroy(&arena); return 1; } static int test_snapshot_survives_arena_reset(void) { ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); /* Use arena-allocated string (external, large) */ char *val = ln_arena_strndup(&arena, 
"this-is-an-arena-allocated-value-that-is-long-enough-to-be-external", 66); ln_fast_add_string_static(&result, "data", 4, val, 66); ln_fast_set_rule_id(&result, "rule_snap"); /* Snapshot before reset */ ln_fast_result_snapshot_t *snap = ln_fast_result_snapshot_create(&result, &arena); TEST_ASSERT(snap != NULL, "snapshot should be created"); /* Reset arena (invalidates original pointers) */ ln_arena_reset(&arena); /* Snapshot should still be valid */ const ln_fast_result_t *sr = ln_fast_result_snapshot_get(snap); TEST_ASSERT(sr != NULL, "snapshot should still be valid"); TEST_ASSERT_EQ(sr->n_fields, 1, "should have 1 field"); /* The external string pointer should now point into the snapshot's arena_data */ if (sr->fields[0].type == LN_FTYPE_STRING) { TEST_ASSERT(memcmp(sr->fields[0].v.str.ptr, "this-is-an-arena-allocated-value-that-is-long-enough-to-be-external", 66) == 0, "rebased pointer should have correct data"); } ln_fast_result_snapshot_free(snap); ln_arena_destroy(&arena); return 1; } static int test_snapshot_null_safety(void) { /* NULL source */ ln_fast_result_snapshot_t *snap = ln_fast_result_snapshot_create(NULL, NULL); TEST_ASSERT(snap == NULL, "NULL source should return NULL"); /* NULL get */ const ln_fast_result_t *r = ln_fast_result_snapshot_get(NULL); TEST_ASSERT(r == NULL, "NULL get should return NULL"); /* NULL free should not crash */ ln_fast_result_snapshot_free(NULL); return 1; } static int test_snapshot_no_arena(void) { /* Result without arena data (all inline/static strings) */ ln_arena_t arena; ln_fast_result_t result; ln_arena_init(&arena); ln_fast_result_init(&result, &arena); ln_fast_add_string_static(&result, "host", 4, "srv1", 4); /* inline */ ln_fast_add_int_static(&result, "code", 4, 42); /* Create snapshot with arena that has no used data */ ln_arena_t empty_arena; ln_arena_init(&empty_arena); ln_fast_result_snapshot_t *snap = ln_fast_result_snapshot_create(&result, &empty_arena); TEST_ASSERT(snap != NULL, "snapshot should succeed with 
empty arena"); const ln_fast_result_t *sr = ln_fast_result_snapshot_get(snap); TEST_ASSERT_EQ(sr->n_fields, 2, "should have 2 fields"); TEST_ASSERT(memcmp(sr->fields[0].v.inl, "srv1", 4) == 0, "inline value preserved"); ln_fast_result_snapshot_free(snap); ln_arena_destroy(&empty_arena); ln_arena_destroy(&arena); return 1; } /*============================================================================ * Main *============================================================================*/ int main(void) { printf("=== ln_fast_result Comprehensive Test Suite ===\n\n"); /* Lifecycle tests */ printf("Lifecycle tests:\n"); RUN_TEST(test_init); RUN_TEST(test_clear); RUN_TEST(test_clear_and_reuse); printf("\n"); /* String field tests */ printf("String field tests:\n"); RUN_TEST(test_add_string_inline); RUN_TEST(test_add_string_external); RUN_TEST(test_add_string_boundary_47); RUN_TEST(test_add_string_boundary_48); RUN_TEST(test_add_string_empty); printf("\n"); /* Integer field tests */ printf("Integer field tests:\n"); RUN_TEST(test_add_int_positive); RUN_TEST(test_add_int_negative); RUN_TEST(test_add_int_zero); RUN_TEST(test_add_int_extremes); printf("\n"); /* Double field tests */ printf("Double field tests:\n"); RUN_TEST(test_add_double_positive); RUN_TEST(test_add_double_negative); RUN_TEST(test_add_double_zero); RUN_TEST(test_add_double_precise); printf("\n"); /* Tag tests */ printf("Tag tests:\n"); RUN_TEST(test_add_tag); RUN_TEST(test_add_tag_duplicate_same_pointer); RUN_TEST(test_has_tag); RUN_TEST(test_has_tag_null_safety); RUN_TEST(test_tag_overflow); RUN_TEST(test_tag_hash_bitmap); printf("\n"); /* Capacity tests */ printf("Capacity tests:\n"); RUN_TEST(test_max_fields); RUN_TEST(test_field_overflow_all_types); printf("\n"); /* Metadata tests */ printf("Metadata tests:\n"); RUN_TEST(test_rule_id); RUN_TEST(test_original_message); RUN_TEST(test_flags_combination); printf("\n"); /* Nested field detection */ printf("Nested field detection tests:\n"); 
RUN_TEST(test_nested_field_detection); RUN_TEST(test_nested_flag_on_add); printf("\n"); /* Hash functions */ printf("Hash function tests:\n"); RUN_TEST(test_hash_functions); printf("\n"); /* Structure tests */ printf("Structure tests:\n"); RUN_TEST(test_field_size); RUN_TEST(test_mixed_field_types); printf("\n"); /* Snapshot tests */ printf("Snapshot tests:\n"); RUN_TEST(test_snapshot_create_basic); RUN_TEST(test_snapshot_survives_arena_reset); RUN_TEST(test_snapshot_null_safety); RUN_TEST(test_snapshot_no_arena); printf("\n"); /* Summary */ printf("=== Summary ===\n"); printf("Tests run: %d\n", tests_run); printf("Tests passed: %d\n", tests_passed); printf("Tests failed: %d\n", tests_failed); return tests_failed > 0 ? 1 : 0; } #else /* !ENABLE_TURBO */ int main(void) { printf("Turbo mode not enabled, skipping tests.\n"); return 0; } #endif /* ENABLE_TURBO */ liblognorm-2.1.0/tests/turbo_test_simd.c000066400000000000000000000610371520037563000204020ustar00rootroot00000000000000/** * @file turbo_test_simd.c * @brief Comprehensive test suite for turbo_simd primitives * * Tests all SIMD-accelerated parsing functions across all backends * (SSE4.2, NEON, scalar). Exercises both fast-path (>16 byte) and * tail-path (<16 byte) code paths to ensure correctness regardless * of the compiled backend. * * @author Jérémie Jourdin / Advens * @copyright 2026 Advens. Released under ASL 2.0. 
*/ #include "config.h" #pragma GCC diagnostic ignored "-Wdeclaration-after-statement" #ifdef ENABLE_TURBO #include "turbo_simd.h" #include #include #include /*============================================================================ * Test Framework *============================================================================*/ static int tests_run = 0; static int tests_passed = 0; static int tests_failed = 0; #define TEST_ASSERT(cond, msg) do { \ if (!(cond)) { \ fprintf(stderr, " FAIL: %s (line %d)\n", msg, __LINE__); \ return 0; \ } \ } while(0) #define TEST_ASSERT_EQ(a, b, msg) do { \ if ((a) != (b)) { \ fprintf(stderr, " FAIL: %s - got %lld, expected %lld (line %d)\n", \ msg, (long long)(a), (long long)(b), __LINE__); \ return 0; \ } \ } while(0) #define RUN_TEST(test_func) do { \ tests_run++; \ printf("Running %s... ", #test_func); \ fflush(stdout); \ if (test_func()) { \ tests_passed++; \ printf("OK\n"); \ } else { \ tests_failed++; \ printf("FAILED\n"); \ } \ } while(0) /*============================================================================ * Backend Tests *============================================================================*/ static int test_backend_info(void) { const char *name = ln_simd_backend_name(); TEST_ASSERT(name != NULL, "backend name should not be NULL"); int width = ln_simd_width(); TEST_ASSERT(width >= 1, "width should be at least 1"); printf("(backend: %s, width: %d) ", name, width); return 1; } /*============================================================================ * find_char Tests *============================================================================*/ static int test_find_char_basic(void) { const char *buf = "hello world"; TEST_ASSERT_EQ(ln_simd_find_char(buf, 11, 'h'), 0, "find 'h' at start"); TEST_ASSERT_EQ(ln_simd_find_char(buf, 11, 'w'), 6, "find 'w'"); TEST_ASSERT_EQ(ln_simd_find_char(buf, 11, 'd'), 10, "find 'd' at end"); TEST_ASSERT_EQ(ln_simd_find_char(buf, 11, ' '), 5, "find space"); 
TEST_ASSERT_EQ(ln_simd_find_char(buf, 11, 'x'), 11, "not found"); return 1; } static int test_find_char_edge(void) { const char *buf = "x"; TEST_ASSERT_EQ(ln_simd_find_char(buf, 1, 'x'), 0, "single char found"); TEST_ASSERT_EQ(ln_simd_find_char(buf, 1, 'y'), 1, "single char not found"); TEST_ASSERT_EQ(ln_simd_find_char(buf, 0, 'x'), 0, "empty buffer"); TEST_ASSERT_EQ(ln_simd_find_char(NULL, 0, 'x'), 0, "NULL buffer"); return 1; } static int test_find_char_long(void) { /* String longer than 16 bytes to exercise SIMD register boundary */ const char *buf = "abcdefghijklmnopqrstuvwxyz0123456789"; size_t len = strlen(buf); TEST_ASSERT_EQ(ln_simd_find_char(buf, len, 'a'), 0, "find 'a' at pos 0"); TEST_ASSERT_EQ(ln_simd_find_char(buf, len, 'q'), 16, "find 'q' at pos 16 (crosses SIMD boundary)"); TEST_ASSERT_EQ(ln_simd_find_char(buf, len, '9'), 35, "find '9' near end"); TEST_ASSERT_EQ(ln_simd_find_char(buf, len, '!'), len, "not found in long string"); return 1; } /*============================================================================ * find_char_set / find_not_char_set Tests *============================================================================*/ static int test_find_char_set_basic(void) { const char *buf = "hello world"; /* Find first whitespace */ TEST_ASSERT_EQ(ln_simd_find_char_set(buf, 11, " \t\n"), 5, "find whitespace"); /* Find first colon or space */ const char *buf2 = "field:value"; TEST_ASSERT_EQ(ln_simd_find_char_set(buf2, 11, ": "), 5, "find colon or space"); /* Nothing found */ TEST_ASSERT_EQ(ln_simd_find_char_set(buf, 11, "!@#"), 11, "no match returns len"); return 1; } static int test_find_char_set_edge(void) { /* Empty buffer */ TEST_ASSERT_EQ(ln_simd_find_char_set("", 0, "abc"), 0, "empty buffer"); /* Empty char set */ TEST_ASSERT_EQ(ln_simd_find_char_set("hello", 5, ""), 5, "empty set"); /* NULL inputs */ TEST_ASSERT_EQ(ln_simd_find_char_set(NULL, 5, "abc"), 5, "NULL buffer"); TEST_ASSERT_EQ(ln_simd_find_char_set("hello", 5, NULL), 5, "NULL 
chars"); return 1; } static int test_find_char_set_long(void) { /* Crosses SIMD register boundaries */ const char *buf = "aaaaaaaaaaaaaaaaaaaaaaaaaaaa:rest"; size_t len = strlen(buf); TEST_ASSERT_EQ(ln_simd_find_char_set(buf, len, ":"), 28, "find colon after many a's"); return 1; } static int test_find_not_char_set_basic(void) { /* Skip digits */ const char *buf = "12345abc"; size_t pos = ln_simd_find_not_char_set(buf, 8, "0123456789"); TEST_ASSERT_EQ(pos, 5, "first non-digit"); /* All match */ pos = ln_simd_find_not_char_set("aaaa", 4, "abc"); TEST_ASSERT_EQ(pos, 4, "all chars in set returns len"); return 1; } static int test_find_not_char_set_edge(void) { /* Empty buffer */ TEST_ASSERT_EQ(ln_simd_find_not_char_set("", 0, "abc"), 0, "empty buffer"); /* NULL inputs */ TEST_ASSERT_EQ(ln_simd_find_not_char_set(NULL, 5, "abc"), 0, "NULL buffer"); TEST_ASSERT_EQ(ln_simd_find_not_char_set("hello", 5, NULL), 0, "NULL chars"); /* First char doesn't match */ TEST_ASSERT_EQ(ln_simd_find_not_char_set("xyz", 3, "abc"), 0, "first char not in set"); return 1; } /*============================================================================ * skip_space Tests *============================================================================*/ static int test_skip_space(void) { TEST_ASSERT_EQ(ln_simd_skip_space(" hello", 7), 2, "skip 2 spaces"); TEST_ASSERT_EQ(ln_simd_skip_space("hello", 5), 0, "no leading ws"); TEST_ASSERT_EQ(ln_simd_skip_space(" ", 3), 3, "all whitespace"); TEST_ASSERT_EQ(ln_simd_skip_space("", 0), 0, "empty"); TEST_ASSERT_EQ(ln_simd_skip_space("\t\n\r hello", 9), 4, "mixed whitespace"); return 1; } static int test_skip_space_long(void) { /* >16 spaces to cross SIMD boundary */ const char *buf = " hello"; TEST_ASSERT_EQ(ln_simd_skip_space(buf, strlen(buf)), 20, "skip 20 spaces (crosses SIMD boundary)"); return 1; } /*============================================================================ * skip_chars Tests 
*============================================================================*/ static int test_skip_chars(void) { /* Skip digits */ TEST_ASSERT_EQ(ln_simd_skip_chars("123abc", 6, "0123456789"), 3, "skip 3 digits"); /* Skip nothing */ TEST_ASSERT_EQ(ln_simd_skip_chars("abc", 3, "0123456789"), 0, "skip nothing"); /* Skip all */ TEST_ASSERT_EQ(ln_simd_skip_chars("aaa", 3, "a"), 3, "skip all"); return 1; } /*============================================================================ * word Tests *============================================================================*/ static int test_word_basic(void) { ln_span_t span; int r = ln_simd_word("hello world", 11, &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "should succeed"); TEST_ASSERT_EQ(span.len, 5, "word length"); TEST_ASSERT(memcmp(span.start, "hello", 5) == 0, "word content"); return 1; } static int test_word_no_space(void) { ln_span_t span; int r = ln_simd_word("hello", 5, &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "should succeed"); TEST_ASSERT_EQ(span.len, 5, "entire string is word"); return 1; } static int test_word_empty(void) { ln_span_t span; /* Starting on whitespace returns ENOTFOUND */ int r = ln_simd_word(" ", 3, &span); TEST_ASSERT_EQ(r, LN_SIMD_ENOTFOUND, "should return not found"); return 1; } static int test_word_single_char(void) { ln_span_t span; int r = ln_simd_word("a", 1, &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "single char word"); TEST_ASSERT_EQ(span.len, 1, "length 1"); TEST_ASSERT(span.start[0] == 'a', "char is 'a'"); return 1; } static int test_word_long(void) { ln_span_t span; /* Word longer than 16 bytes */ const char *buf = "abcdefghijklmnopqrstuvwxyz rest"; int r = ln_simd_word(buf, strlen(buf), &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "long word"); TEST_ASSERT_EQ(span.len, 26, "26-char word"); return 1; } /*============================================================================ * char_to Tests *============================================================================*/ static int 
test_char_to_basic(void) { ln_span_t span; int r = ln_simd_char_to("field:value", 11, ':', &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "should succeed"); TEST_ASSERT_EQ(span.len, 5, "field length"); TEST_ASSERT(memcmp(span.start, "field", 5) == 0, "field content"); return 1; } static int test_char_to_not_found(void) { ln_span_t span; int r = ln_simd_char_to("no delimiter", 12, ':', &span); TEST_ASSERT_EQ(r, LN_SIMD_ENOTFOUND, "should return not found"); return 1; } static int test_char_to_at_end(void) { ln_span_t span; int r = ln_simd_char_to("field:", 6, ':', &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "delimiter at end"); TEST_ASSERT_EQ(span.len, 5, "field is 'field'"); return 1; } static int test_char_to_long(void) { ln_span_t span; /* Delimiter past SIMD boundary */ const char *buf = "abcdefghijklmnopqrstuvwxyz:value"; int r = ln_simd_char_to(buf, strlen(buf), ':', &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "long char_to"); TEST_ASSERT_EQ(span.len, 26, "26 chars before ':'"); return 1; } /*============================================================================ * string_to Tests *============================================================================*/ static int test_string_to_basic(void) { ln_span_t span; int r = ln_simd_string_to("data]]more", 10, "]]", 2, &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "should succeed"); TEST_ASSERT_EQ(span.len, 4, "data length"); TEST_ASSERT(memcmp(span.start, "data", 4) == 0, "data content"); return 1; } static int test_string_to_not_found(void) { ln_span_t span; int r = ln_simd_string_to("no match here", 13, "]]", 2, &span); TEST_ASSERT_EQ(r, LN_SIMD_ENOTFOUND, "not found"); return 1; } static int test_string_to_single_char_delim(void) { ln_span_t span; int r = ln_simd_string_to("field:value", 11, ":", 1, &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "single char delim"); TEST_ASSERT_EQ(span.len, 5, "field length"); return 1; } /*============================================================================ * number Tests 
*============================================================================*/ static int test_number_basic(void) { ln_number_t num; int r = ln_simd_number("42", 2, &num); TEST_ASSERT_EQ(r, LN_SIMD_OK, "should succeed"); TEST_ASSERT_EQ(num.value, 42, "value should be 42"); r = ln_simd_number("-123", 4, &num); TEST_ASSERT_EQ(r, LN_SIMD_OK, "should succeed"); TEST_ASSERT_EQ(num.value, -123, "value should be -123"); return 1; } static int test_number_zero(void) { ln_number_t num; int r = ln_simd_number("0", 1, &num); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse zero"); TEST_ASSERT_EQ(num.value, 0, "value should be 0"); return 1; } static int test_number_with_trailing(void) { ln_number_t num; int r = ln_simd_number("12345abc", 8, &num); TEST_ASSERT_EQ(r, LN_SIMD_OK, "should succeed"); TEST_ASSERT_EQ(num.value, 12345, "value should be 12345"); TEST_ASSERT_EQ(num.consumed, 5, "consumed should be 5"); return 1; } static int test_number_not_a_number(void) { ln_number_t num; int r = ln_simd_number("abc", 3, &num); TEST_ASSERT_EQ(r, LN_SIMD_EFORMAT, "not a number"); return 1; } static int test_number_positive_sign(void) { ln_number_t num; int r = ln_simd_number("+42", 3, &num); TEST_ASSERT_EQ(r, LN_SIMD_OK, "positive sign"); TEST_ASSERT_EQ(num.value, 42, "value should be 42"); return 1; } /*============================================================================ * unsigned Tests *============================================================================*/ static int test_unsigned_basic(void) { ln_number_t num; int r = ln_simd_unsigned("42", 2, &num); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse unsigned"); TEST_ASSERT_EQ(num.value, 42, "value should be 42"); return 1; } static int test_unsigned_rejects_negative(void) { ln_number_t num; int r = ln_simd_unsigned("-42", 3, &num); /* Should reject negative numbers */ TEST_ASSERT(r != LN_SIMD_OK, "should reject negative"); return 1; } /*============================================================================ * hex Tests 
*============================================================================*/ static int test_hex_basic(void) { ln_number_t num; int r = ln_simd_hex("0xFF", 4, &num); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse hex"); TEST_ASSERT_EQ(num.value, 255, "value should be 255"); return 1; } static int test_hex_upper(void) { ln_number_t num; int r = ln_simd_hex("0XAB", 4, &num); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse uppercase hex prefix"); TEST_ASSERT_EQ(num.value, 0xAB, "value should be 0xAB"); return 1; } static int test_hex_with_trailing(void) { ln_number_t num; int r = ln_simd_hex("0xDEADbeef rest", 15, &num); TEST_ASSERT_EQ(r, LN_SIMD_OK, "hex with trailing"); TEST_ASSERT_EQ(num.value, (int64_t)0xDEADBEEF, "value should be 0xDEADBEEF"); return 1; } /*============================================================================ * ipv4 Tests *============================================================================*/ static int test_ipv4_basic(void) { ln_ipv4_t ip; int r = ln_simd_ipv4("192.168.1.1", 11, &ip); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse IPv4"); TEST_ASSERT(ip.valid, "should be valid"); TEST_ASSERT_EQ(ip.octets[0], 192, "octet 0"); TEST_ASSERT_EQ(ip.octets[1], 168, "octet 1"); TEST_ASSERT_EQ(ip.octets[2], 1, "octet 2"); TEST_ASSERT_EQ(ip.octets[3], 1, "octet 3"); return 1; } static int test_ipv4_with_trailing(void) { ln_ipv4_t ip; int r = ln_simd_ipv4("10.0.0.1:8080", 13, &ip); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse IPv4 with trailing"); TEST_ASSERT_EQ(ip.consumed, 8, "consumed 8 bytes"); return 1; } static int test_ipv4_all_zeros(void) { ln_ipv4_t ip; int r = ln_simd_ipv4("0.0.0.0", 7, &ip); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse 0.0.0.0"); return 1; } static int test_ipv4_max(void) { ln_ipv4_t ip; int r = ln_simd_ipv4("255.255.255.255", 15, &ip); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse 255.255.255.255"); return 1; } static int test_ipv4_invalid(void) { ln_ipv4_t ip; /* Octet > 255 */ int r = ln_simd_ipv4("256.1.1.1", 9, &ip); TEST_ASSERT(r != LN_SIMD_OK, "reject 
256.x.x.x"); /* Too short */ r = ln_simd_ipv4("1.2.3", 5, &ip); TEST_ASSERT(r != LN_SIMD_OK, "reject incomplete IPv4"); /* Not an IP */ r = ln_simd_ipv4("hello", 5, &ip); TEST_ASSERT(r != LN_SIMD_OK, "reject non-IP"); return 1; } /*============================================================================ * quoted Tests *============================================================================*/ static int test_quoted_double(void) { ln_span_t span; int r = ln_simd_quoted("\"hello world\"", 13, &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse double-quoted"); TEST_ASSERT_EQ(span.len, 11, "content length"); TEST_ASSERT(memcmp(span.start, "hello world", 11) == 0, "content"); return 1; } static int test_quoted_single(void) { ln_span_t span; int r = ln_simd_quoted("'hello'", 7, &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse single-quoted"); TEST_ASSERT_EQ(span.len, 5, "content length"); return 1; } static int test_quoted_with_escape(void) { ln_span_t span; int r = ln_simd_quoted("\"hello\\\"world\"", 14, &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse with escaped quote"); return 1; } static int test_quoted_empty(void) { ln_span_t span; int r = ln_simd_quoted("\"\"", 2, &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse empty quoted"); TEST_ASSERT_EQ(span.len, 0, "empty content"); return 1; } static int test_quoted_unclosed(void) { ln_span_t span; int r = ln_simd_quoted("\"unclosed", 9, &span); TEST_ASSERT_EQ(r, LN_SIMD_EFORMAT, "reject unclosed quote"); return 1; } /*============================================================================ * bracketed Tests *============================================================================*/ static int test_bracketed_basic(void) { ln_span_t span; int r = ln_simd_bracketed("{hello}", 7, '{', '}', &span); TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse bracketed"); TEST_ASSERT_EQ(span.len, 5, "content length"); return 1; } static int test_bracketed_nested(void) { ln_span_t span; int r = ln_simd_bracketed("{a{b}c}", 7, '{', '}', &span); 
TEST_ASSERT_EQ(r, LN_SIMD_OK, "parse nested brackets"); TEST_ASSERT_EQ(span.len, 5, "content includes nested"); return 1; } static int test_bracketed_unclosed(void) { ln_span_t span; int r = ln_simd_bracketed("{unclosed", 9, '{', '}', &span); TEST_ASSERT_EQ(r, LN_SIMD_EFORMAT, "reject unclosed bracket"); return 1; } /*============================================================================ * unescape Tests *============================================================================*/ static int test_unescape_basic(void) { char buf[32]; strcpy(buf, "hello\\nworld"); size_t new_len = ln_simd_unescape(buf, strlen(buf)); TEST_ASSERT_EQ(new_len, 11, "unescaped length"); TEST_ASSERT_EQ(buf[5], '\n', "newline inserted"); return 1; } static int test_unescape_backslash(void) { char buf[32]; strcpy(buf, "a\\\\b"); size_t new_len = ln_simd_unescape(buf, strlen(buf)); TEST_ASSERT_EQ(new_len, 3, "unescaped length"); TEST_ASSERT_EQ(buf[1], '\\', "literal backslash"); return 1; } static int test_unescape_tab(void) { char buf[32]; strcpy(buf, "a\\tb"); size_t new_len = ln_simd_unescape(buf, strlen(buf)); TEST_ASSERT_EQ(new_len, 3, "unescaped length"); TEST_ASSERT_EQ(buf[1], '\t', "tab inserted"); return 1; } static int test_unescape_no_escapes(void) { char buf[32]; strcpy(buf, "hello"); size_t new_len = ln_simd_unescape(buf, 5); TEST_ASSERT_EQ(new_len, 5, "unchanged"); TEST_ASSERT(memcmp(buf, "hello", 5) == 0, "content unchanged"); return 1; } /*============================================================================ * Span Utility Tests *============================================================================*/ static int test_span_eq(void) { ln_span_t a = { .start = "hello", .len = 5 }; ln_span_t b = { .start = "hello", .len = 5 }; ln_span_t c = { .start = "world", .len = 5 }; ln_span_t d = { .start = "hell", .len = 4 }; ln_span_t e = { .start = NULL, .len = 0 }; ln_span_t f = { .start = NULL, .len = 0 }; TEST_ASSERT(ln_span_eq(&a, &b), "equal spans"); 
TEST_ASSERT(!ln_span_eq(&a, &c), "different content"); TEST_ASSERT(!ln_span_eq(&a, &d), "different length"); TEST_ASSERT(ln_span_eq(&e, &f), "both empty"); return 1; } static int test_span_eq_str(void) { ln_span_t a = { .start = "hello", .len = 5 }; ln_span_t b = { .start = NULL, .len = 0 }; TEST_ASSERT(ln_span_eq_str(&a, "hello"), "match string"); TEST_ASSERT(!ln_span_eq_str(&a, "world"), "no match"); TEST_ASSERT(!ln_span_eq_str(&a, "hell"), "different length"); TEST_ASSERT(ln_span_eq_str(&b, ""), "empty span matches empty string"); return 1; } /*============================================================================ * Character Classification Tests *============================================================================*/ static int test_char_classification(void) { /* ln_is_space */ TEST_ASSERT(ln_is_space(' '), "space is whitespace"); TEST_ASSERT(ln_is_space('\t'), "tab is whitespace"); TEST_ASSERT(ln_is_space('\n'), "newline is whitespace"); TEST_ASSERT(ln_is_space('\r'), "CR is whitespace"); TEST_ASSERT(!ln_is_space('a'), "'a' is not whitespace"); /* ln_is_digit */ TEST_ASSERT(ln_is_digit('0'), "'0' is digit"); TEST_ASSERT(ln_is_digit('9'), "'9' is digit"); TEST_ASSERT(!ln_is_digit('a'), "'a' is not digit"); /* ln_is_alnum */ TEST_ASSERT(ln_is_alnum('a'), "'a' is alnum"); TEST_ASSERT(ln_is_alnum('Z'), "'Z' is alnum"); TEST_ASSERT(ln_is_alnum('5'), "'5' is alnum"); TEST_ASSERT(!ln_is_alnum('.'), "'.' 
is not alnum"); return 1; } /*============================================================================ * Main *============================================================================*/ int main(void) { printf("=== ln_simd Test Suite ===\n\n"); printf("Backend tests:\n"); RUN_TEST(test_backend_info); printf("\n"); printf("find_char tests:\n"); RUN_TEST(test_find_char_basic); RUN_TEST(test_find_char_edge); RUN_TEST(test_find_char_long); printf("\n"); printf("find_char_set tests:\n"); RUN_TEST(test_find_char_set_basic); RUN_TEST(test_find_char_set_edge); RUN_TEST(test_find_char_set_long); printf("\n"); printf("find_not_char_set tests:\n"); RUN_TEST(test_find_not_char_set_basic); RUN_TEST(test_find_not_char_set_edge); printf("\n"); printf("skip_space tests:\n"); RUN_TEST(test_skip_space); RUN_TEST(test_skip_space_long); printf("\n"); printf("skip_chars tests:\n"); RUN_TEST(test_skip_chars); printf("\n"); printf("word tests:\n"); RUN_TEST(test_word_basic); RUN_TEST(test_word_no_space); RUN_TEST(test_word_empty); RUN_TEST(test_word_single_char); RUN_TEST(test_word_long); printf("\n"); printf("char_to tests:\n"); RUN_TEST(test_char_to_basic); RUN_TEST(test_char_to_not_found); RUN_TEST(test_char_to_at_end); RUN_TEST(test_char_to_long); printf("\n"); printf("string_to tests:\n"); RUN_TEST(test_string_to_basic); RUN_TEST(test_string_to_not_found); RUN_TEST(test_string_to_single_char_delim); printf("\n"); printf("number tests:\n"); RUN_TEST(test_number_basic); RUN_TEST(test_number_zero); RUN_TEST(test_number_with_trailing); RUN_TEST(test_number_not_a_number); RUN_TEST(test_number_positive_sign); printf("\n"); printf("unsigned tests:\n"); RUN_TEST(test_unsigned_basic); RUN_TEST(test_unsigned_rejects_negative); printf("\n"); printf("hex tests:\n"); RUN_TEST(test_hex_basic); RUN_TEST(test_hex_upper); RUN_TEST(test_hex_with_trailing); printf("\n"); printf("ipv4 tests:\n"); RUN_TEST(test_ipv4_basic); RUN_TEST(test_ipv4_with_trailing); RUN_TEST(test_ipv4_all_zeros); 
RUN_TEST(test_ipv4_max); RUN_TEST(test_ipv4_invalid); printf("\n"); printf("quoted tests:\n"); RUN_TEST(test_quoted_double); RUN_TEST(test_quoted_single); RUN_TEST(test_quoted_with_escape); RUN_TEST(test_quoted_empty); RUN_TEST(test_quoted_unclosed); printf("\n"); printf("bracketed tests:\n"); RUN_TEST(test_bracketed_basic); RUN_TEST(test_bracketed_nested); RUN_TEST(test_bracketed_unclosed); printf("\n"); printf("unescape tests:\n"); RUN_TEST(test_unescape_basic); RUN_TEST(test_unescape_backslash); RUN_TEST(test_unescape_tab); RUN_TEST(test_unescape_no_escapes); printf("\n"); printf("span utility tests:\n"); RUN_TEST(test_span_eq); RUN_TEST(test_span_eq_str); printf("\n"); printf("char classification tests:\n"); RUN_TEST(test_char_classification); printf("\n"); /* Summary */ printf("=== Summary ===\n"); printf("Tests run: %d\n", tests_run); printf("Tests passed: %d\n", tests_passed); printf("Tests failed: %d\n", tests_failed); return tests_failed > 0 ? 1 : 0; } #else /* !ENABLE_TURBO */ int main(void) { printf("Turbo mode not enabled, skipping tests.\n"); return 0; } #endif /* ENABLE_TURBO */ liblognorm-2.1.0/tests/turbo_test_vm.c000066400000000000000000000760501520037563000200710ustar00rootroot00000000000000/** * @file turbo_test_vm.c * @brief Comprehensive test suite for ln_turbo VM execution engine * * Tests the full VM lifecycle, all instruction categories, backtracking, * call/ret subroutines, field context stack, and all field extraction * opcodes. * * @author Jérémie Jourdin / Advens * @copyright 2026 Advens. Released under ASL 2.0. 
*/ #include "config.h" #pragma GCC diagnostic ignored "-Wdeclaration-after-statement" #ifdef ENABLE_TURBO #include "turbo_vm.h" #include "turbo_opcode.h" #include "turbo_arena.h" #include "turbo_result_fast.h" #include #include #include /*============================================================================ * Test Framework *============================================================================*/ static int tests_run = 0; static int tests_passed = 0; static int tests_failed = 0; #define TEST_ASSERT(cond, msg) do { \ if (!(cond)) { \ fprintf(stderr, " FAIL: %s (line %d)\n", msg, __LINE__); \ return 0; \ } \ } while(0) #define TEST_ASSERT_EQ(a, b, msg) do { \ if ((a) != (b)) { \ fprintf(stderr, " FAIL: %s - got %lld, expected %lld (line %d)\n", \ msg, (long long)(a), (long long)(b), __LINE__); \ return 0; \ } \ } while(0) #define RUN_TEST(test_func) do { \ tests_run++; \ printf("Running %s... ", #test_func); \ fflush(stdout); \ if (test_func()) { \ tests_passed++; \ printf("OK\n"); \ } else { \ tests_failed++; \ printf("FAILED\n"); \ } \ } while(0) /*============================================================================ * Test Helpers *============================================================================*/ /** Common test setup: init arena + VM + result */ static void setup_vm(ln_vm_t *vm, ln_arena_t *arena, ln_fast_result_t *result) { ln_arena_init_sized(arena, 8192); ln_vm_init(vm, arena); ln_fast_result_init(result, arena); } /** Common test teardown */ static void teardown_vm(ln_arena_t *arena) { ln_arena_destroy(arena); } /** Helper: find field by name in result, return pointer or NULL */ static const ln_fast_field_t * find_field(const ln_fast_result_t *r, const char *name) { for (int i = 0; i < r->n_fields; i++) { if (r->fields[i].name && strcmp(r->fields[i].name, name) == 0) return &r->fields[i]; } return NULL; } /** Helper: get string value from field (handles inline vs external) */ static const char * field_str(const ln_fast_field_t 
*f, size_t *out_len) { if (!f) { *out_len = 0; return NULL; } if (f->type == LN_FTYPE_STRING_INLINE) { *out_len = strlen(f->v.inl); return f->v.inl; } else if (f->type == LN_FTYPE_STRING) { *out_len = f->v.str.len; return f->v.str.ptr; } *out_len = 0; return NULL; } /*============================================================================ * VM Lifecycle Tests *============================================================================*/ static int test_vm_init(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); TEST_ASSERT(vm.arena != NULL, "arena should be set"); TEST_ASSERT_EQ(vm.pc, 0, "pc should be 0"); TEST_ASSERT_EQ(vm.fork_sp, 0, "fork_sp should be 0"); TEST_ASSERT_EQ(vm.call_sp, 0, "call_sp should be 0"); TEST_ASSERT_EQ(vm.field_ctx_sp, 0, "field_ctx_sp should be 0"); TEST_ASSERT(vm.matched_rule == NULL, "no match yet"); teardown_vm(&arena); return 1; } static int test_vm_init_null(void) { ln_arena_t arena; ln_arena_init_sized(&arena, 4096); TEST_ASSERT_EQ(ln_vm_init(NULL, &arena), LN_VM_ERROR, "NULL vm"); TEST_ASSERT_EQ(ln_vm_init((ln_vm_t[1]){{}}, NULL), LN_VM_ERROR, "NULL arena"); ln_arena_destroy(&arena); return 1; } static int test_vm_reset(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); /* Simulate some state */ vm.pc = 42; vm.fork_sp = 3; vm.matched_rule = "test"; ln_vm_reset(&vm); TEST_ASSERT_EQ(vm.pc, 0, "pc reset"); TEST_ASSERT_EQ(vm.fork_sp, 0, "fork_sp reset"); TEST_ASSERT(vm.matched_rule == NULL, "matched_rule reset"); TEST_ASSERT(vm.prog == NULL, "prog reset"); /* Reset NULL should not crash */ ln_vm_reset(NULL); teardown_vm(&arena); return 1; } /*============================================================================ * Instruction Builder Tests *============================================================================*/ static int test_instruction_builders(void) { /* HALT */ ln_instr_t i = ln_i_halt(); TEST_ASSERT_EQ(i.op, OP_HALT, "halt 
opcode"); /* MATCH */ i = ln_i_match("rule1"); TEST_ASSERT_EQ(i.op, OP_MATCH, "match opcode"); TEST_ASSERT(strncmp(i.data.str, "rule1", 5) == 0, "match rule name"); /* LITERAL */ i = ln_i_literal("hello", 5); TEST_ASSERT_EQ(i.op, OP_LITERAL, "literal opcode"); TEST_ASSERT_EQ(i.aux, 5, "literal length"); TEST_ASSERT(memcmp(i.data.str, "hello", 5) == 0, "literal content"); /* CHAR */ i = ln_i_char(':'); TEST_ASSERT_EQ(i.op, OP_CHAR, "char opcode"); TEST_ASSERT_EQ(i.data.str[0], ':', "char value"); /* JUMP */ i = ln_i_jump(5); TEST_ASSERT_EQ(i.op, OP_JUMP, "jump opcode"); TEST_ASSERT_EQ(i.data.jump.offset, 5, "jump offset"); /* FORK */ i = ln_i_fork(3); TEST_ASSERT_EQ(i.op, OP_FORK, "fork opcode"); TEST_ASSERT_EQ(i.data.jump.offset, 3, "fork alt offset"); /* FAIL */ i = ln_i_fail(); TEST_ASSERT_EQ(i.op, OP_FAIL, "fail opcode"); /* FIELD */ i = ln_i_field(OP_FIELD_WORD, "hostname"); TEST_ASSERT_EQ(i.op, OP_FIELD_WORD, "field_word opcode"); TEST_ASSERT_EQ(i.flags & LN_INSTR_F_STORE, LN_INSTR_F_STORE, "store flag"); TEST_ASSERT(strncmp(i.data.str, "hostname", 8) == 0, "field name"); /* FIELD_CHAR_TO */ i = ln_i_field_char_to("msg", ':'); TEST_ASSERT_EQ(i.op, OP_FIELD_CHAR_TO, "field_char_to opcode"); TEST_ASSERT_EQ(i.data.char_to.delim, ':', "delimiter"); TEST_ASSERT(strncmp(i.data.char_to.name, "msg", 3) == 0, "field name"); /* FIELD_NAME_VALUE */ i = ln_i_field_name_value("pairs", ' ', '='); TEST_ASSERT_EQ(i.op, OP_FIELD_NAME_VALUE, "field_name_value opcode"); TEST_ASSERT_EQ(i.data.char_to.delim, ' ', "separator"); TEST_ASSERT_EQ(i.data.char_to.ass, '=', "assignator"); /* SKIP_SPACE */ i = ln_i_skip_space(); TEST_ASSERT_EQ(i.op, OP_SKIP_SPACE, "skip_space opcode"); /* SKIP_N */ i = ln_i_skip_n(10); TEST_ASSERT_EQ(i.op, OP_SKIP_N, "skip_n opcode"); TEST_ASSERT_EQ(i.aux, 10, "skip count"); /* TAG */ i = ln_i_tag("syslog"); TEST_ASSERT_EQ(i.op, OP_TAG, "tag opcode"); TEST_ASSERT(strncmp(i.data.str, "syslog", 6) == 0, "tag name"); /* CTX_PUSH */ i = 
ln_i_ctx_push("parent"); TEST_ASSERT_EQ(i.op, OP_CTX_PUSH, "ctx_push opcode"); TEST_ASSERT(strncmp(i.data.str, "parent", 6) == 0, "ctx name"); /* CTX_POP */ i = ln_i_ctx_pop(); TEST_ASSERT_EQ(i.op, OP_CTX_POP, "ctx_pop opcode"); /* NOP */ i = ln_i_nop(); TEST_ASSERT_EQ(i.op, OP_NOP, "nop opcode"); return 1; } static int test_instruction_size(void) { TEST_ASSERT_EQ(sizeof(ln_instr_t), 64, "instruction must be 64 bytes"); return 1; } /*============================================================================ * Basic Execution Tests *============================================================================*/ static int test_vm_halt(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_halt() }; ln_program_t prog = ln_program_make(code, 1, "test_halt"); int r = ln_vm_exec(&vm, &prog, "hello", 5, &result); TEST_ASSERT_EQ(r, LN_VM_NOMATCH, "halt = no match"); TEST_ASSERT(!ln_vm_matched(&vm), "not matched"); teardown_vm(&arena); return 1; } static int test_vm_literal_match(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_literal("hello", 5), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 2, "test_literal"); int r = ln_vm_exec(&vm, &prog, "hello", 5, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "should match"); TEST_ASSERT(ln_vm_matched(&vm), "should be matched"); TEST_ASSERT(strcmp(vm.matched_rule, "rule1") == 0, "rule id"); TEST_ASSERT_EQ(ln_vm_consumed(&vm), 5, "consumed all input"); TEST_ASSERT_EQ(ln_vm_remaining(&vm), 0, "no remaining"); teardown_vm(&arena); return 1; } static int test_vm_literal_no_match(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_literal("hello", 5), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 2, "test_literal_no"); int r = ln_vm_exec(&vm, &prog, "world", 5, &result); TEST_ASSERT_EQ(r, 
LN_VM_NOMATCH, "should not match"); TEST_ASSERT(!ln_vm_matched(&vm), "not matched"); teardown_vm(&arena); return 1; } static int test_vm_char_match(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_char('A'), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 2, "test_char"); int r = ln_vm_exec(&vm, &prog, "A", 1, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "should match"); ln_vm_reset(&vm); ln_fast_result_clear(&result); r = ln_vm_exec(&vm, &prog, "B", 1, &result); TEST_ASSERT_EQ(r, LN_VM_NOMATCH, "should not match"); teardown_vm(&arena); return 1; } static int test_vm_nop(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_nop(), ln_i_nop(), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 3, "test_nop"); int r = ln_vm_exec(&vm, &prog, "", 0, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "NOPs don't block"); teardown_vm(&arena); return 1; } /*============================================================================ * Jump and Fork Tests *============================================================================*/ static int test_vm_jump(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_jump(2), /* [0] jump to [2] */ ln_i_halt(), /* [1] skipped */ ln_i_match("rule1"),/* [2] match */ }; ln_program_t prog = ln_program_make(code, 3, "test_jump"); int r = ln_vm_exec(&vm, &prog, "", 0, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "jump should skip halt"); teardown_vm(&arena); return 1; } static int test_vm_fork_first_path(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); /* FORK: try literal "hello", alt -> literal "world" */ ln_instr_t code[] = { ln_i_fork(3), /* [0] try [1], alt -> [3] */ ln_i_literal("hello", 5),/* [1] */ ln_i_match("rule_hello"),/* [2] */ ln_i_literal("world", 5),/* [3] alt path 
*/ ln_i_match("rule_world"),/* [4] */ }; ln_program_t prog = ln_program_make(code, 5, "test_fork"); int r = ln_vm_exec(&vm, &prog, "hello", 5, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "should match first path"); TEST_ASSERT(strcmp(vm.matched_rule, "rule_hello") == 0, "first path rule"); teardown_vm(&arena); return 1; } static int test_vm_fork_second_path(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_fork(3), /* [0] try [1], alt -> [3] */ ln_i_literal("hello", 5),/* [1] */ ln_i_match("rule_hello"),/* [2] */ ln_i_literal("world", 5),/* [3] alt path */ ln_i_match("rule_world"),/* [4] */ }; ln_program_t prog = ln_program_make(code, 5, "test_fork2"); int r = ln_vm_exec(&vm, &prog, "world", 5, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "should match second path"); TEST_ASSERT(strcmp(vm.matched_rule, "rule_world") == 0, "second path rule"); TEST_ASSERT(vm.backtrack_count > 0, "backtrack happened"); teardown_vm(&arena); return 1; } static int test_vm_fork_neither(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_fork(3), /* [0] try [1], alt -> [3] */ ln_i_literal("hello", 5),/* [1] */ ln_i_match("rule_hello"),/* [2] */ ln_i_literal("world", 5),/* [3] alt path */ ln_i_match("rule_world"),/* [4] */ }; ln_program_t prog = ln_program_make(code, 5, "test_fork_fail"); int r = ln_vm_exec(&vm, &prog, "xxxxx", 5, &result); TEST_ASSERT_EQ(r, LN_VM_NOMATCH, "neither path matches"); teardown_vm(&arena); return 1; } static int test_vm_fail_opcode(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); /* FORK with FAIL on first path forces backtrack */ ln_instr_t code[] = { ln_i_fork(3), /* [0] try [1], alt -> [3] */ ln_i_fail(), /* [1] force backtrack */ ln_i_halt(), /* [2] unreachable */ ln_i_match("alt"), /* [3] alt path */ }; ln_program_t prog = ln_program_make(code, 4, "test_fail"); int r = 
ln_vm_exec(&vm, &prog, "", 0, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "should take alt path"); TEST_ASSERT(strcmp(vm.matched_rule, "alt") == 0, "alt rule matched"); teardown_vm(&arena); return 1; } /*============================================================================ * Skip Instruction Tests *============================================================================*/ static int test_vm_skip_space(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_literal("hello", 5), ln_i_skip_space(), ln_i_literal("world", 5), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 4, "test_skip_space"); int r = ln_vm_exec(&vm, &prog, "hello world", 13, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "skip space match"); teardown_vm(&arena); return 1; } static int test_vm_skip_n(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_skip_n(3), ln_i_literal("world", 5), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 3, "test_skip_n"); int r = ln_vm_exec(&vm, &prog, "123world", 8, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "skip N match"); teardown_vm(&arena); return 1; } /*============================================================================ * Field Extraction Tests *============================================================================*/ static int test_vm_field_word(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_field(OP_FIELD_WORD, "hostname"), ln_i_char(' '), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 3, "test_field_word"); int r = ln_vm_exec(&vm, &prog, "myhost rest", 11, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "field word match"); const ln_fast_field_t *f = find_field(&result, "hostname"); TEST_ASSERT(f != NULL, "field found"); size_t vlen; const char *val = field_str(f, &vlen); TEST_ASSERT(val != NULL, "value 
not null"); TEST_ASSERT_EQ(vlen, 6, "value length"); TEST_ASSERT(memcmp(val, "myhost", 6) == 0, "value content"); teardown_vm(&arena); return 1; } static int test_vm_field_int(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_field(OP_FIELD_INT, "pid"), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 2, "test_field_int"); int r = ln_vm_exec(&vm, &prog, "-42", 3, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "field int match"); const ln_fast_field_t *f = find_field(&result, "pid"); TEST_ASSERT(f != NULL, "field found"); TEST_ASSERT_EQ(f->type, LN_FTYPE_INT, "type is INT"); TEST_ASSERT_EQ(f->v.i, -42, "value is -42"); teardown_vm(&arena); return 1; } static int test_vm_field_rest(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_literal("prefix:", 7), ln_i_field(OP_FIELD_REST, "msg"), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 3, "test_field_rest"); int r = ln_vm_exec(&vm, &prog, "prefix:everything else", 22, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "field rest match"); const ln_fast_field_t *f = find_field(&result, "msg"); TEST_ASSERT(f != NULL, "field found"); size_t vlen; const char *val = field_str(f, &vlen); TEST_ASSERT(val != NULL, "value not null"); TEST_ASSERT_EQ(vlen, 15, "rest length"); TEST_ASSERT(memcmp(val, "everything else", 15) == 0, "rest content"); teardown_vm(&arena); return 1; } static int test_vm_field_char_to(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_field_char_to("key", ':'), ln_i_char(':'), ln_i_field(OP_FIELD_REST, "val"), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 4, "test_field_char_to"); int r = ln_vm_exec(&vm, &prog, "mykey:myvalue", 13, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "field char_to match"); const ln_fast_field_t *fk = find_field(&result, "key"); TEST_ASSERT(fk 
!= NULL, "key field found"); size_t vlen; const char *val = field_str(fk, &vlen); TEST_ASSERT(memcmp(val, "mykey", 5) == 0, "key value"); const ln_fast_field_t *fv = find_field(&result, "val"); TEST_ASSERT(fv != NULL, "val field found"); val = field_str(fv, &vlen); TEST_ASSERT(memcmp(val, "myvalue", 7) == 0, "val value"); teardown_vm(&arena); return 1; } static int test_vm_field_ipv4(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_field(OP_FIELD_IPV4, "src_ip"), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 2, "test_field_ipv4"); int r = ln_vm_exec(&vm, &prog, "192.168.1.1", 11, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "field ipv4 match"); const ln_fast_field_t *f = find_field(&result, "src_ip"); TEST_ASSERT(f != NULL, "field found"); size_t vlen; const char *val = field_str(f, &vlen); TEST_ASSERT(val != NULL, "value not null"); TEST_ASSERT(memcmp(val, "192.168.1.1", 11) == 0, "ipv4 value"); teardown_vm(&arena); return 1; } static int test_vm_field_quoted(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_field(OP_FIELD_QUOTED, "msg"), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 2, "test_field_quoted"); int r = ln_vm_exec(&vm, &prog, "\"hello world\"", 13, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "field quoted match"); const ln_fast_field_t *f = find_field(&result, "msg"); TEST_ASSERT(f != NULL, "field found"); teardown_vm(&arena); return 1; } /*============================================================================ * Tag Tests *============================================================================*/ static int test_vm_tag(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); ln_instr_t code[] = { ln_i_tag("syslog"), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 2, "test_tag"); int r = ln_vm_exec(&vm, &prog, "", 0, 
&result); TEST_ASSERT_EQ(r, LN_VM_OK, "should match"); TEST_ASSERT_EQ(result.n_tags, 1, "one tag"); TEST_ASSERT(ln_fast_has_tag(&result, "syslog"), "tag 'syslog' present"); TEST_ASSERT(!ln_fast_has_tag(&result, "other"), "tag 'other' absent"); teardown_vm(&arena); return 1; } /*============================================================================ * Call/Ret (Subroutine) Tests *============================================================================*/ static int test_vm_call_ret(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); /* Program: match "hello", call subroutine that matches " ", then match "world" */ ln_instr_t code[] = { /* [0] */ ln_i_literal("hello", 5), /* main: match "hello" */ /* [1] */ {.op = OP_CALL, .data.jump.offset = 3}, /* call [4] */ /* [2] */ ln_i_literal("world", 5), /* match "world" */ /* [3] */ ln_i_match("rule1"), /* done */ /* [4] */ ln_i_char(' '), /* subroutine: match space */ /* [5] */ {.op = OP_RET}, /* return */ }; ln_program_t prog = ln_program_make(code, 6, "test_call_ret"); int r = ln_vm_exec(&vm, &prog, "hello world", 11, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "call/ret should match"); teardown_vm(&arena); return 1; } /*============================================================================ * Field Context Tests (".." substitution) *============================================================================*/ static int test_vm_field_context(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); /* Simulate custom type: push context "src_ip", extract field "..", then pop. The ".." should resolve to "src_ip". 
*/ ln_instr_t code[] = { ln_i_ctx_push("src_ip"), ln_i_field(OP_FIELD_IPV4, ".."), ln_i_ctx_pop(), ln_i_match("rule1"), }; ln_program_t prog = ln_program_make(code, 4, "test_field_ctx"); int r = ln_vm_exec(&vm, &prog, "10.0.0.1", 8, &result); TEST_ASSERT_EQ(r, LN_VM_OK, "context match"); /* The field should be stored as "src_ip" (resolved from "..") */ const ln_fast_field_t *f = find_field(&result, "src_ip"); TEST_ASSERT(f != NULL, "field 'src_ip' found (resolved from '..')"); teardown_vm(&arena); return 1; } /*============================================================================ * Inline VM Helpers Tests *============================================================================*/ static int test_vm_inline_helpers(void) { ln_vm_t vm; memset(&vm, 0, sizeof(vm)); /* ln_vm_matched */ TEST_ASSERT(!ln_vm_matched(&vm), "initially not matched"); TEST_ASSERT(!ln_vm_matched(NULL), "NULL not matched"); /* ln_vm_remaining */ TEST_ASSERT_EQ(ln_vm_remaining(NULL), 0, "NULL remaining"); TEST_ASSERT_EQ(ln_vm_remaining(&vm), 0, "no input remaining"); /* ln_vm_consumed */ TEST_ASSERT_EQ(ln_vm_consumed(NULL), 0, "NULL consumed"); TEST_ASSERT_EQ(ln_vm_consumed(&vm), 0, "no input consumed"); /* ln_vm_get_field_context */ TEST_ASSERT(ln_vm_get_field_context(NULL) == NULL, "NULL context"); TEST_ASSERT(ln_vm_get_field_context(&vm) == NULL, "empty context"); return 1; } /*============================================================================ * Program Make Helper *============================================================================*/ static int test_program_make(void) { ln_instr_t code[] = { ln_i_halt() }; ln_program_t prog = ln_program_make(code, 1, "test"); TEST_ASSERT(prog.code == code, "code pointer"); TEST_ASSERT_EQ(prog.code_len, 1, "code length"); TEST_ASSERT(strcmp(prog.name, "test") == 0, "program name"); /* NULL name */ prog = ln_program_make(code, 1, NULL); TEST_ASSERT(prog.name == NULL, "NULL name ok"); return 1; } 
/*============================================================================ * Disassembly Tests *============================================================================*/ static int test_opcode_names(void) { TEST_ASSERT(strcmp(ln_opcode_name(OP_HALT), "HALT") == 0, "HALT name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_MATCH), "MATCH") == 0, "MATCH name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_LITERAL), "LITERAL") == 0, "LITERAL name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_FIELD_WORD), "FIELD_WORD") == 0, "FIELD_WORD name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_FIELD_IPV4), "FIELD_IPV4") == 0, "FIELD_IPV4 name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_SKIP_SPACE), "SKIP_SPACE") == 0, "SKIP_SPACE name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_TAG), "TAG") == 0, "TAG name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_NOP), "NOP") == 0, "NOP name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_SYSLOG_PRI), "SYSLOG_PRI") == 0, "SYSLOG_PRI name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_STATIC_FIELD), "STATIC_FIELD") == 0, "STATIC_FIELD name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_CTX_PUSH), "CTX_PUSH") == 0, "CTX_PUSH name"); TEST_ASSERT(strcmp(ln_opcode_name(OP_CTX_POP), "CTX_POP") == 0, "CTX_POP name"); /* Unknown opcode */ const char *name = ln_opcode_name(0x99); TEST_ASSERT(name != NULL, "unknown opcode returns string"); return 1; } static int test_disasm(void) { char buf[128]; ln_instr_t inst = ln_i_literal("hello", 5); int n = ln_instr_disasm(&inst, buf, sizeof(buf)); TEST_ASSERT(n > 0, "disasm produces output"); TEST_ASSERT(strstr(buf, "LITERAL") != NULL, "contains LITERAL"); TEST_ASSERT(strstr(buf, "hello") != NULL, "contains literal text"); /* CHAR disasm */ inst = ln_i_char(':'); n = ln_instr_disasm(&inst, buf, sizeof(buf)); TEST_ASSERT(n > 0, "char disasm"); TEST_ASSERT(strstr(buf, "CHAR") != NULL, "contains CHAR"); /* JUMP disasm */ inst = ln_i_jump(5); n = ln_instr_disasm(&inst, buf, sizeof(buf)); TEST_ASSERT(n > 0, "jump disasm"); TEST_ASSERT(strstr(buf, "JUMP") != NULL, 
"contains JUMP"); /* NULL safety */ TEST_ASSERT_EQ(ln_instr_disasm(NULL, buf, sizeof(buf)), 0, "NULL inst"); TEST_ASSERT_EQ(ln_instr_disasm(&inst, NULL, 128), 0, "NULL buf"); TEST_ASSERT_EQ(ln_instr_disasm(&inst, buf, 0), 0, "zero len"); return 1; } /*============================================================================ * Syslog-Realistic Test *============================================================================*/ static int test_vm_syslog_like(void) { ln_arena_t arena; ln_vm_t vm; ln_fast_result_t result; setup_vm(&vm, &arena, &result); /* Parse: "hostname program[1234]: message text" */ ln_instr_t code[] = { ln_i_field(OP_FIELD_WORD, "hostname"), /* [0] */ ln_i_char(' '), /* [1] */ ln_i_field_char_to("program", '['), /* [2] */ ln_i_char('['), /* [3] */ ln_i_field(OP_FIELD_INT, "pid"), /* [4] */ ln_i_literal("]: ", 3), /* [5] */ ln_i_field(OP_FIELD_REST, "msg"), /* [6] */ ln_i_tag("syslog"), /* [7] */ ln_i_match("syslog_rule"), /* [8] */ }; ln_program_t prog = ln_program_make(code, 9, "syslog_like"); const char *input = "myhost sshd[1234]: Failed password for root"; int r = ln_vm_exec(&vm, &prog, input, strlen(input), &result); TEST_ASSERT_EQ(r, LN_VM_OK, "syslog-like match"); /* Check hostname */ const ln_fast_field_t *f = find_field(&result, "hostname"); TEST_ASSERT(f != NULL, "hostname found"); size_t vlen; const char *val = field_str(f, &vlen); TEST_ASSERT(memcmp(val, "myhost", 6) == 0, "hostname value"); /* Check program */ f = find_field(&result, "program"); TEST_ASSERT(f != NULL, "program found"); val = field_str(f, &vlen); TEST_ASSERT(memcmp(val, "sshd", 4) == 0, "program value"); /* Check PID */ f = find_field(&result, "pid"); TEST_ASSERT(f != NULL, "pid found"); TEST_ASSERT_EQ(f->v.i, 1234, "pid value"); /* Check msg */ f = find_field(&result, "msg"); TEST_ASSERT(f != NULL, "msg found"); val = field_str(f, &vlen); TEST_ASSERT(memcmp(val, "Failed password for root", 24) == 0, "msg value"); /* Check tag */ 
TEST_ASSERT(ln_fast_has_tag(&result, "syslog"), "syslog tag"); /* Check rule */ TEST_ASSERT(strcmp(result.rule_id, "syslog_rule") == 0, "rule id"); TEST_ASSERT(result.flags & LN_FRESULT_MATCHED, "matched flag"); teardown_vm(&arena); return 1; } /*============================================================================ * Main *============================================================================*/ int main(void) { printf("=== ln_vm Test Suite ===\n\n"); printf("VM lifecycle tests:\n"); RUN_TEST(test_vm_init); RUN_TEST(test_vm_init_null); RUN_TEST(test_vm_reset); printf("\n"); printf("Instruction builder tests:\n"); RUN_TEST(test_instruction_builders); RUN_TEST(test_instruction_size); printf("\n"); printf("Basic execution tests:\n"); RUN_TEST(test_vm_halt); RUN_TEST(test_vm_literal_match); RUN_TEST(test_vm_literal_no_match); RUN_TEST(test_vm_char_match); RUN_TEST(test_vm_nop); printf("\n"); printf("Jump and fork tests:\n"); RUN_TEST(test_vm_jump); RUN_TEST(test_vm_fork_first_path); RUN_TEST(test_vm_fork_second_path); RUN_TEST(test_vm_fork_neither); RUN_TEST(test_vm_fail_opcode); printf("\n"); printf("Skip instruction tests:\n"); RUN_TEST(test_vm_skip_space); RUN_TEST(test_vm_skip_n); printf("\n"); printf("Field extraction tests:\n"); RUN_TEST(test_vm_field_word); RUN_TEST(test_vm_field_int); RUN_TEST(test_vm_field_rest); RUN_TEST(test_vm_field_char_to); RUN_TEST(test_vm_field_ipv4); RUN_TEST(test_vm_field_quoted); printf("\n"); printf("Tag tests:\n"); RUN_TEST(test_vm_tag); printf("\n"); printf("Call/ret tests:\n"); RUN_TEST(test_vm_call_ret); printf("\n"); printf("Field context tests:\n"); RUN_TEST(test_vm_field_context); printf("\n"); printf("Inline helper tests:\n"); RUN_TEST(test_vm_inline_helpers); RUN_TEST(test_program_make); printf("\n"); printf("Disassembly tests:\n"); RUN_TEST(test_opcode_names); RUN_TEST(test_disasm); printf("\n"); printf("Integration tests:\n"); RUN_TEST(test_vm_syslog_like); printf("\n"); /* Summary */ printf("=== Summary 
===\n"); printf("Tests run: %d\n", tests_run); printf("Tests passed: %d\n", tests_passed); printf("Tests failed: %d\n", tests_failed); return tests_failed > 0 ? 1 : 0; } #else /* !ENABLE_TURBO */ int main(void) { printf("Turbo mode not enabled, skipping tests.\n"); return 0; } #endif /* ENABLE_TURBO */ liblognorm-2.1.0/tests/user_test.c000066400000000000000000000012051520037563000172000ustar00rootroot00000000000000#include "config.h" #include #include "liblognorm.h" #include "v1_liblognorm.h" int main() { const char* str = "foo says hello!"; json_object *obj, *from, *msg; obj = from = msg = NULL; ln_ctx ctx = ln_initCtx(); int ret = 1; ln_v1_loadSample(ctx, "rule=:%from:word% says %msg:word%"); if (ln_v1_normalize(ctx, str, strlen(str), &obj) == 0) { json_object_object_get_ex(obj, "from", &from); json_object_object_get_ex(obj, "msg", &msg); ret = strcmp(json_object_get_string(from), "foo") || strcmp(json_object_get_string(msg), "hello!"); } if (obj != NULL) json_object_put(obj); ln_exitCtx(ctx); return ret; } liblognorm-2.1.0/tests/usrdef_actual1.sh000077500000000000000000000022361520037563000202650ustar00rootroot00000000000000#!/bin/bash # added 2015-10-30 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "an actual test case for user-defined types" add_rule 'version=2' add_rule 'type=@endpid:%{"type":"alternative","parser":[ {"type": "literal", "text":"]"},{"type": "literal", "text":"]:"} ] }%' add_rule 'type=@AUTOTYPE1:%iface:char-to:/%/%ip:ipv4%(%port:number%)' add_rule 'type=@AUTOTYPE1:%iface:char-to:\x3a%\x3a%ip:ipv4%/%port:number%' add_rule 'type=@AUTOTYPE1:%iface:char-to:\x3a%\x3a%ip:ipv4%' add_rule 'rule=:a pid[%pid:number%%-:@endpid% b' add_rule 'rule=:a iface %.:@AUTOTYPE1% b' execute 'a pid[4711] b' assert_output_json_eq '{ "pid": "4711" }' # the next text needs priority assignment #execute 'a pid[4712]: b' #assert_output_json_eq '{ "pid": "4712" }' execute 'a iface inside:10.0.0.1 b' assert_output_json_eq '{ "ip": "10.0.0.1", "iface": "inside" }' execute 'a iface inside:10.0.0.1/514 b' assert_output_json_eq '{ "port": "514", "ip": "10.0.0.1", "iface": "inside" }' execute 'a iface inside/10.0.0.1(514) b' assert_output_json_eq '{ "port": "514", "ip": "10.0.0.1", "iface": "inside" }' cleanup_tmp_files liblognorm-2.1.0/tests/usrdef_ipaddr.sh000077500000000000000000000011101520037563000201640ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "simple user-defined type" add_rule 'version=2' add_rule 'type=@IPaddr:%ip:ipv4%' add_rule 'type=@IPaddr:%ip:ipv6%' add_rule 'rule=:an ip address %.:@IPaddr%' execute 'an ip address 10.0.0.1' assert_output_json_eq '{ "ip": "10.0.0.1" }' execute 'an ip address 127::1' assert_output_json_eq '{ "ip": "127::1" }' execute 'an ip address 2001:DB8:0:1::10:1FF' assert_output_json_eq '{ "ip": "2001:DB8:0:1::10:1FF" }' cleanup_tmp_files liblognorm-2.1.0/tests/usrdef_ipaddr_dotdot.sh000077500000000000000000000011211520037563000215430ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "user-defined type with '..' name" add_rule 'version=2' add_rule 'type=@IPaddr:%..:ipv4%' add_rule 'type=@IPaddr:%..:ipv6%' add_rule 'rule=:an ip address %ip:@IPaddr%' execute 'an ip address 10.0.0.1' assert_output_json_eq '{ "ip": "10.0.0.1" }' execute 'an ip address 127::1' assert_output_json_eq '{ "ip": "127::1" }' execute 'an ip address 2001:DB8:0:1::10:1FF' assert_output_json_eq '{ "ip": "2001:DB8:0:1::10:1FF" }' cleanup_tmp_files liblognorm-2.1.0/tests/usrdef_ipaddr_dotdot2.sh000077500000000000000000000013041520037563000216300ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "user-defined type with '..' 
name embedded in other fields" add_rule 'version=2' add_rule 'type=@IPaddr:%..:ipv4%' add_rule 'type=@IPaddr:%..:ipv6%' add_rule 'rule=:a word %w1:word% an ip address %ip:@IPaddr% another word %w2:word%' execute 'a word word1 an ip address 10.0.0.1 another word word2' assert_output_json_eq '{ "w2": "word2", "ip": "10.0.0.1", "w1": "word1" }' execute 'a word word1 an ip address 2001:DB8:0:1::10:1FF another word word2' assert_output_json_eq '{ "w2": "word2", "ip": "2001:DB8:0:1::10:1FF", "w1": "word1" }' cleanup_tmp_files liblognorm-2.1.0/tests/usrdef_ipaddr_dotdot3.sh000077500000000000000000000020271520037563000216340ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "user-defined type with '..' name embedded in other fields" add_rule 'version=2' add_rule 'type=@IPaddr:%..:ipv4%' add_rule 'type=@IPaddr:%..:ipv6%' add_rule 'type=@ipOrNumber:%..:@IPaddr{"priority":"1000"}%' add_rule 'type=@ipOrNumber:%..:number%' #add_rule 'type=@ipOrNumber:%..:@IPaddr%' # if we enable this instead of the above, the test would break add_rule 'rule=:a word %w1:word% an ip address %ip:@ipOrNumber% another word %w2:word%' execute 'a word word1 an ip address 10.0.0.1 another word word2' assert_output_json_eq '{ "w2": "word2", "ip": "10.0.0.1", "w1": "word1" }' execute 'a word word1 an ip address 2001:DB8:0:1::10:1FF another word word2' assert_output_json_eq '{ "w2": "word2", "ip": "2001:DB8:0:1::10:1FF", "w1": "word1" }' execute 'a word word1 an ip address 111 another word word2' assert_output_json_eq '{ "w2": "word2", "ip": "111", "w1": "word1" }' cleanup_tmp_files liblognorm-2.1.0/tests/usrdef_nested_segfault.sh000077500000000000000000000010461520037563000221050ustar00rootroot00000000000000#!/bin/bash # added 2018-04-07 by Vincent Tondellier # based on usrdef_twotypes.sh # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "nested user-defined types" add_rule 'version=2' add_rule 'type=@hex-byte:%..:hexnumber{"maxval": "255"}%' add_rule 'type=@two-hex-bytes:%f1:@hex-byte% %f2:@hex-byte%' add_rule 'type=@unused:stop' add_rule 'rule=:two bytes %.:@two-hex-bytes% %-:@unused%' execute 'two bytes 0xff 0x16 stop' assert_output_json_eq '{ "f1": "0xff", "f2": "0x16" }' cleanup_tmp_files liblognorm-2.1.0/tests/usrdef_simple.sh000077500000000000000000000007321520037563000202230ustar00rootroot00000000000000#!/bin/bash # added 2015-07-22 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "simple user-defined type" add_rule 'version=2' add_rule 'type=@hex-byte:%f1:hexnumber{"maxval": "255"}%' add_rule 'rule=:a word %w1:word% a byte % .:@hex-byte % another word %w2:word%' execute 'a word w1 a byte 0xff another word w2' assert_output_json_eq '{ "w2": "w2", "f1": "0xff", "w1": "w1" }' cleanup_tmp_files liblognorm-2.1.0/tests/usrdef_two.sh000077500000000000000000000012001520037563000175320ustar00rootroot00000000000000#!/bin/bash # added 2015-10-30 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh test_def $0 "user-defined type with two alternatives" add_rule 'version=2' add_rule 'type=@hex-byte:%f1:hexnumber{"maxval": "255"}%' add_rule 'type=@hex-byte:%f1:word%' add_rule 'rule=:a word %w1:word% a byte % .:@hex-byte % another word %w2:word%' execute 'a word w1 a byte 0xff another word w2' assert_output_json_eq '{ "w2": "w2", "f1": "0xff", "w1": "w1" }' execute 'a word w1 a byte TEST another word w2' assert_output_json_eq '{ "w2": "w2", "f1": "TEST", "w1": "w1" }' cleanup_tmp_files liblognorm-2.1.0/tests/usrdef_twotypes.sh000077500000000000000000000010031520037563000206200ustar00rootroot00000000000000#!/bin/bash # added 2015-10-30 by Rainer Gerhards # This file is part of the liblognorm project, released under ASL 2.0 . 
$srcdir/exec.sh test_def $0 "two user-defined types" add_rule 'version=2' add_rule 'type=@hex-byte:%f1:hexnumber{"maxval": "255"}%' add_rule 'type=@word-type:%w1:word%' add_rule 'rule=:a word %.:@word-type% a byte % .:@hex-byte % another word %w2:word%' execute 'a word w1 a byte 0xff another word w2' assert_output_json_eq '{ "w2": "w2", "f1": "0xff", "w1": "w1" }' cleanup_tmp_files liblognorm-2.1.0/tests/very_long_logline.sh000077500000000000000000000006731520037563000211030ustar00rootroot00000000000000#!/bin/bash # added 2015-09-21 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh msg="foo" for i in $(seq 1 10); do msg="${msg},${msg},abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ${i}" done test_def $0 "float field" add_rule 'rule=:%line:rest%' execute $msg assert_output_json_eq "{\"line\": \"$msg\"}" cleanup_tmp_files liblognorm-2.1.0/tests/very_long_logline_jsoncnf.sh000077500000000000000000000007451520037563000226230ustar00rootroot00000000000000#!/bin/bash # added 2015-09-21 by singh.janmejay # This file is part of the liblognorm project, released under ASL 2.0 . $srcdir/exec.sh msg="foo" for i in $(seq 1 10); do msg="${msg},${msg},abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ${i}" done test_def $0 "float field" add_rule 'version=2' add_rule 'rule=:%{"name":"line", "type":"rest"}%' execute $msg assert_output_json_eq "{\"line\": \"$msg\"}" cleanup_tmp_files liblognorm-2.1.0/tools/000077500000000000000000000000001520037563000150175ustar00rootroot00000000000000liblognorm-2.1.0/tools/Makefile.am000066400000000000000000000005111520037563000170500ustar00rootroot00000000000000# Note: slsa is not yet functional with the v2 engine! 
#bin_PROGRAMS = slsa #slsa_SOURCES = slsa.c syntaxes.c ../src/parser.c #slsa_CPPFLAGS = -I$(top_srcdir)/src $(JSON_C_CFLAGS) #slsa_LDADD = $(JSON_C_LIBS) $(LIBLOGNORM_LIBS) $(LIBESTR_LIBS) #slsa_DEPENDENCIES = ../src/liblognorm.la EXTRA_DIST=logrecord.h #include_HEADERS= liblognorm-2.1.0/tools/lablog000066400000000000000000000330711520037563000162060ustar00rootroot00000000000000This is a log of findings during development of the slsa heuristic Terms ----- log message a string of printable characters, delimited by the operating system line terminator. word a substring inside a log message that is delimited by specific delimiters, usually whitespace [this definition may need to be changed] subword a sequence inside a word that is not delimited by the usual word delimiters MSA multiple sequence alignment (like used in bioinformatics) motif a substring inside a log message that is frequently being used and has a specific syntax and semantic (e.g. an IPv4 address). The term is based on the idea of "sequence motif" in bioinformatics. parser (also "motif parser") extracts actual data matching a given motif from a log message. Open Problems ------------- [P00001] How to detect TAG? syslog TAG does not work well with common prefix/suffix detection. The problem is that e.g. PIX tags have integers, which are detected and Linux logs have process IDs. The PIX integers are actually part of the constant text, whereas in Linux it is fine to detect them as variable part. This problem becomes even more problematic with more agressive word delimition (see [I00002]). [P00002] subword detection with optional parts Sometimes, subwords may contain optional parts, so not all words may contain the common delimiter (e.g. Cisco PIX). In this case, subword detection is not triggered. 
Open TODO Items --------------- Note: TODO items regarding motif parsers can also be found at https://github.com/rsyslog/liblognorm/issues?utf8=%E2%9C%93&q=is%3Aissue+label%3A%22motif+parser%22 [T00001] - Dictionary motif We need to add dictionary motifs, where one can e.g. query users or common words. It may make sense to ship some "common-word dictionaries". Common word (sets) are like ("TCP", "UDP", "ICMP", ...) which are often found in firewall messages. With such words, subword detection does not prove to be helpful, because it detects a common suffix like [["TC", "UD"] "P"]. [T00002] - URL motif Logs often contain URLs (and file paths), just like this: 193.45.10.119:/techsupp/css/default.css We need to find a proper way to identify them. This motif is probably very broad and hard to specify, especially when thinking about multiple platforms. [T00003] - name=value motif name=value is very common and justified to be treated as a motif. [T00004] - quote characters in n=v pairs We need to find out which quote characters are present, especially in values, across a wide variety of Linux logs (at least the typical ones). This can be used to better describe the n=v motif. Recommended further testing --------------------------- This section lists some tests that would be good to conduct as part of the project. However, these experiments are NOT considered vital for the results. - test slsa on iptables logs with the regular n=v parser We want to see here if there is a way to still properly detect the structure. Maybe we need to shuffle the "nondetected" word elements. - test slsa without cisco-interface-spec parser Same question as above, can the generic heuristic sufficiently well detect structure? The core goal of these tests is to find ways to improve the slsa algorithm. Indicators in first column of actual log ---------------------------------------- - lesson learned, general info !
idea + result of idea Ideas are referenced by [Innnnn] at start of log entry Actual Log 09:55:01 ---------- 2015-04-14 - Cisco ASA has many slightly different formats for specifying interface addresses (the faddr, gaddr, laddr parts of message). It looks like the log strings are written directly in code, at least we have a lot of small inconsistencies which suggest so. This seems to make it impractical to generate a single motif for this type. - Based on visual review, it looks like pattern detection on a subword-level may make sense (e.g. detect IP address, then you'll find a common slash, then a port number). This boils down to doing a full MSA on the word level, which I tried to avoid for performance reasons. ! [I00001] We may do a subword alignment on some common delimiters like slash, comma, colon etc. This can probably be done much faster than a full MSA. ! [I00002] We may also experiment with additional "word-delimiters", optionally enable those from the same set as [I00001]. When this is done, we need to make sure the delimiter can be stored inside the rulebase (an additional step to be taken only if this idea proves to be useful). 2015-04-15 + [I00002] First rough tests indicate that additional word delimiters seem to work well, at least on Cisco messages. A problem is that TAG values are now intermingled, as a TAG typically is "name:" and the colon is now a word delimiter. This leads to all TAGs becoming one big group, and only after the colon differentiation occurs. That, however, is too late, especially for things like fixed-width structured formats (e.g. squid). This handling of the TAG is a big problem, because the TAG is usually the primary identifier for a message class. So it should be preserved as unique. The same problem shows up when processing Linux logs. The TAG becomes effectively unusable as a way to identify the message. I also have problems interpreting postfix logs correctly, if "[]" is part of the delimiter set.
I have not tried yet to trace the root cause down, as the approach in general seems to be problematic in regard to TAG. I suspect that this "postfix problem" is actually related to the TAG. Looks like [P00001] must be looked at with priority in order to continue with useful analysis. Another problem that manual review brings up is that colon is often used e.g. in time formats ("12:04:11"). If we use colon as word delimiter, we are no longer able to detect such time formats. This suggests that more aggressive word delimitation is probably not a good thing to have. However, it looks like we could do the same during subword detection stage without big problems. Ideally, this would be a kind of alignment like proposed in [I00001]. 2015-04-16 + [I00001][I00002] Tried subword detection with colon and slash as delimiters. Works fine on Cisco logs and does not have any of the problems we had with [I00002] (as described yesterday). One problem is if we have something like this: 7l: connection {5} 8l: %posint% {5} 9l: for {5} 10l: outside:192.168.66.144/80 {3} 11l: (192.168.66.144/80) {3} 12l: to {3} 13l: inside:192.168.12.154/56839 14l: (217.17.249.222/56839) [nterm 1] 13l: inside:192.168.12.154/56842 14l: (217.17.249.222/56842) [nterm 1] 13l: inside:192.168.12.154/56845 14l: (217.17.249.222/31575) [nterm 1] The algo will explode the lower "inside:" parts individually because each node is of course processed individually.
So the result will be: 7l: connection {5} 8l: %posint% {5} 9l: for {5} 10l: outside {subword} {3} 11l: : {subword} 12l: %ipv4% {subword} 13l: / {subword} 14l: %posint% {subword} 15l: (192.168.66.144 / 80) to {subword} {3} 16l: inside {subword} 17l: : {subword} 18l: %ipv4% {subword} 19l: / {subword} 20l: %posint% {subword} 21l: (217.17.249.222 / 56839) {subword} 16l: inside {subword} 17l: : {subword} 18l: %ipv4% {subword} 19l: / {subword} 20l: %posint% {subword} 21l: (6.79.249.222 / 56842) {subword} 16l: inside {subword} 17l: : {subword} 18l: %ipv4% {subword} 19l: / {subword} 20l: %posint% {subword} 21l: (217.17.249.222 / 31575) {subword} However, we expect that this will not affect the final rule generation. But needs to be proven. [I00003] Once we have split subwords, we may do another "alignment run" on the tree and check if we now can find additional things to combine. Needs to be seen. In any case, we need to split braces, which requires slight changes to the split algo. We also see that the subword split algo does not work properly if we have words of different formats. Cisco PIX, for example, has interface specifiers which may either be "IP" or "IP/PORT" (among others). In that case, the delimiter "/" is not detected as common delimiter and so subword detection is not triggered. Now tracked as [P00002]. - I had a frist try at using "=" as a subword delimiter. This works for name=value fields, but only if they are all in the same sequence. It looks like it is a much better idea, for real N=V type of log messages to generate an parser that works on the complete line (things like iptables) [T00003]. It may still make sense to have individual N=V parsers if these constructs are seen within otherwise non-structured messages. 2015-04-17 - as expected, the Linux kernel timestamp motif parser proved to be useful. 2015-04-29 - some interim entries are missing, as I was focussed on some other work, including support for some structured formats. 
- I added cee-syslog and pure JSON motifs for structured formats. As expected, this permits slsa to process log files that contain both structured and unstructured formats (a common use case) much more rapidly and also improved the correct detection. Note that pure JSON is seen for example in GELF, whereas cee-syslog is seen in ossec. Both are frequently used. - I also worked on a specific motif parser for interface specifiers found in Cisco logs. This, too, improved detection, but at a later stage we may want to try if we can get similar results without a specific parser. For practical cases, however, a specific parser is much more user-friendly. - I have also continued to work on a specific Name=Value (N=V) parser [T00003]. A first test some days ago showed that iptables needs a special parser because it also has name "flags", that is a field without a value and WITHOUT an equal sign (e.g. "DF"). This is not something we want in the regular n=v motif as it would match much too broadly. For iptables, we can do other restrictions (e.g. names need to consist of uppercase letters A..Z). So an iptables motif would not match too broadly. Unfortunately, iptables is an example of where a specialised parser is needed for a single application. At least from the slsa point of view, this is bad, because it shows that the tool has limited potential. From a practical perspective, that iptables motif is definitely frequent enough to justify the (small) effort to implement a specific parser. - on the n=v parser: it turns out to be somewhat problematic to find good sets of characters permitted in name and value. Value is not so much a problem for quoted strings, but unquoted strings are very common.
For example "session opened for user root by (uid=0)" will lead to "session opened for user root by (%name-value-list%" This shows a) the we must not insist on whitespace before the name, as quoted forms (like the brace here) are common b) but the terminating brace is treated as part of the value if we only use whitespace delimition Especially b) is a bit hard of a problem, because we may also see things like "name=(value)", so it seems we cannot forbid braces in values. HOWEVER, we may actually forbid them if inside the string and not being escaped - so think of them as just different types of quoting, which they are most probably like. This should be verified by experiments. --> [T00004] 2015-05-03 - did some experiments with the v2-iptables parser I had newly written. It turned out that the parser is matching too broad (e.g a single upper case latter will qualify as an iptables entry, because it looks like a single flag value). While this is fine with a manually crafted rulebase, it doesn't work with tools like slsa, which try to detect what the motif is. Also, it may cause misdetection even in the manually crafted case. So this is a very good proof that motif parsers (and thus the motifs) need to be very well defined and as specific as possible. We will sort this out with the idea that an iptables entry always has more than a single word. + fixing the v2-iptables parser as proposed above did solve the issue with misdetection liblognorm-2.1.0/tools/logrecord.h000066400000000000000000000032261520037563000171530ustar00rootroot00000000000000/* The (in-memory) format of a log record. * * A log record is sequence of nodes of different syntaxes. A log * record is described by a pointer to its root node. * The most important node type is literal text, which is always * assumed if no other syntax is detected. A full list of syntaxes * can be found below. 
* * Copyright 2015 Rainer Gerhards * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LOGRECORD_H_INCLUDED #define LOGRECORD_H_INCLUDED #include /* log record node syntaxes * This "enumeration" starts at 0 and increments for each new * syntax. Note that we do not use an enum type so that we can * streamline the in-memory representation. For large sets of * log records to be held in main memory, this is important. */ #define LRN_SYNTAX_LITERAL_TEXT 0 #define LRN_SYNTAX_IPV4 1 #define LRN_SYNTAX_INT_POSITIVE 2 #define LRN_SYNTAX_DATE_RFC3164 3 struct logrec_node { struct logrec_node *next; /* NULL: end of record */ int8_t ntype; union { char *ltext; /* the literal text */ int64_t number; /* all integer types */ } val; }; typedef struct logrec_node logrecord_t; #endif /* ifndef LOGRECORD_H_INCLUDED */ liblognorm-2.1.0/tools/slsa.c000066400000000000000000001040461520037563000161320ustar00rootroot00000000000000/* simple log structure analyzer (slsa) * * This is a heuristic to mine log structure. * * Copyright 2015 Rainer Gerhards * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ /* Learnings (mostly from things that failed) * ------------------------------------------ * - if we detect IP (and similar things) too early in the process, we * get wrong detections (e.g. for reverse DNS entries) * - if we detect IP addresses during value collapsing, we get unacceptable * runtime and memory requirements, as a huge number of different actual * values needs to be stored. * ->current solution is to detect them after tokenization but before * adding them to the structure tree. * * - if we split some known syntaxes (like cisco ip/port) into its parts, * they may get improperly detected in regard to subwords. This especially * happens if there are few servers (or few destination ports), where parts * of the IP (or the port) are considered to be a common prefix, which then * may no longer be properly detected. As it looks, it pays to really have a * much richer set of special parsers, and let the common pre/suffix * detection only be used in rare cases where we do not have a parser. * Invocation may even be treated as the need to check for a new parser. * * Along the same lines, common "command words" (like "udp", "tcp") may * also be detected by a specific parser, because otherwise they tend to have * common pre/suffixes (e.g. {"ud","tc"},"p"). This could be done via a * dictionary lookup (bsearch, later to become a single state machine?). * * Along these lines, we probably need parsers that return *multiple* values, * e.g. the IP address and port. This requires that the field name has a * common prefix. A solution is to return JSON where the node element is * named like the field name, and the members have parser-specific names.
*/ #include "config.h" #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include "json_compatibility.h" #include "liblognorm.h" #include "internal.h" #include "parser.h" #include "syntaxes.h" #define MAXLINE 32*1024 struct wordflags { unsigned isSubword : 1, isSpecial : 1; /* indicates this is a special parser */ }; struct wordinfo { char *word; int occurs; struct wordflags flags; }; struct logrec_node { struct logrec_node *parent; struct logrec_node *sibling; struct logrec_node *child; /* NULL: end of record */ int nterm; /* number of times this was the terminal node */ int8_t ntype; union { char *ltext; /* the literal text */ int64_t number; /* all integer types */ } val; /* the addtl value structure is dumb, and needs to * be replaced with something much better when the initial * experiments work out. But good enough for now. * (bsearch table?) */ int nwords; /* size of table */ struct wordinfo **words; }; typedef struct logrec_node logrec_node_t; logrec_node_t *root = NULL; typedef struct rule_table_etry rule_table_etry_t; typedef struct rule_table rule_table_t; struct rule_table { int maxEtry; /* max # entries that fit into table */ int nxtEtry; /* next free entry */ rule_table_etry_t **entries; }; struct rule_table_etry { int ntimes; char *rule; }; #define RULE_TABLE_GROWTH 512 /* number of entries rule table grows when too small */ /* command line options */ static int displayProgress = 0; /* display progress indicators */ static int optPrintTree = 0; /* disply internal tree for debugging purposes */ static int optPrintDebugOutput = 0; static int optSortMultivalues = 1; /* forward definitions */ void wordDetectSyntax(struct wordinfo *const __restrict__ wi, const size_t wordlen, const int); void treePrint(logrec_node_t *node, const int level); static void reportProgress(const char *const label); /* param word may be NULL, if word is not yet known */ static struct wordinfo * wordinfoNew(char *const word) { struct 
wordinfo *const wi = calloc(1, sizeof(struct wordinfo)); if(wi == NULL) { perror("slsa: malloc error struct wordinfo"); exit(1); } wi->word = word; wi->occurs = 1; return wi; } static void wordinfoDelete(struct wordinfo *const wi) { //printf("free %p: %s\n", wi->word, wi->word); free(wi->word); free(wi); } static rule_table_t * ruleTableCreate(void) { rule_table_t *const rt = calloc(1, sizeof(rule_table_t)); if(rt == NULL) { perror("slsa: malloc error ruletable_create"); exit(1); } rt->nxtEtry = 0; rt->maxEtry = 0; return rt; } static rule_table_etry_t * ruleTableEtryCreate(rule_table_t *const __restrict__ rt) { if(rt->nxtEtry == rt->maxEtry) { const int newMax = rt->maxEtry + RULE_TABLE_GROWTH; rt->entries = realloc(rt->entries, newMax * sizeof(rule_table_t)); if(rt->entries == NULL) { perror("slsa: malloc error ruletable_create"); exit(1); } rt->maxEtry = newMax; } const int etry = rt->nxtEtry; rt->nxtEtry++; rt->entries[etry] = calloc(1, sizeof(rule_table_etry_t)); if(rt->entries[etry] == NULL) { perror("slsa: malloc error entry ruletable_create"); exit(1); } return rt->entries[etry]; } static void ruleTablePrint(rule_table_t *const __restrict__ rt) { for(int i = 0 ; i < rt->nxtEtry ; ++i) { reportProgress("rule table print"); printf("%6d times: %s\n", rt->entries[i]->ntimes, rt->entries[i]->rule); } } static void ruleTableDestroy(rule_table_t *const __restrict__ rt) { for(int i = 0 ; i < rt->nxtEtry ; ++i) { free((void*)rt->entries[i]->rule); free((void*)rt->entries[i]); } free((void*) rt->entries); free((void*) rt); } /* function to quicksort rule table */ static int qs_comp_rt_etry(const void *v1, const void *v2) { const rule_table_etry_t *const e1 = *((rule_table_etry_t **) v1); const rule_table_etry_t *const e2 = *((rule_table_etry_t **) v2); return -(e1->ntimes - e2->ntimes); /* sort descending! */ } /* a stack to keep track of detected (sub)words that * need to be processed in the future. 
*/ #define SIZE_WORDSTACK 8 static struct wordinfo* wordStack[SIZE_WORDSTACK]; static int wordstackPtr = -1; /* empty */ static void wordstackPush(struct wordinfo *const wi) { ++wordstackPtr; if(wordstackPtr >= SIZE_WORDSTACK) { fprintf(stderr, "slsa: wordstack too small\n"); exit(1); } wordStack[wordstackPtr] = wi; } /* returns wordinfo or NULL, if stack is empty */ static struct wordinfo * wordstackPop(void) { return (wordstackPtr < 0) ? NULL : wordStack[wordstackPtr--]; } static void reportProgress(const char *const label) { static unsigned cnt = 0; static const char *lastlabel = NULL; if(!displayProgress) return; if(lastlabel == NULL) lastlabel = strdup(label); if(label == NULL || strcmp(label, lastlabel)) { fprintf(stderr, "\r%s: %u - done\n", lastlabel, cnt); cnt = 0; free((void*)lastlabel); lastlabel = (label == NULL) ? NULL : strdup(label); } else { if(++cnt % 100 == 0) fprintf(stderr, "\r%s: %u", label, cnt); } } static int qs_compmi(const void *v1, const void *v2) { const struct wordinfo *const w1 = *((struct wordinfo**) v1); const struct wordinfo *const w2 = *((struct wordinfo**) v2); return strcmp(w1->word, w2->word); } #if 0 /* we don't need this at the moment, but want to preserve it * in case we can need it again. */ static int bs_compmi(const void *key, const void *mem) { const struct wordinfo *const wi = *((struct wordinfo**) mem); return strcmp((char*)key, wi->word); } #endif /* add an additional word to existing node */ void logrec_addWord(logrec_node_t *const __restrict__ node, struct wordinfo *const __restrict__ wi) { /* TODO: here we can insert at the right spot, which makes * bsearch() usable [in the caller, later...] 
*/ /* TODO: alloc in larger increments */ int newnwords = node->nwords + 1; node->words = realloc(node->words, sizeof(struct wordinfo *)*newnwords); wi->occurs = 1; /* TODO: layer violation, do in other function */ node->words[node->nwords] = wi; node->nwords = newnwords; #if 0 if(node->nwords > 1) { qsort(node->words, node->nwords, sizeof(struct wordinfo), qs_compmi); // TODO upgrade } #endif } logrec_node_t * logrec_newNode(struct wordinfo *const wi, struct logrec_node *const parent) { logrec_node_t *const node = calloc(1, sizeof(logrec_node_t)); node->parent = parent; node->child = NULL; node->val.ltext = NULL; if(wi != NULL) logrec_addWord(node, wi); return node; } void logrec_delNode(logrec_node_t *const node) { for(int i = 0 ; i < node->nwords ; ++i) wordinfoDelete(node->words[i]); free(node->words); free(node->val.ltext); free(node); } /* returns ptr to existing struct wordinfo or NULL, if not found. */ static struct wordinfo * logrec_hasWord(logrec_node_t *const __restrict__ node, char *const __restrict__ word) { // return (struct wordinfo*) bsearch(word, node->words, node->nwords, sizeof(struct wordinfo), bs_compmi); /* alternative without need to have a binary array -- may make sense... */ int i; for(i = 0 ; i < node->nwords ; ++i) { if(!strcmp(node->words[i]->word, word)) break; } return (i < node->nwords) ? node->words[i] : NULL; } void printPrefixes(logrec_node_t *const __restrict__ node, const int lenPrefix, const int lenSuffix) { if(!optPrintDebugOutput) return; int i; const int maxwords = node->nwords > 5 ? 
5 : node->nwords; printf("prefix %d, suffix %d\n", lenPrefix, lenSuffix); for(i = 0 ; i < maxwords ; ++i) { const char *const word = node->words[i]->word; const int lenWord = strlen(word); const int strtSuffix = lenWord - lenSuffix; int j; putchar('"'); for(j = 0 ; j < lenPrefix ; ++j) putchar(word[j]); printf("\" \""); for( ; j < strtSuffix ; ++j) putchar(word[j]); printf("\" \""); for( ; j < lenWord ; ++j) putchar(word[j]); printf("\"\n"); } } /* Squash duplicate values inside a tree node. This must only be run * after tree node values have been modified. */ static void squashDuplicateValues(logrec_node_t *node) { if(node->nwords == 1) return; /* sort and the remove the easy to find dupes */ qsort(node->words, node->nwords, sizeof(struct wordinfo*), qs_compmi); int iPrev = 0; int nsquashed = 0; for(int iNext = 1 ; iNext < node->nwords ; ++iNext) { if(!strcmp(node->words[iPrev]->word, node->words[iNext]->word)) { wordinfoDelete(node->words[iNext]); ++nsquashed; } else { /* OK, new word */ if(iPrev+1 == iNext) iPrev = iNext; else { node->words[iPrev]->occurs += iNext - iPrev - 1; ++iPrev; node->words[iPrev] = node->words[iNext]; } } } if(nsquashed) { node->nwords -= nsquashed; node->words = realloc(node->words, sizeof(struct wordinfo *)*node->nwords); } } /* Disjoin subwords based on a Delimiter. * When calling this function, it must be known that the Delimiter * is present in *all* words. * TODO: think about empty initial subwords (Delimiter in start position!). */ static void disjoinDelimiter(logrec_node_t *node, const char Delimiter) { /* first, create node pointers: * we need to update our original node in-place, because otherwise * we change the structure of the tree with a couple of * side effects. As we do not want this, the first subword must * be placed into the current node, and new nodes be * created for the other subwords. 
*/ char delimword[2]; delimword[0] = Delimiter; delimword[1] = '\0'; struct wordinfo *const delim_wi = wordinfoNew(strdup(delimword)); delim_wi->flags.isSubword = 1; logrec_node_t *const delim_node = logrec_newNode(delim_wi, node); logrec_node_t *const tail_node = logrec_newNode(NULL, delim_node); delim_node->child = tail_node; tail_node->child = node->child; node->child = delim_node; delim_node->parent = node; if(tail_node->child != NULL) tail_node->child->parent = tail_node; /* now, do the actual split */ //printf("nodes setup, now doing actual split\n");fflush(stdout); char *prevword = NULL; for(int i = 0 ; i < node->nwords ; ++i) { struct wordinfo *wi; char *const delimptr = strchr(node->words[i]->word, Delimiter); //printf("splitting off tail %d of %d [%p]:'%s' [full: '%s']\n" //, i, node->nwords, delimptr+1, delimptr+1, node->words[i]->word);fflush(stdout); wi = wordinfoNew(strdup(delimptr+1)); wi->flags.isSubword = 1; wordDetectSyntax(wi, strlen(wi->word), 0); /* add new word only if not duplicate of previous (reduces dupes) */ if(prevword == NULL || strcmp(prevword, wi->word)) { logrec_addWord(tail_node, wi); prevword = wi->word; } else { wordinfoDelete(wi); } /* we can now do an in-place update of the old word ;) */ *delimptr = '\0'; node->words[i]->flags.isSubword = 1; wordDetectSyntax(node->words[i], strlen(node->words[i]->word), 0); #if 0 /* but we trim the memory */ const char *delword = node->words[i]->word; node->words[i]->word = strdup(delword); free((void*)delword); #endif } if(node->nwords > 1) { squashDuplicateValues(node); squashDuplicateValues(tail_node); } //printf("done disjonDelimiter\n");fflush(stdout); } /* Disjoin common prefixes and suffixes. This also triggers a * new syntax detection on the remaining variable part. 
*/ static void disjoinCommon(logrec_node_t *node, const size_t lenPrefix, const size_t lenSuffix) { logrec_node_t *newnode; struct wordinfo *newwi; char *newword; char *word = node->words[0]->word; if(lenPrefix > 0) { /* we need to update our node in-place, because otherwise * we change the structure of the tree with a couple of * side effects. As we do not want this, the prefix must * be placed into the current node, and new nodes be * created for the other words. */ newword = malloc(lenPrefix+1); memcpy(newword, word, lenPrefix); newword[lenPrefix] = '\0'; newwi = wordinfoNew(newword); newwi->flags.isSubword = 1; newnode = logrec_newNode(newwi, node); newnode->words = node->words; newnode->nwords = node->nwords; node->nwords = 0; node->words = NULL; logrec_addWord(node, newwi); newnode->child = node->child; node->child = newnode; node->parent = newnode; /* switch node */ node = newnode; for(int i = 0 ; i < node->nwords ; ++i) memmove(node->words[i]->word, /* move includes \0 */ node->words[i]->word+lenPrefix, strlen(node->words[i]->word)-lenPrefix+1); } if(lenSuffix > 0) { const size_t lenword = strlen(word); size_t iSuffix = lenword-lenSuffix; newword = malloc(lenSuffix+1); memcpy(newword, word+iSuffix, lenSuffix+1); /* includes \0 */ newwi = wordinfoNew(newword); newwi->flags.isSubword = 1; newnode = logrec_newNode(newwi, node); newnode->child = node->child; if(newnode->child != NULL) newnode->child->parent = newnode; node->child = newnode; for(int i = 0 ; i < node->nwords ; ++i) { iSuffix = strlen(node->words[i]->word)-lenSuffix; node->words[i]->word[iSuffix] = '\0'; } } for(int i = 0 ; i < node->nwords ; ++i) { node->words[i]->flags.isSubword = 1; wordDetectSyntax(node->words[i], strlen(node->words[i]->word), 0); } // TODO: squash duplicates only if syntaxes were detected! squashDuplicateValues(node); } /* find a matching terminator inside a suffix, searchs only * within the suffix area. If found, lenPrefix and lenSuffix * are update and 1 is returned. 
Returns 0 if not found. * Helper to checkPrefixes. */ static int findMatchingTerm(const char *const __restrict__ word, const size_t lenWord, size_t potentialNewPrefix, int *const __restrict lenPrefix, int *const __restrict lenSuffix, const char term) { int newSuffix = -1; for(int i = 0 ; i < *lenSuffix ; ++i) if(word[lenWord-i-1] == term) { newSuffix = i+1; break; } if(newSuffix >= 0) { *lenSuffix = newSuffix; *lenPrefix = potentialNewPrefix; return 1; } return 0; } /* returns 1 if Delimiter is found in all words, 0 otherwise */ int checkCommonDelimiter(logrec_node_t *const __restrict__ node, const char Delimiter) { for(int i = 0 ; i < node->nwords ; ++i) { if(strlen(node->words[i]->word) < 2 || strchr(node->words[i]->word+1, Delimiter) == NULL) return 0; } return 1; } /* returns 1 if braces are found in all words, 0 otherwise */ int checkCommonBraces(logrec_node_t *const __restrict__ node, const char braceOpen, const char braceClose) { char *op; for(int i = 0 ; i < node->nwords ; ++i) { if(strlen(node->words[i]->word) < 2) return 0; if((op = strchr(node->words[i]->word+1, braceOpen)) == NULL) return 0; else if(strchr(op+1, braceClose) == NULL) return 0; } return 1; } /* check if there are common subword delimiters inside the values. If so, * use them to create subwords. Revalute the syntax if done so. */ void checkSubwords(logrec_node_t *const __restrict__ node) { //printf("checkSubwords checking node %p: %s\n", node, node->words[0]->word); if(checkCommonDelimiter(node, '/')) { disjoinDelimiter(node, '/'); } if(checkCommonDelimiter(node, ':')) { disjoinDelimiter(node, ':'); } if(checkCommonDelimiter(node, '=')) { disjoinDelimiter(node, '='); } #if 0 // this does not work, requies a seperate disjoin operation (4-parts) if(checkCommonBraces(node, '[', ']')) { disjoinDelimiter(node, '('); disjoinDelimiter(node->child->child, ')'); } #endif } /* check if there are common prefixes and suffixes and, if so, * extract them. 
*/
/* Nodes with fewer than two words and nodes that already hold subwords
 * are left alone. The first word serves as reference; prefix and suffix
 * lengths are narrowed down against every other word and never allowed
 * to overlap inside the shortest word of the node.
 */
void
checkPrefixes(logrec_node_t *const __restrict__ node)
{
	if(node->nwords < 2 || node->words[0]->flags.isSubword)
		return;

	const char *const baseword = node->words[0]->word;
	const size_t lenBaseword = strlen(baseword);
	int lenPrefix = lenBaseword;
	int lenSuffix = lenBaseword;
	/* the reference word takes part in the "shortest word" computation,
	 * otherwise prefix+suffix may become longer than the word itself.
	 */
	int shortestWord = lenBaseword;
	for(int i = 1 ; i < node->nwords ; ++i) {
		int j;
		const int lenWord = strlen(node->words[i]->word);
		if(lenWord < shortestWord)
			shortestWord = lenWord;
		/* check prefix */
		if(lenPrefix > 0) {
			for(j = 0 ;
			    j < lenPrefix && node->words[i]->word[j] == baseword[j] ;
			    ++j)
				; /* EMPTY - just scan */
			if(j < lenPrefix)
				lenPrefix = j;
		}
		/* check suffix */
		if(lenSuffix > 0) {
			const int jmax = (lenWord < lenSuffix) ? lenWord : lenSuffix;
			for(j = 0 ;
			    j < jmax &&
			      node->words[i]->word[lenWord-j-1] == baseword[lenBaseword-j-1] ;
			    ++j)
				; /* EMPTY - just scan */
			if(j < lenSuffix)
				lenSuffix = j;
		}
	}
	if(lenPrefix+lenSuffix > shortestWord) /* can happen, e.g. if {"aaa","aaa"} */
		lenSuffix = shortestWord - lenPrefix;
	/* to avoid false positives, we check for some common
	 * field="xxx" syntaxes here.
	 */
	for(int j = lenPrefix-1 ; j >= 0 ; --j) {
		switch(baseword[j]) {
		case '"':
			if(findMatchingTerm(baseword, lenBaseword, j+1,
						&lenPrefix, &lenSuffix,'"'))
				goto done_prefixes;
			break;
		case '\'':
			if(findMatchingTerm(baseword, lenBaseword, j+1,
						&lenPrefix, &lenSuffix,'\''))
				goto done_prefixes;
			break;
		case '[':
			if(findMatchingTerm(baseword, lenBaseword, j+1,
						&lenPrefix, &lenSuffix,']'))
				goto done_prefixes;
			break;
		case '(':
			if(findMatchingTerm(baseword, lenBaseword, j+1,
						&lenPrefix, &lenSuffix,')'))
				goto done_prefixes;
			break;
		case '<':
			if(findMatchingTerm(baseword, lenBaseword, j+1,
						&lenPrefix, &lenSuffix,'>'))
				goto done_prefixes;
			break;
		case '=':
		case ':':
			/* cut the prefix right behind the separator, keep scanning */
			lenPrefix = j+1;
			break;
		default:
			break;
		}
	}
done_prefixes:
	if(lenPrefix != 0 || lenSuffix != 0) {
		/* TODO: not only print here, but let user override
		 * (in upcoming "interactive" mode)
		 */
		printPrefixes(node, lenPrefix, lenSuffix);
		disjoinCommon(node, lenPrefix, lenSuffix);
	}
}

/* if all terminals, squash siblings. It is too dangerous to
 * do this while creating the tree, but after it has been created
 * such siblings are really just different values.
*/ void squashTerminalSiblings(logrec_node_t *const __restrict__ node) { if(!node->sibling) return; int nSiblings = 0; for(logrec_node_t *n = node ; n ; n = n->sibling) { if(n->child || n->nwords > 1) return; nSiblings++; } node->words = realloc(node->words, sizeof(struct wordinfo *) * (node->nwords + nSiblings)); for(logrec_node_t *n = node->sibling ; n ; n = n->sibling) { if(optPrintDebugOutput) printf("add to idx %d: '%s'\n", node->nwords, n->words[0]->word);fflush(stdout); node->words[node->nwords++] = n->words[0]; n->words[0] = NULL; } node->sibling = NULL; // TODO: fix memory leak } /* reprocess tree to check subword creation */ void treeDetectSubwords(logrec_node_t *node) { if(node == NULL) return; reportProgress("subword detection"); squashTerminalSiblings(node); while(node != NULL) { checkSubwords(node); //checkPrefixes(node); treeDetectSubwords(node->child); node = node->sibling; } } /* squash a tree, that is combine nodes that point to nodes * without siblings to a single node. */ void treeSquash(logrec_node_t *node) { if(node == NULL) return; reportProgress("squashing"); squashTerminalSiblings(node); const int hasSibling = node->sibling == NULL ? 0 : 1; while(node != NULL) { if(!hasSibling && node->child != NULL && node->nwords == 1 && node->child->sibling == NULL && node->child->nwords == 1 && node->words[0]->word[0] != '%' /* do not combine syntaxes */ && node->child->words[0]->word[0] != '%') { char *newword; if(asprintf(&newword, "%s %s", node->words[0]->word, node->child->words[0]->word)) {}; /* silence cc warning */ if(optPrintDebugOutput) printf("squashing: %s\n", newword); free(node->words[0]->word); node->words[0]->word = newword; node->nterm = node->child->nterm; /* TODO: do not combine terminals! 
*/ logrec_node_t *toDel = node->child; node->child = node->child->child; logrec_delNode(toDel); continue; /* see if we can squash more */ } //checkPrefixes(node); treeSquash(node->child); node = node->sibling; } } void treePrintIndent(const int level, const char indicator) { printf("%2d%c:", level, indicator); for(int i = 0 ; i < level ; ++i) printf(" "); } void treePrintWordinfo(struct wordinfo *const __restrict__ wi) { printf("%s", wi->word); if(wi->flags.isSubword) printf(" {subword}"); if(wi->occurs > 1) printf(" {%d}", wi->occurs); } void treePrint(logrec_node_t *node, const int level) { if(!optPrintTree) return; reportProgress("print"); while(node != NULL) { if(optSortMultivalues) qsort(node->words, node->nwords, sizeof(struct wordinfo*), qs_compmi); treePrintIndent(level, 'l'); treePrintWordinfo(node->words[0]); if(node->nterm) printf(" [nterm %d]", node->nterm); printf("\n"); for(int i = 1 ; i < node->nwords ; ++i) { treePrintIndent(level, 'v'); treePrintWordinfo(node->words[i]); printf("\n"); } treePrint(node->child, level + 1); node = node->sibling; } } #if 0 void treeToJSON(logrec_node_t *node, json_object *json) { json_object *newobj; int isArray; reportProgress("convert tree to json"); if(node == NULL) return; if(node->sibling == NULL) { isArray = 0; newobj = json_object_new_object(); } else { isArray = 1; newobj = json_object_new_array(); } while(node != NULL) { treePrintWordinfo(node->words[0]); if(node->nterm) printf(" [nterm %d]", node->nterm); printf("\n"); for(int i = 1 ; i < node->nwords ; ++i) { treePrintIndent(level, 'v'); treePrintWordinfo(node->words[i]); printf("\n"); } treePrint(node->child, level + 1); node = node->sibling; } } #endif #if 0 void treePrintTerminalsNonRoot(logrec_node_t *__restrict__ node, const char *const __restrict__ beginOfMsg) { const char *msg = NULL; while(node != NULL) { const char *tail; if(node == root) { msg = ""; } else { if(node->nwords > 1) { tail = "%word%"; } else { tail = node->words[0]->word; } 
free((void*)msg); asprintf((char**)&msg, "%s%s%s", beginOfMsg, tail, (node->words[0]->flags.isSubword) ? "" : " "); if(node->nterm) printf("%6d times:%s\n", node->nterm, msg); } treePrintTerminalsNonRoot(node->child, msg); node = node->sibling; } } void treePrintTerminals(logrec_node_t *__restrict__ node) { /* we need to strip "[ROOT]" from the node value. Note that it may * have been combined with some other value during tree squash. */ const char *beginOfMsg = node->words[0]->word + 6 /* "[ROOT]"! */; while(node != NULL) { treePrintTerminalsNonRoot(node->child, ""); node = node->sibling; } } #endif void treeCreateRuleTableNonRoot(logrec_node_t *__restrict__ node, rule_table_t *const __restrict__ rt, const char *const __restrict__ beginOfMsg) { const char *msg = NULL; while(node != NULL) { const char *tail; if(node->nwords > 1) { tail = "%MULTIVALUE%"; } else { tail = node->words[0]->word; } free((void*)msg); if(asprintf((char**)&msg, "%s%s%s", beginOfMsg, tail, (node->words[0]->flags.isSubword) ? "" : " ") == -1) {}; /* silence cc warning */ if(node->nterm) { reportProgress("rule table create"); rule_table_etry_t *const rt_etry = ruleTableEtryCreate(rt); rt_etry->ntimes = node->nterm; rt_etry->rule = strdup(msg); } treeCreateRuleTableNonRoot(node->child, rt, msg); node = node->sibling; } } rule_table_t * treeCreateRuleTable(logrec_node_t *__restrict__ node) { /* we need to strip "[ROOT]" from the node value. Note that it may * have been combined with some other value during tree squash. */ const char *beginOfMsg = node->words[0]->word + 6 /* "[ROOT]"! */; rule_table_t *const __restrict__ rt = ruleTableCreate(); while(node != NULL) { treeCreateRuleTableNonRoot(node->child, rt, beginOfMsg); node = node->sibling; } return rt; } /* TODO: move wordlen to struct wordinfo? 
*/ void /* NOTE: bDetectStacked is just a development aid, it permits us to write * a first version which does not detect multi-node items that would * go to the stack and require more elaborate handling. TODO: remove that * restriction. * TODO: check: we may remove stacked mode due to new subword algo (if it stays!) */ wordDetectSyntax(struct wordinfo *const __restrict__ wi, const size_t wordlen, const int bDetectStacked) { size_t nproc; size_t constzero = 0; /* the default lognorm parsers need this */ if(syntax_posint(wi->word, wordlen, NULL, &nproc) && nproc == wordlen) { free(wi->word); wi->word = strdup("%posint%"); wi->flags.isSpecial = 1; goto done; } if(ln_parseTime24hr(wi->word, wordlen, &constzero, NULL, &nproc, NULL) == 0 && nproc == wordlen) { free(wi->word); wi->word = strdup("%time-24hr%"); wi->flags.isSpecial = 1; goto done; } /* duration needs to go after Time24hr, as duration would accept * Time24hr format, whereas duration usually starts with a single * digit and so Tim24hr will not pick it. Still we may get false * detection for durations > 10hrs, but so is it... 
*/ if(ln_parseDuration(wi->word, wordlen, &constzero, NULL, &nproc, NULL) == 0 && nproc == wordlen) { free(wi->word); wi->word = strdup("%duration%"); wi->flags.isSpecial = 1; goto done; } if(syntax_ipv4(wi->word, wordlen, NULL, &nproc)) { if(nproc == wordlen) { free(wi->word); wi->word = strdup("%ipv4%"); wi->flags.isSpecial = 1; goto done; } if(bDetectStacked && wi->word[nproc] == '/') { size_t strtnxt = nproc + 1; if(syntax_posint(wi->word+strtnxt, wordlen-strtnxt, NULL, &nproc)) if(strtnxt+nproc == wordlen) { free(wi->word); wi->word = strdup("%ipv4%"); wi->flags.isSubword = 1; wi->flags.isSpecial = 1; struct wordinfo *wit; wit = wordinfoNew("%posint%"); wit->flags.isSubword = 1; wit->flags.isSpecial = 1; wordstackPush(wit); wit = wordinfoNew("/"); wit->flags.isSubword = 1; wordstackPush(wit); goto done; } } } if(ln_parseKernelTimestamp(wi->word, wordlen, &constzero, NULL, &nproc, NULL) == 0 && nproc == wordlen) { free(wi->word); wi->word = strdup("%kernel-timestamp%"); wi->flags.isSpecial = 1; goto done; } done: return; } struct wordinfo * getWord(char **const line) { struct wordinfo *wi = wordstackPop(); if(wi != NULL) return wi; char *ln = *line; if(*ln == '\0') return NULL; size_t i; for(i = 0 ; ln[i] && isspace(ln[i]) ; ++i) /* EMPTY - skip spaces */; const size_t begin_word = i; for( ; ln[i] && !isspace(ln[i]) ; ++i) { #if 0 /* turn on for subword detection experiment */ if(ln[i] == ':' || ln[i] == '=' || ln[i] == '/' || ln[i] == '[' || ln[i] == ']' || ln[i] == '(' || ln[i] == ')') { wi = wordinfoNew(NULL); wi->word = malloc(2); wi->word[0] = ln[i]; wi->word[1] = '\0'; wordstackPush(wi); // TODO: if we continue with this approach, we must indicate that // this is a subword. /* mimic word delimiter, will be skipped over in next run: */ ln[i] = ' '; break; } #endif } if(begin_word == i) /* only trailing spaces? 
*/ return NULL; const size_t wordlen = i - begin_word; wi = wordinfoNew(NULL); wi->word = malloc(wordlen + 1); memcpy(wi->word, ln+begin_word, wordlen); wi->word[wordlen] = '\0'; if(wi->word[0] == '%') /* assume already token [TODO: improve] */ goto done; wordDetectSyntax(wi, wordlen, 1); done: *line = ln+i; return wi; } logrec_node_t * treeAddToLevel(logrec_node_t *const level, /* current node */ struct wordinfo *const wi, struct wordinfo *const nextwi ) { logrec_node_t *existing, *prev = NULL; for(existing = level->child ; existing != NULL ; existing = existing->sibling) { struct wordinfo *wi_val; if((wi_val = logrec_hasWord(existing, wi->word)) != NULL) { wi_val->occurs++; break; } prev = existing; } if(existing == NULL && nextwi != NULL) { /* we check if the next word is the same, if so, we can * just add this as a different value. */ logrec_node_t *child; for(child = level->child ; child != NULL ; child = child->sibling) { if(child->child != NULL && !strcmp(child->child->words[0]->word, nextwi->word)) break; } if(child != NULL) { logrec_addWord(child, wi); existing = child; } } if(existing == NULL) { existing = logrec_newNode(wi, level); if(prev == NULL) { /* first child of parent node */ level->child = existing; } else { /* potential new sibling */ prev->sibling = existing; } } return existing; } void treeAddLine(char *ln) { struct wordinfo *wi; struct wordinfo *nextwi; /* we need one-word lookahead for structure tree */ logrec_node_t *level = root; nextwi = getWord(&ln); while(1) { wi = nextwi; if(wi == NULL) { ++level->nterm; break; } nextwi = getWord(&ln); level = treeAddToLevel(level, wi, nextwi); } } void preprocessLine(const char *const __restrict__ buf, const size_t buflen, char *const bufout) { static int lnCnt = 1; size_t nproc; char *tocopy; size_t tocopylen; size_t iout; iout = 0; for(size_t i = 0 ; i < buflen ; ) { /* in this stage, we must only detect syntaxes that we are * very sure to correctly detect AND that *spawn multiple * words*. 
Otherwise, it is safer to detect them on a * word basis. */ if(ln_parseRFC3164Date(buf, buflen, &i, NULL, &nproc, NULL) == 0) { tocopy = "%date-rfc3164%"; } else if(ln_parseRFC5424Date(buf, buflen, &i, NULL, &nproc, NULL) == 0) { tocopy = "%date-rfc5424%"; } else if(ln_parseISODate(buf, buflen, &i, NULL, &nproc, NULL) == 0) { tocopy = "%date-iso%"; } else if(ln_parsev2IPTables(buf, buflen, &i, NULL, &nproc, NULL) == 0) { tocopy = "%v2-iptables%"; } else if(ln_parseNameValue(buf, buflen, &i, NULL, &nproc, NULL) == 0) { tocopy = "%name-value-list%"; } else if(ln_parseCiscoInterfaceSpec(buf, buflen, &i, NULL, &nproc, NULL) == 0) { tocopy = "%cisco-interface-spec%"; } else if(ln_parseCEESyslog(buf, buflen, &i, NULL, &nproc, NULL) == 0) { tocopy = "%cee-syslog%"; } else if(ln_parseJSON(buf, buflen, &i, NULL, &nproc, NULL) == 0) { tocopy = "%json%"; } else { tocopy = NULL; nproc = 1; } /* copy to output buffer */ if(tocopy == NULL) { bufout[iout++] = buf[i]; } else { tocopylen = strlen(tocopy); // do this in lower lever memcpy(bufout+iout, tocopy, tocopylen); iout += tocopylen; } i += nproc; } bufout[iout] = '\0'; ++lnCnt; } int processFile(FILE *fp) { char lnbuf[MAXLINE]; char lnpreproc[MAXLINE]; while(!feof(fp)) { reportProgress("reading"); size_t i; for(i = 0 ; i < sizeof(lnbuf)-1 ; ++i) { const int c = fgetc(fp); if(c == EOF || c == '\n') break; lnbuf[i] = c; } lnbuf[i] = '\0'; if(i > 0) { //processLine(lnbuf, i, &logrec); //logrecPrint(logrec); preprocessLine(lnbuf, i, lnpreproc); treeAddLine(lnpreproc); } } treePrint(root, 0); treeDetectSubwords(root); treeSquash(root); treePrint(root, 0); rule_table_t *const rt = treeCreateRuleTable(root); reportProgress("sorting rule table"); qsort(rt->entries, (size_t) rt->nxtEtry, sizeof(rule_table_etry_t*), qs_comp_rt_etry); ruleTablePrint(rt); ruleTableDestroy(rt); //treePrintTerminals(root); reportProgress(NULL); return 0; } #define OPT_PRINT_TREE 1000 #define OPT_PRINT_DEBUG_OUTPUT 1001 #define OPT_SORT_MULTIVALUES 1002 int 
main(int argc, char *argv[]) { int r; int ch; static const struct option longopts[] = { { "report-progress", no_argument, 0, 'p' }, { "print-tree", no_argument, 0, OPT_PRINT_TREE }, { "print-debug-output", no_argument, 0, OPT_PRINT_DEBUG_OUTPUT }, { "sort-multivalues", required_argument,0, OPT_SORT_MULTIVALUES }, { NULL, 0, 0, 0 } }; setvbuf(stdout, NULL, _IONBF, 0); while ((ch = getopt_long(argc, argv, "p", longopts, NULL)) != -1) { switch (ch) { case 'p': /* file to log */ displayProgress = 1; break; case OPT_PRINT_TREE: optPrintTree = 1; break; case OPT_PRINT_DEBUG_OUTPUT: optPrintDebugOutput = 1; break; case OPT_SORT_MULTIVALUES: if(!strcmp(optarg, "enabled")) optSortMultivalues = 1; else if(!strcmp(optarg, "disabled")) optSortMultivalues = 0; else { fprintf(stderr, "invalid value '%s' for --sort-multivalues." "Valid: \"enabled\", \"disabled\"\n", optarg); exit(1); } break; case '?': default: // usage(stderr); fprintf(stderr, "invalid option"); break; } } root = logrec_newNode(wordinfoNew(strdup("[ROOT]")), NULL); r = processFile(stdin); return r; } liblognorm-2.1.0/tools/squashml.c000066400000000000000000000026421520037563000170240ustar00rootroot00000000000000/* a small tool to squash multiline message - probably to be removed * later and integrated into the "mainstream" tools. * Copyright (C) 2015 by Rainer Gerhards * Released under ASL 2.0 */ #include #include #include #include #include char * getmsg(regex_t *const preg, char *const buf, size_t len) { static size_t lenln = 0; static char lnbuf[1024*64]; size_t iDst = 0; int nlines = 0; if(lenln) { /* have previous segment? 
*/ memcpy(buf+iDst, lnbuf, lenln); iDst += lenln; ++nlines; } while(fgets(lnbuf, sizeof(lnbuf), stdin)) { lenln = strlen(lnbuf); if(lnbuf[lenln-1] == '\n') { lnbuf[lenln-1] = '\0'; lenln--; } const int is_match = !regexec(preg, lnbuf, 0, NULL, 0); if(is_match) { break; /* previous message complete */ } else { if(iDst != 0) { buf[iDst++] = '\\'; buf[iDst++] = 'n'; } memcpy(buf+iDst, lnbuf, lenln); iDst += lenln; ++nlines; } } if(nlines == 0 && lenln > 0) { /* handle single lines */ memcpy(buf+iDst, lnbuf, lenln); iDst += lenln; lenln = 0; } buf[iDst] = '\0'; } int main(int argc, char *argv[]) { if(argc != 2) { fprintf(stderr, "usage: squashml regex\n"); exit(1); } regex_t preg; if(regcomp(&preg, argv[1], REG_EXTENDED)) { perror("regcomp"); exit(1); } char msg[1024*256]; while(!feof(stdin)) { getmsg(&preg, msg, sizeof(msg)); printf("%s\n", msg); } } liblognorm-2.1.0/tools/syntaxes.c000066400000000000000000000042241520037563000170430ustar00rootroot00000000000000/* Syntax "detectors" * * Copyright 2015 Rainer Gerhards * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include "config.h" #include #include #include "syntaxes.h" /* returns -1 if no integer found, else integer */ static int64_t getPosInt(const char *const __restrict__ buf, const size_t buflen, size_t *const __restrict__ nprocessed) { int64_t val = 0; size_t i; for(i = 0 ; i < buflen ; ++i) { if('0' <= buf[i] && buf[i] <= '9') val = val*10 + buf[i]-'0'; else break; } *nprocessed = i; if(i == 0) val = -1; return val; } /* 1 - is IPv4, 0 not */ int syntax_ipv4(const char *const __restrict__ buf, const size_t buflen, const char *extracted, size_t *const __restrict__ nprocessed) { int64_t val; size_t nproc; size_t i; int r = 0; val = getPosInt(buf, buflen, &i); if(val < 1 || val > 255) goto done; if(buf[i] != '.') goto done; i++; val = getPosInt(buf+i, buflen-i, &nproc); if(val < 0 || val > 255) goto done; i += nproc; if(buf[i] != '.') goto done; i++; val = getPosInt(buf+i, buflen-i, &nproc); if(val < 0 || val > 255) goto done; i += nproc; if(buf[i] != '.') goto done; i++; val = getPosInt(buf+i, buflen-i, &nproc); if(val < 0 || val > 255) goto done; i += nproc; //printf("IP Addr[%zd]: '%s'\n", i, buf); *nprocessed = i; r = 1; done: return r; } /* 1 - is positive integer, 0 not */ int syntax_posint(const char *const __restrict__ buf, const size_t buflen, const char *extracted, size_t *const __restrict__ nprocessed) { int64_t val; size_t i; int r = 0; val = getPosInt(buf, buflen, &i); if(val == -1) goto done; *nprocessed = i; r = 1; done: return r; } liblognorm-2.1.0/tools/syntaxes.h000066400000000000000000000016411520037563000170500ustar00rootroot00000000000000/* Syntax "detectors" * * Copyright 2015 Rainer Gerhards * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* 1 - is IPv4, 0 not */
int syntax_ipv4(const char *const __restrict__ buf,
	const size_t buflen,
	const char *extracted,
	size_t *const __restrict__ nprocessed);

/* 1 - is positive integer, 0 not */
int syntax_posint(const char *const __restrict__ buf,
	const size_t buflen,
	const char *extracted,
	size_t *const __restrict__ nprocessed);