pax_global_header00006660000000000000000000000064130334504300014505gustar00rootroot0000000000000052 comment=b6f002555ac8d4c26ed3bc7832f53259b4f3e262 genwqe-user-4.0.18/000077500000000000000000000000001303345043000140415ustar00rootroot00000000000000genwqe-user-4.0.18/.gitignore000066400000000000000000000000271303345043000160300ustar00rootroot00000000000000*.o *.a *.d *.so.* *.sogenwqe-user-4.0.18/.gitmodules000066400000000000000000000001261303345043000162150ustar00rootroot00000000000000[submodule "ext/libcxl"] path = ext/libcxl url = https://github.com/ibm-capi/libcxl genwqe-user-4.0.18/.travis.yml000066400000000000000000000003211303345043000161460ustar00rootroot00000000000000sudo: required dist: trusty language: c compiler: gcc before_install: - sudo apt-get -qq update - sudo apt-get --assume-yes install gcc help2man git make zlib1g-dev script: - make - make test_software genwqe-user-4.0.18/Jenkinsfile000066400000000000000000000001651303345043000162270ustar00rootroot00000000000000node { stage 'checkout' checkout scm stage 'build' sh 'make' stage 'test' sh 'make test_software' } genwqe-user-4.0.18/LICENSE000066400000000000000000000261361303345043000150560ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. genwqe-user-4.0.18/Makefile000066400000000000000000000110501303345043000154760ustar00rootroot00000000000000# # Copyright 2015, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # include config.mk # Verbose level: # V=0 means completely silent # V=1 means brief output # V=2 means full output V ?= 2 include config.mk ifeq ($(V),0) Q := @ MAKEFLAGS += --silent MAKE += -s endif ifeq ($(V),1) MAKEFLAGS += --silent MAKE += -s endif PLATFORM ?= $(shell uname -i) distro = $(shell lsb_release -d | cut -f2) subdirs += lib tools misc ifdef WITH_LIBCXL subdirs += init endif UDEV_RULES_D ?= /etc/udev/rules.d MODPROBE_D ?= /etc/modprobe.d all: $(subdirs) # Rules for the recursive build tools misc: lib # z_ prefixed version of libz, intended to be linked statically with # our libz version to provide the software zlib functionality. 
# ifeq ($(CONFIG_DLOPEN_MECHANISM),0) HAS_WGET = $(shell which wget > /dev/null 2>&1 && echo y || echo n) HAS_CURL = $(shell which curl > /dev/null 2>&1 && echo y || echo n) OBJCOPY = @printf "\t[OBJCP]\t%s\n" `basename "$@"`; $(CROSS)objcopy define Q @/bin/echo -e " [$1]\t$(2)" @$(3) endef lib: zlib-1.2.8/libz.so zlib-1.2.8/libz.so: zlib-1.2.8.cfg @/bin/echo -e " [BUILD]\tzlib-1.2.8" @$(MAKE) -C zlib-1.2.8 1>&2 > /dev/null zlib-1.2.8.cfg: zlib-1.2.8.tar.gz @/bin/echo -e " [TAR]\t$<" @tar xfz $< @/bin/echo -e " [CFG]\tzlib-1.2.8" @(cd zlib-1.2.8 && CFLAGS=-O2 ./configure --prefix=/opt/genwqe) \ 1>&2 > /dev/null @touch zlib-1.2.8.cfg zlib-1.2.8.tar.gz: ifeq (${HAS_WGET},y) $(call Q,WGET,zlib-1.2.8.tar.gz, wget -O zlib-1.2.8.tar.gz -q http://www.zlib.net/zlib-1.2.8.tar.gz) else ifeq (${HAS_CURL},y) $(call Q,CURL,zlib-1.2.8.tar.gz, curl -o zlib-1.2.8.tar.gz -s http://www.zlib.net/zlib-1.2.8.tar.gz) endif endif # Only build if the subdirectory is really existent .PHONY: $(subdirs) install $(subdirs): @if [ -d $@ ]; then \ $(MAKE) -C $@ C=0 || exit 1; \ fi rpmbuild_setup: @mkdir -p ~/rpmbuild/{BUILD,RPMS,SOURCES,SPECS,SRPMS} $(RM) ~/.rpmmacros echo '%_topdir %(echo $$HOME)/rpmbuild' > ~/.rpmmacros # # Create required tar.gz archive and copy everything to the right # places. Create version.mk since the Fedora build system requires # running without git. 
# rpmbuild: @$(MAKE) -s distclean @rm -rf /tmp/genwqe-user-$(RPMVERSION) @mkdir -p /tmp/genwqe-user-$(RPMVERSION) @cp -ar * /tmp/genwqe-user-$(RPMVERSION)/ (cd /tmp && tar cfz v$(RPMVERSION).tar.gz genwqe-user-$(RPMVERSION)) rpmbuild -ta -v /tmp/v$(RPMVERSION).tar.gz @rm -rf /tmp/genwqe-user-$(RPMVERSION) $(RM) /tmp/v$(RPMVERSION).tar.gz # Install/Uninstall install uninstall: @for dir in $(subdirs); do \ if [ -d $$dir ]; then \ $(MAKE) -C $$dir $@ || exit 1; \ fi \ done install_udev_rules: mkdir -p $(UDEV_RULES_D) cp etc/udev/rules.d/52-genwqedevices.rules $(UDEV_RULES_D)/ uninstall_udev_rules: $(RM) $(UDEV_RULES_D)/52-genwqedevices.rules install_modprobe_d: mkdir -p $(MODPROBE_D) cp etc/modprobe.d/genwqe.conf $(MODPROBE_D)/ uninstall_modprobe_d: $(RM) $(MODPROBE_D)/genwqe.conf help: @echo "Build GenWQE/CAPI hardware accelerator tools" @echo @echo "Possible Makefile options:" @echo " V=0 silent, 1 normal (default), 2 verbose" @echo " FORCE_32BIT=0 64-bit (default), 1 32-bit" @echo " BUILD_SIMCODE=1 use pslse version of libcxl, 0 use libcxl " @echo " (default)" @echo " CONFIG_DLOPEN_MECHANISM=0 statically link against private" @echo " software zlib, 1 use dlopen to include software zlib" @echo " (default)" @echo # Automatically perform tests - card selected automatically test: test_hardware test_software test_hardware: all ./misc/basic_hardware_tests.sh test_software: all ./misc/basic_software_tests.sh distclean: clean @$(RM) -r sim_* zlib-1.2.8 zlib-1.2.8.tar.gz clean: @for dir in $(subdirs); do \ if [ -d $$dir ]; then \ $(MAKE) -C $$dir $@ || exit 1; \ fi \ done @$(RM) genwqe-$(RPMVERSION).tar.gz libz.o libz_prefixed.o zlib-1.2.8.cfg @if [ -d zlib-1.2.8 ]; then \ $(MAKE) -s -C zlib-1.2.8 distclean; \ fi @find . -depth -name '*~' -exec rm -rf '{}' \; -print @find . 
-depth -name '.#*' -exec rm -rf '{}' \; -print @$(RM) *.bin *.log genwqe-user-4.0.18/README.md000066400000000000000000000047051303345043000153260ustar00rootroot00000000000000genwqe-user =========== GenWQE (Generic Work Queue Engine) software supports the IBM PCIe3 FPGA or CAPI Compression Accelerator Adapters to speed up processing of the DEFLATE compression algorithm. This repository contains the source code to test, maintain and update the GenWQE PCIe/CAPI cards. Furthermore a zlib version with hardware acceleration is provided to do zlib style compression/decompression according to [RFC1950](https://www.ietf.org/rfc/rfc1950.txt), [RFC1951](https://www.ietf.org/rfc/rfc1951.txt) and [RFC1952](https://www.ietf.org/rfc/rfc1952.txt). This can be used as alternative to the traditional software zlib. The GenWQE PCIe or CAPI cards can currently be acquired as options to the latest IBM System p machines: + EJ12 full-height: [PCIe3 FPGA Compression Accelerator Adapter (FC EJ12; CCIN 59AB)](http://www-01.ibm.com/support/knowledgecenter/POWER8/p8hcd/fcej12.htm?cp=POWER8%2F3-3-9-1-1-44) + EJ13 low-profile: [PCIe3 LP FPGA Compression Accelerator Adapter (FC EJ13; CCIN 59AB)](http://www-01.ibm.com/support/knowledgecenter/POWER8/p8hcd/fcej13.htm?cp=POWER8%2F1-2-9-1-1-50&lang=en) And here the CAPI version of the adapter: + EJ1A full-heigh and EJ1B low-profile: [CAPI Compression Accelerator Adapter (FC EJ1A and EJ1B; CCIN 2CF0)](http://www.ibm.com/support/knowledgecenter/POWER8/p8hcd/fcej1a.htm) If you like to contribute to this project, please fill out and sign one of our contributor license agreements to be found in /licenses and send this back to us before sending us contributions. Additional documentation can be found at the [IBM Knowledgecenter](http://www-01.ibm.com/support/knowledgecenter/linuxonibm/liabt/liabtkickoff.htm). 
A programming and usage guide for the hardware accelerated zlib can be downloaded here: [Generic Work Queue Engine (GenWQE) Application Programming Guide](https://www.ibm.com/developerworks/community/blogs/fe313521-2e95-46f2-817d-44a4f27eba32/entry/Generic_Work_Queue_Engine_GenWQE_Application_Programming_Guide?lang=en). The User's guide for the CAPI Compression Accelerator Adapter (FC EJ1A and EJ1B; CCIN 2CF0) can be found here: [CAPI accelerated GZIP Compression Adapter User’s guide](https://www.ibm.com/developerworks/community/wikis/home?lang=en#!/wiki/W51a7ffcf4dfd_4b40_9d82_446ebc23c550/page/CAPI%20accelerated%20GZIP%20Compression%20Adapter%20User’s%20guide). Possible distributors: If you take a snapshot of this into one of your releases, please let us know such that we can sync up testing. Thanks. genwqe-user-4.0.18/config.mk000066400000000000000000000116001303345043000156350ustar00rootroot00000000000000# # Copyright 2015, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # Verbosity level: # V=0 means completely silent # V=1 means brief output # V=2 means full output # V ?= 1 CC = $(CROSS)gcc AS = $(CROSS)as LD = $(CROSS)ld AR = $(CROSS)ar RANLIB = $(CROSS)ranlib OBJCOPY = $(CROSS)objcopy OBJDUMP = $(CROSS)objdump STRIP = $(CROSS)strip NM = $(CROSS)nm HELP2MAN = help2man ifeq ($(V),0) Q := @ MAKEFLAGS += --silent MAKE += -s endif ifeq ($(V),1) MAKEFLAGS += --silent MAKE += -s CC = printf "\t[CC]\t%s\n" `basename "$@"`; $(CROSS)gcc AS = printf "\t[AS]\t%s\n" `basename "$@"`; $(CROSS)as AR = printf "\t[AR]\t%s\n" `basename "$@"`; $(CROSS)ar LD = printf "\t[LD]\t%s\n" `basename "$@"`; $(CROSS)ld OBJCOPY = printf "\t[OBJCOPY]\t%s\n" `basename "$@"`; $(CROSS)objcopy else CLEAN = echo -n endif # # If we can use git to get a version, we use that. If not, we have # no repository and set a static version number. # # NOTE Keep the VERSION for the non git case in sync with the git # tag used to build this code! # HAS_GIT = $(shell git describe > /dev/null 2>&1 && echo y || echo n) ifeq (${HAS_GIT},y) VERSION ?= $(shell git describe --abbrev=4 --always --tags | sed -e 's/v//g') RPMVERSION ?= $(shell git describe --abbrev=0 --tags | cut -c 2-7) else VERSION=4.0.18 RPMVERSION=$(VERSION) endif MAJOR_VERS=$(shell echo $(VERSION) | cut -d'.' -f1) PLATFORM ?= $(shell uname -i) CFLAGS ?= -W -Wall -Werror -Wwrite-strings -Wextra -O2 -g \ -Wmissing-prototypes # -Wstrict-prototypes -Warray-bounds CFLAGS += -DGIT_VERSION=\"$(VERSION)\" \ -I. -I../include -I../include/linux/uapi -D_GNU_SOURCE=1 # Force 32-bit build # This is needed to generate the code for special environments. We have # some 64-bit machines where we need to support binaries compiled for # 32-bit. 
# # FORCE_32BIT=0 Use machine default # FORCE_32BIT=1 Enforce 32-bit build # ifeq ($(PLATFORM),x86_64) FORCE_32BIT ?= 0 ifeq ($(FORCE_32BIT),1) CFLAGS += -m32 LDFLAGS += -m32 XLDFLAGS = -melf_i386 ARFLAGS = else CFLAGS += -m64 LDFLAGS += -m64 XLDFLAGS = -melf_x86_64 ARFLAGS = endif else ARFLAGS = endif # Libcxl is required to run the CAPI version of this code. Libcxl is # available for normal CAPI/PCIe device usage, but also as simulation # version, which connects to the pslse server, which talks to the # hardware simulator. # # libcxl is enabled by default on architectures that support # libcxl (ppc64le). # # If you need to disable it, you can run Make with DISABLE_LIBCXL=1. # # If you want to use the bundled version of libcxl (*not recommended*), # run make with BUNDLE_LIBCXL=1. If your bundle is in some place other # than ../ext/libcxl, you can use CONFIG_LIBCXL_PATH to fix it. # # If you want to use the simulation (pslse) version of libcxl, run with # BUILD_SIMCODE=1. If your bundle is in some place other than # ../../pslse/libcxl, you can use CONFIG_LIBCXL_PATH to fix it. # # # libcxl cannot be enabled on platforms that don't have CAPI support. ifndef DISABLE_LIBCXL ifeq ($(PLATFORM), ppc64le) WITH_LIBCXL=1 endif ifeq ($(PLATFORM), ppc64) WITH_LIBCXL=1 endif ifdef BUILD_SIMCODE WITH_LIBCXL=1 BUNDLE_LIBCXL ?= 1 CONFIG_LIBCXL_PATH ?= ../../pslse/libcxl CFLAGS += -DCONFIG_BUILD_SIMCODE -I../ext/include endif CFLAGS += -I../include # Can be overwritten by makfile option ifeq ($(BUNDLE_LIBCXL),1) WITH_LIBCXL=1 CONFIG_LIBCXL_PATH ?= ../ext/libcxl CFLAGS += -I../ext/include endif # Finally, set any path needed. ifdef CONFIG_LIBCXL_PATH CFLAGS += -I$(CONFIG_LIBCXL_PATH) -I$(CONFIG_LIBCXL_PATH)/include LDFLAGS += -L$(CONFIG_LIBCXL_PATH) libcxl_a = $(CONFIG_LIBCXL_PATH)/libcxl.a endif # !CONFIG_LIBCXL_PATH endif # !DISABLE_LIBCXL # z_ prefixed version of libz, intended to be linked statically with # our libz version to provide the software zlib functionality. 
# Allow overwriting the ZLIB_PATH to the right libz.so file e.g. via # spec file during RPM build. # CONFIG_DLOPEN_MECHANISM ?= 1 # CONFIG_ZLIB_PATH ?= /usr/lib64/libz.so.1 CONFIG_ZLIB_PATH ?= $(shell /sbin/ldconfig -p | grep libz.so.1 | cut -d' ' -f4 | head -n1) ifeq ($(CONFIG_DLOPEN_MECHANISM),1) CFLAGS += -DCONFIG_DLOPEN_MECHANISM -DCONFIG_ZLIB_PATH=\"$(CONFIG_ZLIB_PATH)\" else CONFIG_LIBZ_PATH=../zlib-1.2.8 CFLAGS += -I$(CONFIG_LIBZ_PATH) libz_a=libz_prefixed.o endif DESTDIR ?= /usr LIB_INSTALL_PATH ?= $(DESTDIR)/lib64/genwqe INCLUDE_INSTALL_PATH ?= $(DESTDIR)/include/genwqe MAN_INSTALL_PATH ?= $(DESTDIR)/share/man/man1 genwqe-user-4.0.18/ext/000077500000000000000000000000001303345043000146415ustar00rootroot00000000000000genwqe-user-4.0.18/ext/libcxl/000077500000000000000000000000001303345043000161165ustar00rootroot00000000000000genwqe-user-4.0.18/include/000077500000000000000000000000001303345043000154645ustar00rootroot00000000000000genwqe-user-4.0.18/include/afu_regs.h000066400000000000000000000442471303345043000174430ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #pragma once __BEGIN_DECLS #define CGZIP_CR_DEVICE 0x00000602 /* 0x0000044c */ #define CGZIP_CR_VENDOR 0x00001014 #define CGZIP_CR_CLASS 0x00120000 #define MMIO_IMP_VERSION_REG 0x00000000ull /* Implementation Version Register (IVR) ===================================== 63..48 RO: Reserved 47..32 RO: AFU logic frequency, divided by 10000. Examples: 0x61A8 (25000): 250.00MHz 0x4E20 (20000): 200.00MHz 0x4650 (18000): 180.00MHz 0x411B (16667): 166.67MHz Note: The PSL interface and job control interfaces are always clocked with 250MHz. 31..24 RO: Build Year (decade, BCD coded) 0x15: 2015 23..16 RO: Build Month (BCD coded) Example: 0x10: October 15...8 RO: Build Day (BCD coded) 0x24: 24th 7...4 RO: Reserved 3...0 RO: Build Count (binary, count from 0) 0xE: 15th build on that day POR value depends on build date. Example for 180MHz, 7th build on October 31st, 2015: 0x00004650_15103106 */ #define MMIO_APP_VERSION_REG 0x00000008ull /* AFU Version Register (AVR) ========================== 63..40 RO: Release ID (optional): Release Identifier or mkrel release name or otherwise designator that uniquely identifies how to retrieve the VHDL source code that produced this AFU. Higher numbers are later versions. 
39..32 RO: Application Layer Architecture 0x02: GZIP DDCB with dynamic Huffman support 0x03: GZIP DDCB with dynamic Huffman support and MMIO driven queue 31...0 RO: Application Identifier 0x475A4950: GZIP POR value: 0x00000003_475A4950 */ #define MMIO_AFU_CONFIG_REG 0x00000010ull /* Time Slice Register (TSR) ========================= 63...0 RW: Minimum available time slice per context POR value: 0x00000000_00000200 corresponds to 524288 ns X * 1/200Mhz (X = 131072 * 1 / 200 Mhz = 524288 nsec) */ #define MMIO_AFU_STATUS_REG 0x00000018ull /* AFU Status Register (ASR) ======================== 63..14 RO: Reserved 13...8 Non-fatal Master Access errors: 13 RC: MMIO Cfg Write access (always illegal) 12 RO: Reserved 11 RC: Illegal MMIO write address 10 RC: Illegal MMIO write alignment 9 RC: Illegal MMIO read address 8 RC: Illegal MMIO read alignment 7...5 RO: Reserved 4 RO: DEBUG REGISTER was written (to be removed!) 3 RO: A config register (e.g. DTR) was written 2 RO: A Huffman encoding register was written 1 RO: An Aggravator Register was written O RO: An Error Injection register was written */ #define MMIO_AFU_COMMAND_REG 0x00000020ull /* AFU Command Register (ACR) ========================= 63...4 RO: Reserved 3...0 RW: Command Legal commands are: 0x4 Abort: Abort current DDCB and set accelerator to finished immediately (asserting aXh_jdone) 0x2 Stop: Finish current DDCB, then set accelerator to finished (asserting aXh_jdone) 0x0 NOP */ #define MMIO_FRT_REG 0x00000080ull /* Free run timer X * 1/200Mhz */ /* Freerunning Timer (FRT) ====================== 63...0 RO: Counter counting the number of clock cycles since reset (afu open) This counter increments with the 250MHz PSL clock. 
*/ #define MMIO_DDCB_TIMEOUT_REG 0x00000088ull /* X * 1/180Mhz (set to */ /* DDCB Timeout Register (DTR) ========================== 63 RW: Enable DDCB Timeout checking 62..32 RO: Reserved 31...0 RW: DDCB Timeout value (this value decrements with 180MHz clock) POR value: 0x80000000_0ABA9500 timeout enabled to 1s */ #define MMIO_DDCB_CID_REG 0x00000120ull /* Context ID REG */ /* Master Context Register (MCR) ============================ Address: 0x0000120 63..32 RO: Reserved 63 RO: Set to '1' for master register 62..26 RO: Reserved 25..16 RO: Current context id (10 bits corresponding to 512 contexts) 15...0 RO: 0xffff for Master access */ #define MMIO_FIR_REGS_BASE 0x00001000ull /* FIR: 1000...1028 */ /* Job-Manager FIRs ================ Address: 0x0001000 63..6 RO: Reserved 5 RC: EA Parity Error 4 RC: COM Parity Error 3 RC: DDCB Read FSM Error 2 RC: DDCB Queue Control FSM Error 1 RC: Job Control FSM Error 0 RC: Context Control FSM Error MMIO FIRs ========= Address: 0x0001008 63..10 RO: Reserved 9 RC: MMIO DDCBQ Work-Timer RAM Parity Error 8 RC: MMIO DDCBQ DMA-Error RAM Parity Error 7 RC: MMIO DDCBQ Last Sequence Number RAM Parity Error 6 RC: MMIO DDCBQ Index and Sequence Number RAM Parity Error 5 RC: MMIO DDCBQ Non-Fatal-Error RAM Parity Error 4 RC: MMIO DDCBQ Status RAM Parity Error 3 RC: MMIO DDCBQ Config RAM Parity Error 2 RC: MMIO DDCBQ Start Pointer RAM Parity Error 1 RC: MMIO Write Address Parity Error 0 RC: MMIO Write Data Parity Error DMA FIRs ======== Address: 0x0001010 63..10 RO: Reserved 9 RC: DMA Aligner Write FSM Error 8 RC: DMA Aligner Read FSM Error 7 RO: Reserved 6 RC: HA Buffer Interface Write Data Error 5 RC: HA Buffer Interface Write Tag Error 4 RC: HA Buffer Interface Read TAG Error 3 RC: HA Response Interface Tag Error 2 RC: DMA Write Control FSM Error 1 RC: DMA Read Control FSM Error 0 RC: AH Command FSM Error DDCB-Manager FIRs ================= Address: 0x0001018 63..31 RO: Reserved 30 RC: Dictionary Size Error 29 RC: Decompression Dictionary 
Count Parity Error or Dictionary Words To Write Parity Error 28 RC: Copy Length Parity Error 27 RC: Copy Length Decompression Parity Error 26 RC: Compression Dictionary Error 25 RC: Checker: Write Data Parity Error 24 RC: Checker: Read Data Parity Error 23..22 RO: Reserved 21 RC: Copy Length Compression Parity Error 20 RC: Data Read Counter Parity Error 19 RC: Data Write Counter Parity Error 18 RC: Compression Data Buffer Read On Empty Fifo 2 17 RC: Compression Data Buffer Read On Empty Fifo 1 16 RC: Compression Data Buffer Overrun 15..13 RO: Reserved 12 RC: Compression Checker: Write On Full Fifo 11 RC: Compression Checker: Read On Empty Fifo 10 RC: Compression Checker: Write On Full Big Fifo 9 RC: Compression Checker: Read On Empty Big Fifo 8 RC: Compression Checker Compare Error 7 RO: Reserved 6 RC: SQB Data Out Parity Error 5 RC: DDCB Manager Register Parity Fail 4 RC: Bad AC Function ID 3 RC: Compression Dictionary Data Parity Error 2 RC: DDCB Data Error 1 RC: DDCB Manager State Machine 1 Error 0 RC: DDCB Manager State Machine 0 Error Compression FIRs ================= Address: 0x0001020 63...9 RO: Reserved 8 RC: EOB Symbol Width Equal Zero 7 RC: Huffman Output Buffer Underrun 6 RC: Huffman Output Buffer Overrun 5 RC: Huffman Input Buffer Underrun 4 RC: Huffman Input Buffer Overrun 3...2 RO: Reserved 1 RC: More Than 1032 Bytes Taken 0 RC: Parity Error Data In Decompression FIRs ================= Address: 0x0001028 63..21 RO: Reserved 20 RC: Slave RAS Error 19 RC: Master RAS Error 18 RC: Data Cross Check Error 17 RC: Dictionary Read Data Cross Check Error 16 RC: Decompression Control Cross Check Error 15 RC: Decompression Control Slave IVL Count Error 14 RC: Decompression Control Slave Dictionary Read Address Parity Error 13...8 RO: Reserved 7 RC: Decompression Control Master IVL Count Error 6 RC: Decompression Control Master Dictionary Read Address Parity Error 5...0 RO: Reserved */ #define MMIO_FIR_REGS_NUM 6 #define MMIO_ERRINJ_MMIO_REG 0x00001800ull /* 
Error Injection Job-Manager =========================== Address: 0x0001800 63..17 RO: Reserved 16 RS: Force DDCBQ Ctrl State Machine Hang 15...0 RO: Reserved Error Injection MMIO ==================== Address: 0x0001808 63...1 RO: Reserved 16 RS: Inject MMIO Read Response Data Parity error into PSL interface 15...1 RO: Reserved 0 RS: Inject MMIO Write Data Parity error Error Injection DMA =================== Address: 0x0001810 63..22 RO: Reserved 21 RS: Inject error into DMA write path (flip data bit) 20 RS: Inject error into DMA read path (flip data bit) 19 RS: Inject parity error into command on AH Command Bus to PSL 18 RS: Inject parity error into effective address on AH Command Bus to PSL 17 RS: Inject parity error into response on AH Buffer Interface to PSL 16 RS: Inject parity error into response tag on AH Command Bus to PSL 15-..0 RO: Reserved */ #define MMIO_ERRINJ_GZIP_REG 0x00001818ull /* Error Injection GZIP ==================== Address: 0x0001818 63..17 RO: Reserved 16 RS: Inject error into compression/decompression checker (force miscompare) 15...1 RO: Reserved 0 RS: Inject error into compression dictionary */ #define MMIO_AGRV_REGS_BASE 0x00002000ull /* Aggravator Register =================== Note: The value that is written into this register will be rotated left every cycle. Throttling is active in cycles where bit 63 equals '1'. 
Address: 0x0002000 63..0 RW: GZIP DATA READ Throttle Register Address: 0x0002008 63..0 RW: GZIP DATA WRITE Throttle Register Address: 0x0002010 63..0 RW: DMA DATA READ Throttle Register Address: 0x0002018 63..0 RW: DMA DATA WRITE Throttle Register Address: 0x0002020 63..0 RW: DMA FSM READ Throttle Register Address: 0x0002028 63..0 RW: DMA FSM WRITE Throttle Register Address: 0x0002030 63..0 RW: DMA FSM CMD Throttle Register */ #define MMIO_AGRV_REGS_NUM 7 #define MMIO_GZIP_REGS_BASE 0x2100ull /* GZIP Huffman Literal/Length Code Register ========================================= Address: 0x0002100 63..56 RW: RAM Address 28..24 RW: Literal/Length Code Width 19...0 RW: Literal/Length Code GZIP Huffman Distance Code Register =================================== Address: 0x0002108 63..56 RW: RAM Address 35..32 RW: Distance Extra Bit Width 27..24 RW: Distance Code Width 19...5 RW: Distance Code GZIP Huffman Decider Literal/Length Width Register ================================================== Address: 0x0002110 63..56 RW: RAM Address 39..35 RW: Literal/Length Code Width Tree 0 34..30 RW: Literal/Length Code Width Tree 1 29..25 RW: Literal/Length Code Width Tree 2 24..20 RW: Literal/Length Code Width Tree 3 19..15 RW: Literal/Length Code Width Tree 4 14..10 RW: Literal/Length Code Width Tree 5 9...5 RW: Literal/Length Code Width Tree 6 4...0 RW: Literal/Length Code Width Tree 7 GZIP Huffman Decider Distance Width Register ============================================ Address: 0x0002118 63..56 RW: RAM Address 39..35 RW: Distance Code Width Tree 0 34..30 RW: Distance Code Width Tree 1 29..25 RW: Distance Code Width Tree 2 24..20 RW: Distance Code Width Tree 3 19..15 RW: Distance Code Width Tree 4 14..10 RW: Distance Code Width Tree 5 9...5 RW: Distance Code Width Tree 6 4...0 RW: Distance Code Width Tree 7 GZIP Huffman Tree RAM Register ============================== Address: 0x0002120 63..56 RW: RAM Address RAM address bits 59:56 is used to address the position of the 40 
bits Tree RAM Data inside the 160 bits. The list below shows the data position for each address value. --00b = 39:0 --01b = 79:40 --10b = 119:80 --11b = 159:120 39...0 RW: Tree RAM Data GZIP Huffman Decider Control Register ===================================== Address: 0x0002178 63..56 RW: RAM Address 24 RW: Enable Predefine Values * 1b = bit 20 and bits 18:16 are valid 20 RW: Use Predefine Huffman Tree * 1b = The GZIP Deflate Core Decider Logic is using the predefined Huffman tree (18:16). This tree will be used until the next write or the next power on. * 0b = The GZIP Deflate Core Decider Logic is no longer using the predefined Huffman tree 18..16 RW: Predefine Huffman Tree 12 RW: Enable Decider Window * 1b = overrides the GZIP Deflate Core Decider Logic Decider Window value. 10..0 RW: Maximum Decider Window * The Value after power on = 512 (6K). * The value of this field multiplied by 12 is the size of the Decider Window. * After a write, the Window remains until the next power on. * A larger value could reduce the bandwidth of the GZIP Deflate Core. * In case of poorly compressible input data, the Decider Window could be smaller than this value. This happens due to GZIP Deflate Core internal buffer sizes. In such a case the Decider Window is not exactly predictable. */ #define MMIO_GZIP_REGS_NUM 16 /* Context is active if bit is set */ #define MMIO_CASV_REG 0x00003000ull /* Address: 0x003000 + m * 0x000008 (m = 0,...,15) 63..32 RO: Reserved 31..0 RO: Context m*32+k is attached if (and only if) bit k is set. (for each k = 0,..,31) */ #define MMIO_CASV_REG_NUM 16 /* ATTACH Status REG: 0x3000 ... 0x3078 */ #define MMIO_CASV_REG_CTX 32 /* There are 32 bits in each of these regs */ #define MMIO_DEBUG_REG 0x0000FF00ull /* DEBUG REGISTER (to be removed!) 
=============================== Address: 0x000FF00 RW 0x000FF08 RC 0x000FF10 RS 63..4 Reserved 3 Enable Parity checking 2..0 PSL Translation Ordering behavior */ #define MMIO_CTX_OFFSET 0x00010000ull /* Offset for each Context */ #define MMIO_MASTER_CTX_NUMBER 0 #define MMIO_SLAVE_CTX_NUM 512 /***************************************** ** Slave PSA for Context n ** *****************************************/ /* Note Registers on Address 0x0000000 + (n+1) * 0x0010000 to * 0x0000080 + (n+1) * 0x0010000 * are the same as for the Master Context. They only will be Mapped RO. */ #define MMIO_DDCBQ_START_REG 0x00000100ull /* DDCB Queue Start Pointer Register (QSPR) ======================================== Address: 0x0000100 + (n+1) * 0x0010000 63...0 Pointer to start of DDCB queue in system memory 63...8 RW 7...0 RO: Always 0 POR value: 0x00000000_00000000 Value after afu_attach: WED pointer */ #define MMIO_DDCBQ_CONFIG_REG 0x00000108ull /* DDCB Queue Configuration Register (QCfgR) ======================================= ** This register must not be written while the DDCB queue is active ** ** A valid write operation into this register also resets the corresponding ** DDCB Queue Work Timer ** Address: 0x0000108 + (n+1) * 0x0010000 63..48 RW: First expected DDCB sequence number 47..32 RO: Reserved 31..24 RW: First DDCB index to execute. 
Must be <= Max DDCB index 23..16 RW: Max DDCB index 15...0 RO: Reserved POR value: 0x00000000_00000000 */ #define MMIO_DDCBQ_COMMAND_REG 0x00000110ull /* DDCB Queue Command Register (QCmdR) =================================== Address: 0x0000110 + (n+1) * 0x0010000 63..48 RW: Argument 47...4 RO: Reserved 3...0 RW: Command Legal commands are: 0x4 Abort: Stop all DDCB activities for this queue immediately (Argument: Don't care) 0x2 Stop: Finish current DDCB, then stop queue (Argument: Don't care) 0x1 Start: Execute DDCBs (Argument: must be set) 0x0 NOP POR value: 0x00000000_00000000 */ #define MMIO_DDCBQ_STATUS_REG 0x00000118ull /* DDCB Queue Status Register (QSR) ================================ Address: 0x0000118 + (n+1) * 0x0010000 63..48 RO: Current DDCB sequence number 47..32 RO: Last DDCB sequence number to be executed 31..24 RO: Current DDCB index. 23...8 Non-fatal errors: 23 RO: Reserved 22 RC: DMA Failed Error (see DMA Error Address Register for DMA address triggering the error) 21 RC: DMA Data Error (see DMA Error Address Register for DMA address triggering the error) 20 RC: DMA Address Error (see DMA Error Address Register for DMA address triggering the error) 19 RO: Reserved 18 RC: Received illegal command in DDCB Queue Command Register 17 RC: Invalid Sequence number in DDCB (queue will be stopped) 16 RC: Write attempt to DDCB Queue Start Pointer register while Queue active 15 RC: Write attempt to DDCB Queue Configuration register while Queue active 14 RC: Write attempt to DDCB Queue Configuration register with first DDCB index > max DDCB index 13 RC: MMIO Cfg Write access (always illegal) 12 RC: MMIO Write access to master register via slave address 11 RC: Illegal MMIO write address 10 RC: Illegal MMIO write alignment 9 RC: Illegal MMIO read address 8 RC: Illegal MMIO read alignment 7...6 RO: Reserved 5 RO: Currently executing DDCB 4 RO: Queue Active 1=fetching and executing DDCBs until last DDCB sequence number is reached 0=stopped 3...0 RO: Command that 
is currently being executed (see DDCB Queue Command Register) Value 0x0 (NOP) means: Currently, no command is active */ #define MMIO_DDCBQ_CID_REG 0x00000120ull /* Context ID REG */ /* Slave Context Register (SCR) ============================ Address: 0x0000120 + (n+1) * 0x0010000 63..32 RO: Reserved 31..26 RO: "000000" for Slave 25..16 RO: Current context id (10 bits corresponding to 512 contexts) 15..10 RO: "000000" for Slave access 9...0 RO: My context id (10 bits corresponding to 512 contexts) */ #define MMIO_DDCBQ_DMAE_REG 0x00000128ull /* DDCB Queue DMA Error Address Register (QDEAR) ============================================= Address: 0x0000128 + (n+1) * 0x0010000 63...0 RO: DMA address that caused the error */ #define MMIO_DDCBQ_WT_REG 0x00000180ull /* DDCB Queue Work Timer (QWT) =========================== Address: 0x0000180 + (n+1) * 0x0010000 63...0 RO: Counter counting the number of clock cycles during DDCB execution for this context (Counter gets reset with every valid DDCBQ CONFIG Register write access; the value is persistent during reset) This counter increments with the 250MHz PSL clock. */ __END_DECLS genwqe-user-4.0.18/include/ddcb.h000066400000000000000000000114401303345043000165310ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef __DDCB_H__ #define __DDCB_H__ #include #include #include #ifdef __cplusplus extern "C" { #endif /** * SHI: Software to Hardware Interlock * This 1 byte field is written by software to interlock the * movement of one queue entry to another with the hardware in the * chip. */ #define DDCB_SHI_INTR 0x04 /* Bit 2 */ #define DDCB_SHI_PURGE 0x02 /* Bit 1 */ #define DDCB_SHI_NEXT 0x01 /* Bit 0 */ /* HSI: Hardware to Software interlock * This 1 byte field is written by hardware to interlock the movement * of one queue entry to another with the software in the chip. */ #define DDCB_HSI_COMPLETED 0x40 /* Bit 6 */ #define DDCB_HSI_FETCHED 0x04 /* Bit 2 */ /** * Accessing HSI/SHI is done 32-bit wide * Normally 16-bit access would work too, but on some platforms the * 16 compare and swap operation is not supported. Therefore * switching to 32-bit such that those platforms will work too. * * iCRC HSI/SHI */ #define DDCB_INTR_BE32 __cpu_to_be32(0x00000004) #define DDCB_PURGE_BE32 __cpu_to_be32(0x00000002) #define DDCB_NEXT_BE32 __cpu_to_be32(0x00000001) #define DDCB_COMPLETED_BE32 __cpu_to_be32(0x00004000) #define DDCB_FETCHED_BE32 __cpu_to_be32(0x00000400) /* CRC polynomials for DDCB */ #define CRC16_POLYNOMIAL 0x1021 /* Definitions of DDCB presets */ #define DDCB_PRESET_PRE 0x80 #define ICRC_LENGTH(n) ((n) + 8 + 8 + 8) /* used ASIV + hdr fields */ #define VCRC_LENGTH(n) ((n)) /* used ASV */ #define ASIV_LENGTH (0x80 - 0x18) /* 104 */ #define ASIV_LENGTH_ATS (0x80 - 0x20) /* 96 */ #define ASV_LENGTH (0xc0 - 0x80) /* 64 */ /* Interlock flags */ #define HSI_COMPLETED 0x40 #define HSI_FETCHED 0x04 #define SHI_NEXT 0x01 #define SHI_PURGE 0x02 #define SHI_INTR 0x04 /** * The fields are defined to be in big endian format. 
*/ struct ddcb_t { union { __be32 icrc_hsi_shi_32; /**< CRC HW to SW/SW to HW Interlk */ struct { __be16 icrc_16; uint8_t hsi; uint8_t shi; }; }; uint8_t pre; /**< Preamble */ uint8_t xdir; /**< Execution Directives */ __be16 seqnum; /**< Sequence Number */ uint8_t acfunc; /**< Accelerator Function.. */ uint8_t cmd; /**< Command. */ __be16 cmdopts_16; /**< Command Options */ uint8_t sur; /**< Status Update Rate */ uint8_t psp; /**< Protection Section Pointer */ __be16 rsvd_0e; /**< Reserved invariant */ __be64 fwiv; /**< Firmware Invariant. */ union { uint8_t __asiv[ASIV_LENGTH]; /**< Appl Spec Invariant */ struct { __be64 ats_64; /**< Address Translation Spec */ uint8_t asiv[ASIV_LENGTH_ATS]; /**< New ASIV */ } n; }; /* Note: 2nd Cache line starts here. */ uint8_t asv[ASV_LENGTH]; /**< Appl Spec Variant */ __be16 rsvd_c0; /**< Reserved Variant */ __be16 vcrc_16; /**< Variant CRC */ __be32 rsvd; /**< Reserved unprotected */ __be64 deque_ts_64; /**< Deque Time Stamp. */ __be16 retc_16; /**< Return Code. Note Must be cleared by SW */ __be16 attn_16; /**< Attention/Extended Error Codes */ __be32 progress_32; /**< Progress indicator. */ __be64 cmplt_ts_64; /**< Completion Time Stamp. */ __be32 ibdc; __be32 obdc; __be64 rsvd_SLH; /**< Dispatch TimeStamp */ uint8_t priv8[8]; /**< Driver usage */ __be64 disp_ts_64; /**< Dispatch TimeStamp */ } __attribute__((__packed__)); typedef struct ddcb_t ddcb_t; #define DDCB_SIZE sizeof(ddcb_t) /** * @brief Generate 16-bit crc as required for DDCBs * polynomial = x^16 + x^12 + x^5 + 1 (0x1021) * - example: * 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff * should result in a crc16 of 0x89c3 * * @param buff pointer to data buffer * @param len length of data for calculation * @param init initial crc (0xffff at start) * * @return crc16 checksum in big endian format ! 
* * Example: icrc = ddcb_crc16((const uint8_t *)pddcb, * ICRC_LENGTH(cmd->asiv_length), 0xffff); */ static inline __be16 ddcb_crc16(const uint8_t *buff, size_t len, uint16_t init) { int i; uint16_t crc = init; while (len--) { crc = crc ^ (*buff++ << 8); for (i = 0; i < 8; i++) { if (crc & 0x8000) crc = (crc << 1) ^ CRC16_POLYNOMIAL; else crc = crc << 1; } } return crc; } #endif /* __DDCB_H__ */ genwqe-user-4.0.18/include/deflate_ddcb.h000066400000000000000000000172331303345043000202230ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __ZEDC_DDCB_H__ #define __ZEDC_DDCB_H__ /* * Description of the zEDC DDCB format for inflate and deflate. Each * DDCB references DMA memory for input, output and workspace. The * Driver takes care to replace the original user-space addresses in * to DMA addresses to raw memory or to create scatter-gather-lists to * describe the referenced memory. 
*/ #include #include #include #include #ifdef __cplusplus extern "C" { #endif /**< DDCB commands */ #define ZEDC_CMD_INFLATE 0x01 #define ZEDC_CMD_DEFLATE 0x02 /**< DEFLATE command options */ #define DDCB_OPT_DEFL_IBUF_INDIR (1 << 0) #define DDCB_OPT_DEFL_OBUF_INDIR (1 << 1) #define DDCB_OPT_DEFL_SAVE_DICT (1 << 2) #define DDCB_OPT_DEFL_STATE_PROVIDED (1 << 3) #define DDCB_OPT_DEFL_SAVE_STATE (1 << 4) #define DDCB_OPT_DEFL_START_BLOCK (1 << 5) #define DDCB_OPT_DEFL_END_BLOCK (1 << 6) #define DDCB_OPT_DEFL_RAS_CHECK (1 << 15) /**< INFLATE command options */ #define DDCB_OPT_INFL_IBUF_INDIR (1 << 0) #define DDCB_OPT_INFL_OBUF_INDIR (1 << 1) #define DDCB_OPT_INFL_SAVE_DICT (1 << 2) #define DDCB_OPT_INFL_STATE_PROVIDED (1 << 3) #define DDCB_OPT_INFL_SAVE_STATE (1 << 4) #define DDCB_OPT_INFL_STOP_BLOCK (1 << 5) #define DDCB_OPT_INFL_STOP_TREE (1 << 6) #define DDCB_OPT_INFL_RAS_CHECK (1 << 15) /** * Workspace for deflate: * +-----------------------++-----------------------++ * | 32KiB dict || 32KiB dict || * | + 16 bytes shift area || + 16 bytes shift area || * +-----------------------++-----------------------++ * | wspace_page 0 | wspace_page 1 | * * Workspace for inflate: FIXME Currently no padding FIXME 480 and not 496 * +-----------------------++-----------------------++----------+----------+ * | 32KiB dict || 32KiB dict || 496 byte | 512 byte | * | + 16 bytes shift area || + 16 bytes shift area || padding | hdr | * +-----------------------++-----------------------++----------+----------+ * | wspace_page 0 | wspace_page 1 | */ /* * FIXME Are the definitions OK? Is the 0x8000 + 0x8000 OK? Are the * 496 padding bytes ok? 
*/ /**< Additional Parameters */ #define ZEDC_DICT_LEN (0x8000 + 16) /* 32kb + 16 */ #define ZEDC_TREE_LEN (0x0200) /* real: <= 288(dec) */ #define ZEDC_DEFL_WORKSPACE_SIZE (2 * ZEDC_DICT_LEN) #define ZEDC_INFL_TREE_START (0x8000 + 0x8000 + ZEDC_TREE_LEN) #define ZEDC_INFL_WORKSPACE_SIZE (ZEDC_INFL_TREE_START + ZEDC_TREE_LEN) /* * Worksspace definition for inflate and deflate. */ struct zedc_wsp { uint8_t dict[2][ZEDC_DICT_LEN]; /* two dicts + extra bytes */ uint8_t tree[ZEDC_TREE_LEN]; /* FIXME should be 512 byte aligned */ }; #define ZEDC_ONUMBYTES_v0 (23) /* 0xa0 ... 0xb6 */ #define ZEDC_ONUMBYTES_v1 (24) /* 0xa0 ... 0xb7 */ #define ZEDC_ONUMBYTES_EXTRA (7) /* 0xb9 ... 0xbf */ #define ZEDC_INFL_AVAIL_IN_MAX (0xffffffff - 1023) /* 4GiB - 1KiB */ /** * Application specific invariant part of the DDCB (104 bytes: 0x18...0x7f) * see ZEDC Data Compression HLD spec 0.90: 5.3 Application DDCB Fields. */ /* ASIV specific part for compression (deflate) */ /* DDCB range: 0x18 ... 0x7f */ struct zedc_asiv_infl { uint64_t in_buff; /**< 0x20 inp buff DMA addr */ uint32_t in_buff_len; /**< 0x28 inp buff length */ uint32_t in_crc32; /**> 0x2C inp buff CRC32 */ uint64_t out_buff; /**< 0x30 outp buf DMA addr */ uint32_t out_buff_len; /**< 0x38 outp buf length */ uint32_t in_adler32; /**> 0x3C inp buff ADLER32 */ uint64_t in_dict; /**< 0x40 inp dict DMA addr. 
*/ uint32_t in_dict_len; /**< 0x48 inp dict length */ uint32_t rsvd_0; /**< 0x4C reserved */ uint64_t inp_scratch; /**< 0x50 inp hdr/scr DMA addr */ uint32_t in_scratch_len; /**< 0x58 total used */ uint16_t in_hdr_bits; /**< 0x5C */ uint8_t hdr_ib; /**< 0x5E */ uint8_t scratch_ib; /**< 0x5F */ uint64_t out_dict; /**< 0x60 outp dict DMA addr */ uint32_t out_dict_len; /**< 0x68 outp dict length */ uint32_t rsvd_1; /**< 0x6C reserved */ uint64_t rsvd_2; /**< 0x70 reserved */ uint64_t rsvd_3; /**< 0x78 reserved */ } __attribute__((__packed__)) __attribute__((__may_alias__)); #define INFL_STAT_PASSED_EOB 0x01 #define INFL_STAT_RESERVED1 0x02 #define INFL_STAT_FINAL_EOB 0x04 #define INFL_STAT_REACHED_EOB 0x08 #define INFL_STAT_RESERVED2 0x10 #define INFL_STAT_HDR_TYPE1 0x20 /* Bit 5, see spec */ #define INFL_STAT_HDR_TYPE2 0x40 /* Bit 6, see spec */ #define INFL_STAT_HDR_TYPE (INFL_STAT_HDR_TYPE1 | INFL_STAT_HDR_TYPE2) #define INFL_STAT_HDR_BFINAL 0x80 /* DDCB range: 0x80 ... 0xbf */ struct zedc_asv_infl { uint16_t out_dict_used; /**> 0x80 */ uint16_t copyblock_len; /**> 0x82 */ uint8_t rsvd_84; /**> 0x84 */ uint8_t infl_stat; /**> 0x85 */ uint8_t rsvd_86; /**> 0x86 */ uint8_t proc_bits; /**> 0x87 */ uint32_t hdr_start; /**> 0x88 */ uint8_t rsvd_8c; /**> 0x8c */ uint8_t hdr_start_bits; /**> 0x8d */ uint16_t out_hdr_bits; /**> 0x8e */ uint32_t out_crc32; /**< 0x90 */ uint32_t out_adler32; /**< 0x94 */ uint32_t inp_processed; /**< 0x98 */ uint32_t outp_returned; /**< 0x9c */ uint64_t rsvd_a0[3]; /**> 0xa0, 0xa8, 0xb0 */ uint8_t out_dict_offs; /**> 0xb8 */ uint8_t rsvd_b9; /**> 0xb9 */ uint16_t obytes_in_dict; /**< 0xba */ uint16_t rsvd_bc; /**< 0xbc */ uint16_t rsvd_be; /**< 0xbe ... 0xbf */ }__attribute__((__packed__)) __attribute__((__may_alias__)); /* ASIV specific part for compression (deflate) */ /* DDCB range: 0x20 ... 
0x7f */ struct zedc_asiv_defl { uint64_t in_buff; /**< 0x20 inp buff DMA addr */ uint32_t in_buff_len; /**< 0x28 inp buff length */ uint32_t in_crc32; /**< 0x2C inp buff CRC32 */ uint64_t out_buff; /**< 0x30 outp buff DMA addr */ uint32_t out_buff_len; /**< 0x38 outp buff length */ uint32_t in_adler32; /**< 0x3C inp buff ADLER32 */ uint64_t in_dict; /**< 0x40 inp dict DMA addr */ uint32_t in_dict_len; /**< 0x48 inp dict length */ uint32_t rsvd_0; /**< 0x4C reserved */ uint64_t rsvd_1; /**< 0x50 reserved */ uint64_t rsvd_2; /**< 0x58 reserved */ uint64_t out_dict; /**< 0x60 outp dict DMA addr */ uint32_t out_dict_len; /**< 0x68 outp dict length */ uint32_t rsvd_3; /**< 0x6C reserved */ uint64_t rsvd_4; /**< 0x70 reserved */ uint8_t ibits[7]; /**< 0x78 partial symbol */ uint8_t inumbits; /**< 0x7f valid bits (ibits) */ } __attribute__((__packed__)) __attribute__((__may_alias__)); /* ASV DDCB range: 0x80 ... 0xbf */ struct zedc_asv_defl { /* for deflate */ uint16_t out_dict_used; /**< 0x80 */ uint8_t resrv_1[5]; /**< 0x82 */ uint8_t onumbits; /**< 0x87 */ uint64_t resrv_2; /**< 0x88 */ uint32_t out_crc32; /**< 0x90 */ uint32_t out_adler32; /**< 0x94 */ uint32_t inp_processed; /**< 0x98 */ uint32_t outp_returned; /**< 0x9c */ uint8_t obits[ZEDC_ONUMBYTES_v1]; /**< 0xa0 ... 0xb7 */ uint8_t out_dict_offs; /**< 0xb8 */ uint8_t obits_extra[ZEDC_ONUMBYTES_EXTRA]; /**< 0xb9 ... 0xbf */ } __attribute__((__packed__)) __attribute__((__may_alias__)); #ifdef __cplusplus } #endif #endif /* __ZEDC_DDCB_H__ */ genwqe-user-4.0.18/include/deflate_fifo.h000066400000000000000000000061761303345043000202560ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __LIBZEDC_FIFO_H__ #define __LIBZEDC_FIFO_H__ /* * To store temporary data the deflate code uses the FIFO data * structure defined in this file. Storing data is required if the * output buffer in the zedc_stream struct is not sufficient to store * the produced data. This can happen e.g. for the ZLIB/GZIP header * data or the ADLER32 and CRC32/data-size trailer at the end of an * RFC1950, RFC1952 data stream. In case of RFC1951 data the header * and the end of stream symbols can be affected. If the last symbol * in an input stream produces more output bytes than the output * buffer can store, we also use this FIFO to temporarilly store the * data before it goes into the user provided output buffer. */ #include #include #include /* Must be n^2 and large engough to keep some spare bytes. 
*/ #define ZEDC_FIFO_SIZE 256 #define ZEDC_FIFO_MASK (ZEDC_FIFO_SIZE - 1) struct zedc_fifo { unsigned int push; /* push into FIFO here */ unsigned int pop; /* pop from FIFO here */ uint8_t fifo[ZEDC_FIFO_SIZE]; /* FIFO storage */ }; static inline void fifo_init(struct zedc_fifo *fifo) { memset(fifo->fifo, 0x00, ZEDC_FIFO_SIZE); fifo->pop = fifo->push = 0; } static inline int fifo_empty(struct zedc_fifo *fifo) { return (fifo->pop == fifo->push); } static inline unsigned int fifo_used(struct zedc_fifo *fifo) { return ((fifo->push - fifo->pop) & ZEDC_FIFO_MASK); } static inline unsigned int fifo_free(struct zedc_fifo *fifo) { return ZEDC_FIFO_SIZE - fifo_used(fifo) - 1; /* keep 1 more free */ } static inline int fifo_push(struct zedc_fifo *fifo, uint8_t data) { if (fifo_free(fifo) < 1) return 0; fifo->fifo[fifo->push] = data; fifo->push = (fifo->push + 1) & ZEDC_FIFO_MASK; return 1; } static inline int fifo_push32(struct zedc_fifo *fifo, uint32_t data) { unsigned int i; union { uint32_t u32; uint8_t u8[4]; } d; if (fifo_free(fifo) < 4) return 0; d.u32 = data; for (i = 0; i < 4; i++) { fifo->fifo[fifo->push] = d.u8[i]; fifo->push = (fifo->push + 1) & ZEDC_FIFO_MASK; } return 1; } static inline int fifo_pop(struct zedc_fifo *fifo, uint8_t *data) { if (fifo_empty(fifo)) return 0; *data = fifo->fifo[fifo->pop]; fifo->pop = (fifo->pop + 1) & ZEDC_FIFO_MASK; return 1; } static inline int fifo_pop16(struct zedc_fifo *fifo, uint16_t *data) { unsigned int i; union { uint16_t u16; uint8_t u8[2]; } d; if (fifo_used(fifo) < 2) return 0; for (i = 0; i < 4; i++) fifo_pop(fifo, &d.u8[i]); *data = d.u16; return 1; } #endif /* __LIBZEDC_FIFO_H__ */ genwqe-user-4.0.18/include/genwqe_vpd.h000066400000000000000000000060761303345043000200050ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __GENWQE_VPD_H__ #define __GENWQE_VPD_H__ /** * @file genwqe_vpd.h * @brief application library for hardware access * @date 03/09/2012 * * The GenWQE PCIe card provides the ability to speed up tasks by * offloading data processing. It provides a generic work queue engine * (GenWQE) which is used to pass the requests to the PCIe card. The * requests are to be passed in form of DDCB commands (Device Driver * Control Blocks). The device driver is allocating the next free DDCB * from the hardware queue and converts the DDCB-request defined in * this file into a DDCB. Once the request is passed to the card, the * process/thread will sleep and will be awoken once the request is * finished with our without success or a timeout condition occurred. 
* * IBM Accelerator Family 'GenWQE' */ #ifdef __cplusplus extern "C" { #endif /* VPD layout for GENWQE */ struct vpd_layout { const char *label; const int length; const char *mode; }; #define VPD_VERSION 0x102 /* Reference Table for VPD layout 102 */ static const struct vpd_layout vpd_ref_102[] = { {.label = "RV", .length = 2, .mode="X"}, {.label = "PN", .length = 7, .mode="A"}, {.label = "EC", .length = 7, .mode="A"}, {.label = "FN", .length = 7, .mode="A"}, {.label = "SN", .length = 13, .mode="A"}, {.label = "FC", .length = 5, .mode="A"}, {.label = "CC", .length = 4, .mode="A"}, {.label = "M0", .length = 6, .mode="X"}, {.label = "M1", .length = 6, .mode="X"}, {.label = "CS", .length = 4, .mode="X"} // Must be last one in file }; #define LINES_IN_VPD (sizeof(vpd_ref_102)/sizeof(struct vpd_layout)) #define VPD_SIZE (2*LINES_IN_VPD + LINES_IN_VPD + LINES_IN_VPD + \ 2+7+7+7+13+5+4+6+6+4) #define GENWQE_VPD_BUFFER_SIZE (64*1024) union swap_me { uint32_t ui32; struct { uint16_t uw16[2]; } WORD; struct { uint8_t ub8[4]; } BYTE; }; /* * X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 */ #define CRC32_POLYNOMIAL 0x04c11db7 #define CRC32_INIT_SEED 0xffffffff void genwqe_crc32_setup_lut(void); uint32_t genwqe_crc32_gen(uint8_t *buff, size_t len, uint32_t init); /* 2 Convert functions for VPD */ bool bin_2_csv(FILE *op, int fs, uint8_t *buffer); bool csv_2_bin(FILE *ip, uint8_t *buffer, int *size, uint32_t *crc32_result, uint32_t *crc32_from_csv); #ifdef __cplusplus } #endif #endif /* __GENWQE_VPD_H__ */ genwqe-user-4.0.18/include/libcard.h000066400000000000000000000342361303345043000172450ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __LIBCARD_H__ #define __LIBCARD_H__ /** * @file libcard.h * @brief application library for hardware access * * The GenWQE PCIe card provides the ability to speed up tasks by * offloading data processing. It provides a generic work queue engine * (GenWQE) which is used to pass the requests to the PCIe card. The * requests are to be passed in form of DDCB commands (Device Driver * Control Blocks). The device driver is allocating the next free DDCB * from the hardware queue and converts the DDCB-request defined in * this file into a DDCB. Once the request is passed to the card, the * process/thread will sleep and will be awoken once the request is * finished with our without success or a timeout condition occurred. 
* * IBM Accelerator Family 'GenWQE' */ #ifdef __cplusplus extern "C" { #endif #include #include #include /*****************************************************************************/ /** Version Information and Error Codes */ /*****************************************************************************/ #define GENWQE_LIB_VERS_STRING "3.0.23" /**< library error codes */ #define GENWQE_OK 0 #define GENWQE_ERRNO (-201) #define GENWQE_ERR_CARD (-202) #define GENWQE_ERR_OPEN (-203) #define GENWQE_ERR_VERS_MISMATCH (-204) #define GENWQE_ERR_INVAL (-205) #define GENWQE_ERR_FLASH_VERIFY (-206) #define GENWQE_ERR_FLASH_READ (-207) #define GENWQE_ERR_FLASH_UPDATE (-208) #define GENWQE_ERR_GET_STATE (-209) #define GENWQE_ERR_SIM (-210) #define GENWQE_ERR_EXEC_DDCB (-211) #define GENWQE_ERR_PINNING (-212) #define GENWQE_ERR_TESTMODE (-213) #define GENWQE_ERR_APPID (-214) /*****************************************************************************/ /** Useful macros in case they are not defined somewhere else */ /*****************************************************************************/ #ifndef ARRAY_SIZE # define ARRAY_SIZE(a) (sizeof((a)) / sizeof((a)[0])) #endif #ifndef ABS # define ABS(a) (((a) < 0) ? -(a) : (a)) #endif #ifndef MAX # define MAX(a,b) ({ __typeof__ (a) _a = (a); \ __typeof__ (b) _b = (b); \ _a > _b ? _a : _b; }) #endif #ifndef MIN # define MIN(a,b) ({ __typeof__ (a) _a = (a); \ __typeof__ (b) _b = (b); \ _a < _b ? _a : _b; }) #endif /*****************************************************************************/ /** Type definitions */ /*****************************************************************************/ #define CARD_DEVICE ("/dev/" GENWQE_DEVNAME "%u_card") typedef struct card_dev_t *card_handle_t; /** * @brief In case of RETC 0x110 and ATTN 0xE007 the DMA engine reports back * its detailed status in the ASV of the DDCB. 
*/ struct asv_runtime_dma_error { uint64_t raddr_be64; /* 0x80 */ uint32_t rfmt_chan_disccnt_be32; /* 0x88 */ uint16_t rdmae_be16; /* 0x8C */ uint16_t rsge_be16; /* 0x8E */ uint64_t res0; /* 0x90 */ uint64_t res1; /* 0x98 */ uint64_t waddr_be64; /* 0xA0 */ uint32_t wfmt_chan_disccnt_be32; /* 0xA8 */ uint16_t wdmae_be16; /* 0xAC */ uint16_t wsge_be16; /* 0xAE */ uint64_t res2; /* 0xB0 */ uint64_t res3; /* 0xB8 */ } __attribute__((__packed__)) __attribute__((__may_alias__)); /*****************************************************************************/ /** Function Prototypes */ /*****************************************************************************/ /** Genwqe file operations */ #define GENWQE_CARD_REDUNDANT -1 /* redundant card support */ #define GENWQE_CARD_SIMULATION -2 /* use this for simulation */ #define GENWQE_CARD_TESTMODE 0x1000 /* tweak DDCB/sglists before exec */ #define GENWQE_TESTMODE_MASK 0xfff /** * RDONLY: Only reading data from this handle * WRONLY: Only write to this handle is possible * RDRW: Both reading and writing is possible * ASYNC: Enable signal driven err notification: SIGIO is delivered * when the device needs recovery. * * @note Mode flags can be useful for code which is embedding the * card_handle_t within their own structures. */ #define GENWQE_MODE_RDONLY 0x0001 #define GENWQE_MODE_WRONLY 0x0002 #define GENWQE_MODE_RDWR 0x0004 #define GENWQE_MODE_ASYNC 0x0008 #define GENWQE_MODE_NONBLOCK 0x0010 /* non blocking operation, -EBUSY */ #define GENWQE_APPL_ID_IGNORE 0x0000000000000000 /* Ignore appl id Bits */ #define GENWQE_APPL_ID_MASK 0x00000000ffffffff /* Valid bits in appid */ /** * @brief Get genwqe_card_handle * * @param [in] card_no card number if positive * -1 GENWQE_CARD_REDUNDANT: Use multiple cards * if possible, recover problems automatically. * -2 GENWQE_CARD_SIMULATION: Simulation * @param [in] mode For future extensions to influence handle behavior * @return GENWQE_LIB_OK on success or negative error code. 
*/ card_handle_t genwqe_card_open(int card_no, int mode, int *err_code, uint64_t appl_id, uint64_t appl_id_mask); int genwqe_card_close(card_handle_t card); /* Error Handling and Information */ const char *card_strerror(int errnum); const char *retc_strerror(int retc); void genwqe_card_lib_debug(int onoff); /** * @brief Prepare buffer to do DMA transactions. The driver will * create DMA mappings for this buffer and will allocate memory to * hold and sglist which describes the buffer. When executing DDCBs * the driver will use the cached entry before it tries to dynamically * allocate a new one. The intend is to speed up performance. The * resources are freed on device close or when calling the unpin * function. * * @param [in] card card handle * @param [in] addr user space address of memory buffer * @param [in] size size of user space memory buffer * @param [in] direction 0: read/1: read and write * @return GENWQE_LIB_OK on success or negative error code. */ int genwqe_pin_memory(card_handle_t card, const void *addr, size_t size, int dir); /** * @brief Remove the pinning and free the dma-addresess within the driver. * * @param [in] card card handle * @param [in] addr user space address of memory buffer * @param [in] size size of user space memory buffer or use 0 if you * don't know the size. * @return GENWQE_LIB_OK on success or negative error code. */ int genwqe_unpin_memory(card_handle_t card, const void *addr, size_t size); static inline void genwqe_ddcb_cmd_init(struct genwqe_ddcb_cmd *cmd) { __u64 tstamp; tstamp = cmd->disp_ts; memset(cmd, 0, sizeof(*cmd)); cmd->disp_ts = tstamp; } /** * Super Child Block allocation/deallocation * * The SCB is build up as follows: * ATS[n] - Address Translation Specification * DATA - Data or pointers according to ATS[n] information * * Each 4 bit field in the ATS area of the SCB describes 8 bytes of * the SCB/data. 
* * Example: When using one 8 byte ATS entry (and the minimum is 8 * bytes for ATS fields), we have 16 4-bit nibbles describing 16 * 8 * bytes of the SCB. That results in an SCB of 128 bytes size, where 8 * bytes are used as ATS bitfields. And the first nibble of the ATS * bitfield needs to be 0b0000 to reserve the space for the ATS 8 byte * entry itself. The remaining nibbles in the ATS area are up to the * application. * * As result the size of the SCB needs to be a multiple of 128 bytes. * And the usable data starts after the ATS area which is used to * describe the SCB itself. * * Our first implementation is limited such that we need to align the * memory for the SCB to a 4KiB boundary. */ void *genwqe_card_alloc_scb(card_handle_t card, size_t size); int genwqe_card_set_ats_flags(void *scb, size_t size, size_t offs, int type); int genwqe_card_free_scb(card_handle_t card, void *scb, size_t size); /** * @brief Genwqe generic DDCB execution interface. * The execution request will block until finished or a timeout occurs. * * @param [in] card card handle * @param [inout] req DDCB execution request * @return GENWQE_LIB_OK on success or negative error code. * Please inspect the DDCB specific return code * in retc, attn and progress in case of error too. */ int genwqe_card_execute_ddcb(card_handle_t card, struct genwqe_ddcb_cmd *req); /** * @brief Execute a DDCB request with no DMA buffer translations. * @param [in] card card handle * @param [inout] req DDCB execution request * @return GENWQE_LIB_OK on success or negative error code. * Please inspect the DDCB specific return code * in retc, attn and progress in case of error too. 
*/ int genwqe_card_execute_raw_ddcb(card_handle_t card, struct genwqe_ddcb_cmd *req); /** Genwqe register access */ uint64_t genwqe_card_read_reg64(card_handle_t card, uint32_t offs, int *rc); uint32_t genwqe_card_read_reg32(card_handle_t card, uint32_t offs, int *rc); int genwqe_card_write_reg64(card_handle_t card, uint32_t offs, uint64_t v); int genwqe_card_write_reg32(card_handle_t card, uint32_t offs, uint32_t v); int genwqe_card_get_state(card_handle_t card, enum genwqe_card_state *state); uint32_t genwqe_ddcb_crc32(uint8_t *buff, size_t len, uint32_t init); /** * Service Layer Architecture (firmware) layer * 0x00: Development mode/Genwqe4-WFO (defunct) * 0x01: SLC1 (a5-wfo) * 0x02: SLC2 (sept2012), zcomp, zdb2, single DDCB, * 0x03: SLC2 (feb2013), zcomp, zdb2, generic driver, single DDCB * 0xFF: Bad Image. */ #define GENWQE_SLU_DEVEL 0x00 #define GENWQE_SLU_SLC1 0x01 #define GENWQE_SLU_SLC2_0 0x02 #define GENWQE_SLU_SLC2_1 0x03 #define GENWQE_SLU_BAD 0xff /** * @brief Get filedescriptor associated with card. * @param [in] card card handle * @return filedescriptor or -1 on error. */ int genwqe_card_fileno(card_handle_t card); int genwqe_get_drv_rc(card_handle_t card); int genwqe_get_drv_errno(card_handle_t card); /** * @brief Debug support. */ void genwqe_card_lib_debug(int onoff); /* debug outputs on/off */ void genwqe_hexdump(FILE *fp, const void *buff, unsigned int size); /* Flags which information should be printed out */ #define GENWQE_DD_IDS 0x0001 #define GENWQE_DD_DDCB_BEFORE 0x0002 #define GENWQE_DD_DDCB_PREVIOUS 0x0004 #define GENWQE_DD_DDCB_PROCESSED 0x0008 #define GENWQE_DD_ALL (GENWQE_DD_IDS | \ GENWQE_DD_DDCB_BEFORE | \ GENWQE_DD_DDCB_PREVIOUS | \ GENWQE_DD_DDCB_PROCESSED) void genwqe_print_debug_data(FILE *fp, struct genwqe_debug_data *debug_data, int flags); /* * Set of functions to alloc/free DMA capable buffers * * Allocating memory via the driver will always result in page aligned * memory.
Since this is a feature, we use memalign to mimic the same * for simulation mode. Requesting too large chunks, i.e. everything larger * than one page, might result in not getting the memory. Intel Linux * provides 4MiB largest. z Linux in the PCI support partition is * configured to return max 1MiB. Whether a large contiguous memory block * is available depends on the systems amount of memory, but also on * the memory fragmentation state of the system. If larger regions are * needed, consider using sglists instead. * * Memory returned by this function is page aligned but not guaranteed * to be zeroed out. */ void *genwqe_card_malloc(card_handle_t card, size_t size); int genwqe_card_free(card_handle_t card, void *ptr, size_t size); /*****************************************************************************/ /** Service related Functions */ /*****************************************************************************/ /*****************************************************************************/ /** move flash / update chip */ /*****************************************************************************/ struct card_upd_params { const char *fname; /**< path and name of update file */ uint32_t flength; /**< length of update file */ uint32_t crc; /**< crc of this image */ uint16_t flags; /**< flags from MoveFlash tool */ char partition; /**< target partition in flash */ uint64_t slu_id; /**< informational/sim: SluID */ uint64_t app_id; /**< informational/sim: AppID */ uint16_t retc; /**< return code from processing */ uint16_t attn; /**< attention code from processing */ uint32_t progress; /**< progress code from processing */ }; /** * @brief Update chip code image. Note that the system must be rebooted * after using this function if you want to activate the changes. * * @param [in] card card handle * @param [in] upd struct containing all params for update process * @param [in] verify verify content by reading it back and comparing * @return SLU_LIB_OK on success or error code.
*/ int genwqe_flash_update(card_handle_t card, struct card_upd_params *upd, int verify); /** * @brief Read chip code image. * * @param [in] card card handle * @param [in] upd struct containing all params for read process * @return SLU_LIB_OK on success or error code. */ int genwqe_flash_read(card_handle_t card, struct card_upd_params *upd); /** * Original VPD layout by Nallatech. This is normally stored in the * cards CPLD chip. */ typedef struct genwqe_vpd { uint8_t csv_vpd_data[512]; /* New defined by CSV file */ } __attribute__((__packed__)) __attribute__((__may_alias__)) genwqe_vpd; int genwqe_read_vpd(card_handle_t card, genwqe_vpd *vpd); int genwqe_write_vpd(card_handle_t card, const genwqe_vpd *vpd); void card_overwrite_slu_id(card_handle_t card, uint64_t slu_id); void card_overwrite_app_id(card_handle_t card, uint64_t app_id); uint64_t card_get_app_id(card_handle_t card); int genwqe_dump_statistics(FILE *fp); #ifdef __cplusplus } #endif #endif /* __LIBCARD_H__ */ genwqe-user-4.0.18/include/libddcb.h000066400000000000000000000344701303345043000172300ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __LIBDDCB_H__ #define __LIBDDCB_H__ /** * IBM DDCB based Accelerator Family * * There will be two types of PCIe cards supporting DDCBs. The 1st one * is using the plain PCIe protocol and using the GenWQE Linux device * driver to communicate to user code. 
This works for Intel, z and p * and potentially for other architectures too. * * The 2nd type is using the CAPI protocol on top of PCIe and is only * available for IBM System p. */ #ifdef __cplusplus extern "C" { #endif /* NOTE(review): the header names of the four #include directives below are missing in this copy; restore them from the upstream header. */ #include #include #include #include /*****************************************************************************/ /** Version Information and Error Codes */ /*****************************************************************************/ #define DDCB_TYPE_GENWQE 0x0000 #define DDCB_TYPE_CAPI 0x0002 #define ACCEL_REDUNDANT -1 /* special: redundant card */ #define DDCB_MODE_RD 0x0001 /* NOTE: Needs to match GENWQE_MODE flags */ #define DDCB_MODE_WR 0x0002 /* same value as GENWQE_MODE_WRONLY */ #define DDCB_MODE_RDWR 0x0004 /* same value as GENWQE_MODE_RDWR */ #define DDCB_MODE_ASYNC 0x0008 /* same value as GENWQE_MODE_ASYNC */ #define DDCB_MODE_NONBLOCK 0x0010 /* non blocking, -EBUSY */ #define DDCB_MODE_POLLING 0x0020 /* polling */ #define DDCB_MODE_MASTER 0x08000000 /* Open Master Context, Slave is default, CAPI only */ #define DDCB_APPL_ID_IGNORE 0x0000000000000000ull /* Ignore bits */ #define DDCB_APPL_ID_MASK 0x00000000ffffffffull /* Valid bits */ #define DDCB_APPL_ID_MASK_VER 0x000000ffffffffffull /* Valid bits */ #define DDCB_OK 0 /* no error */ #define DDCB_ERRNO -401 /* libc call went wrong */ #define DDCB_ERR_CARD -402 /* problems accessing accel. */ #define DDCB_ERR_OPEN -403 /* cannot open accelerator */ #define DDCB_ERR_VERS_MISMATCH -404 /* library version mismatch */ #define DDCB_ERR_INVAL -405 /* illegal parameters */ #define DDCB_ERR_EXEC_DDCB -411 /* ddcb execution failed */ #define DDCB_ERR_APPID -414 /* application id wrong */ #define DDCB_ERR_NOTIMPL -415 /* funct not implemented */ #define DDCB_ERR_ENOMEM -416 #define DDCB_ERR_ENOENT -417 #define DDCB_ERR_IRQTIMEOUT -418 #define DDCB_ERR_EVENTFAIL -419 #define DDCB_ERR_SELECTFAIL -420 /* e.g.
socket problems in sim */ /* Genwqe chip Units */ #define DDCB_ACFUNC_SLU 0x00 /* chip service layer unit */ #define DDCB_ACFUNC_APP 0x01 /* chip application */ /* DDCB return codes (RETC) */ #define DDCB_RETC_IDLE 0x0000 /* Unexecuted/DDCB created */ #define DDCB_RETC_PENDING 0x0101 /* Pending Execution */ #define DDCB_RETC_COMPLETE 0x0102 /* Cmd complete. No error */ #define DDCB_RETC_FAULT 0x0104 /* App Err, recoverable */ #define DDCB_RETC_ERROR 0x0108 /* App Err, non-recoverable */ #define DDCB_RETC_FORCED_ERROR 0x01ff /* overwritten by driver */ #define DDCB_RETC_UNEXEC 0x0110 /* Unexe/Removed from queue */ #define DDCB_RETC_TERM 0x0120 /* Terminated */ #define DDCB_RETC_RES0 0x0140 /* Reserved */ #define DDCB_RETC_RES1 0x0180 /* Reserved */ /* Common DDCB Commands */ #define DDCB_CMD_ECHO_SYNC 0x00 /* PF/VF */ /* DDCB Command Options (CMDOPT) */ #define DDCB_OPT_ECHO_FORCE_NO 0x0000 /* ECHO DDCB */ #define DDCB_OPT_ECHO_FORCE_102 0x0001 /* force return code */ #define DDCB_OPT_ECHO_FORCE_104 0x0002 #define DDCB_OPT_ECHO_FORCE_108 0x0003 #define DDCB_OPT_ECHO_FORCE_110 0x0004 #define DDCB_OPT_ECHO_FORCE_120 0x0005 #define DDCB_OPT_ECHO_FORCE_140 0x0006 #define DDCB_OPT_ECHO_FORCE_180 0x0007 #define _DDCB_OPT_ECHO_COPY_NONE 0x00 #define _DDCB_OPT_ECHO_COPY_ALL 0x20 /* Issuing a specific DDCB command */ #define DDCB_LENGTH 256 /* Size of real DDCB */ #define DDCB_ASIV_LENGTH 104 /* Length of the DDCB ASIV array */ #define DDCB_ASIV_LENGTH_ATS 96 /* ASIV in ATS architecture */ #define DDCB_ASV_LENGTH 64 /* Len of the DDCB ASV array */ /** * @brief In case of RETC 0x110 and ATTN 0xE007 the DMA engine reports * back its detailed status in the ASV of the DDCB. Fields are defined * in big endian byte ordering. 
*/ struct _asv_runtime_dma_error { uint64_t raddr_be64; /* 0x80 */ uint32_t rfmt_chan_disccnt_be32;/* 0x88 */ uint16_t rdmae_be16; /* 0x8C */ uint16_t rsge_be16; /* 0x8E */ uint64_t res0; /* 0x90 */ uint64_t res1; /* 0x98 */ uint64_t waddr_be64; /* 0xA0 */ uint32_t wfmt_chan_disccnt_be32;/* 0xA8 */ uint16_t wdmae_be16; /* 0xAC */ uint16_t wsge_be16; /* 0xAE */ uint64_t res2; /* 0xB0 */ uint64_t res3; /* 0xB8 */ } __attribute__((__packed__)) __attribute__((__may_alias__)); /** * struct ddcb_cmd - User parameter for generic DDCB commands * * General fields are to be passed in host byte endian order. The * fields in asv and asiv depend on the accelerator functionality. The * compression/decompression accelerator uses e.g. big-endian. * * NOTE: This interface is matching the GenWQE device driver * interface. If it is changed, it needs to be reflected in the code * which prepares the request to the GenWQE device driver ioctl. * * And yes ... it is very close to the DDCB design ... */ typedef struct ddcb_cmd { __u64 next_addr; /* chaining ddcb_cmd */ __u64 flags; /* reserved */ __u8 acfunc; /* accelerators functional unit */ __u8 cmd; /* command to execute */ __u8 asiv_length; /* used parameter length */ __u8 asv_length; /* length of valid return values */ __u16 cmdopts; /* command options */ __u16 retc; /* return code from processing */ __u16 attn; /* attention code from processing */ __u16 vcrc; /* variant crc16 */ __u32 progress; /* progress code from processing */ __u64 deque_ts; /* dequeue time stamp */ __u64 cmplt_ts; /* completion time stamp */ __u64 disp_ts; /* SW processing start */ __u64 ddata_addr; /* collect debug data */ __u8 asv[DDCB_ASV_LENGTH]; /* command specific values */ union { /* 2nd version of DDCBs has ATS field */ struct { __u64 ats; __u8 asiv[DDCB_ASIV_LENGTH_ATS]; }; /* 1st version has no ATS field */ __u8 __asiv[DDCB_ASIV_LENGTH]; }; } ddcb_cmd_t; /** * @brief Clear a DDCB command. All fields are set to zero except * disp_ts, which keeps its previous value. * * @param [inout] cmd DDCB command to reset */ static inline void ddcb_cmd_init(struct ddcb_cmd *cmd) { __u64 tstamp; tstamp = 
cmd->disp_ts; memset(cmd, 0, sizeof(*cmd)); cmd->disp_ts = tstamp; } /* Opaque data type defined library internal */ typedef struct card_dev_t *accel_t; /*****************************************************************************/ /** Function Prototypes */ /*****************************************************************************/ /* Error Handling and Information */ const char *ddcb_retc_strerror(int ddcb_retc); /* DDCBs retc */ const char *ddcb_strerror(int accel_rc); const char *accel_strerror(accel_t card, int card_rc); /* card errcode */ void ddcb_hexdump(FILE *fp, const void *buff, unsigned int size); void ddcb_debug(int verbosity); void ddcb_set_logfile(FILE *fd_out); /** * @brief Get accel_handle * * @param [in] card_no card number if positive * -1 ACCEL_REDUNDANT: Use multiple cards * if possible, recover problems automatically. * @param [in] card_type accelerator type, e.g. DDCB_TYPE_GENWQE or * DDCB_TYPE_CAPI * @param [in] mode influence handle behavior, see DDCB_MODE_* flags * @param [out] rc presumably the card return code (TODO confirm) * @param [in] appl_id presumably the expected application id * (TODO confirm) * @param [in] appl_id_mask presumably the appl_id bits to compare * (TODO confirm) * @return handle on success or NULL (see rc) */ accel_t accel_open(int card_no, unsigned int card_type, unsigned int mode, int *rc, uint64_t appl_id, uint64_t appl_id_mask); int accel_close(accel_t card); /** * @brief Genwqe generic DDCB execution interface. * The execution request will block until finished or a timeout occurs. * * @param [in] card card handle * @param [inout] req DDCB execution request * @param [out] card_rc presumably the card specific return code * (TODO confirm) * @param [out] card_errno presumably the errno of the failing call * (TODO confirm) * @return DDCB_OK on success or negative error code. * Please inspect the DDCB specific return code * in retc, attn and progress in case of error too. */ int accel_ddcb_execute(accel_t card, struct ddcb_cmd *req, int *card_rc, int *card_errno); /* Register access */ uint64_t accel_read_reg64(accel_t card, uint32_t offs, int *card_rc); uint32_t accel_read_reg32(accel_t card, uint32_t offs, int *card_rc); int accel_write_reg64(accel_t card, uint32_t offs, uint64_t val); int accel_write_reg32(accel_t card, uint32_t offs, uint32_t val); uint64_t accel_get_app_id(accel_t card); /** * @brief Get the queue work timer card ticks.
This indicates how long * the hardware queue was in use. Comparing this value with the over * all runtime, helps to judge how much time was spend in software and * in hardware data processing. */ uint64_t accel_get_queue_work_time(accel_t card); uint64_t accel_get_frequency(accel_t card); void accel_dump_hardware_version(accel_t card, FILE *fp); /** * @brief Prepare buffer to do DMA transactions. The driver will * create DMA mappings for this buffer and will allocate memory to * hold an sglist which describes the buffer. When executing DDCBs * the driver will use the cached entry before it tries to dynamically * allocate a new one. The intent is to speed up performance. The * resources are freed on device close or when calling the unpin * function. * * Note: Only needed if underlying architecture supports it. * * @param [in] card card handle * @param [in] addr user space address of memory buffer * @param [in] size size of user space memory buffer * @param [in] dir 0: read/1: read and write * @return DDCB_OK on success or negative error code. */ int accel_pin_memory(accel_t card, const void *addr, size_t size, int dir); /** * @brief Remove the pinning and free the dma-addresses within the driver. * * Note: Only needed if underlying architecture supports it. * * @param [in] card card handle * @param [in] addr user space address of memory buffer * @param [in] size size of user space memory buffer or use 0 if you * don't know the size. * @return DDCB_OK on success or negative error code. */ int accel_unpin_memory(accel_t card, const void *addr, size_t size); /* * Set of functions to alloc/free DMA capable buffers * * Allocating memory via the GenWQE Linux driver will result in page * aligned memory. Since this is a feature, we use memalign to mimic * the same for simulation mode. Requesting too large chunks, * i.e. everything larger than one page, might result in not getting the * memory. Intel Linux provides 4MiB largest.
z Linux in the PCI * support partition is configured to return max 1MiB. Whether a large * contiguous memory block is available depends on the systems amount * of memory, but also on the memory fragmentation state of the * system. If larger regions are needed, consider using sglists * instead. * * Memory returned by this function is page aligned but not guaranteed * to be zeroed out. * * Note: Only needed if underlying architecture supports it. */ void *accel_malloc(accel_t card, size_t size); int accel_free(accel_t card, void *ptr, size_t size); /** * Since there are different types of DDCB accelerators out there, * e.g. GenWQE PCIe card and its simulation or the new CAPI PCIe * implementation with yet a different simulation approach underneath, * this interface offers to register functionality for the respective * types. The idea is to provide a constructor which registers the * interface at libddcb and tools using it can specify the type of * DDCB accelerator they like to use. * * libddcb will use the registered functions to provide the requested * functionality. */ #define DDCB_FLAG_STATISTICS 0x0001 /* enable statistical data gathering */ struct ddcb_accel_funcs { int card_type; const char *card_name; /* must return void *card_data */ void *(* card_open)(int card_no, unsigned int mode, int *card_rc, uint64_t appl_id, uint64_t appl_id_mask); int (* card_close)(void *card_data); int (* ddcb_execute)(void *card_data, struct ddcb_cmd *req); const char * (* card_strerror)(void *card_data, int card_rc); /* The following functions we need for all implementations, at least for debugging purposes. */ uint64_t (* card_read_reg64)(void *card_data, uint32_t offs, int *card_rc); uint32_t (* card_read_reg32)(void *card_data, uint32_t offs, int *card_rc); int (* card_write_reg64)(void *card_data, uint32_t offs, uint64_t val); int (* card_write_reg32)(void *card_data, uint32_t offs, uint32_t val); /* The application id is something we used for the GenWQE implementation.
It helps to ensure that the software can check if it can operatate this accelerator implementation. For CAPI we are searching a similar mechanism still. */ uint64_t (* card_get_app_id)(void *card_data); uint64_t (* card_get_queue_work_time)(void *card_data); /* ticks */ uint64_t (* card_get_frequency)(void *card_data); /* Hz */ void (* card_dump_hardware_version)(void *card_data, FILE *fp); /* Not all DDCB accelerators have this, GenWQE has it, but CAPI does not. If not executed wrapper functions will return DDCB_OK */ int (* card_pin_memory)(void *card_data, const void *addr, size_t size, int dir); int (* card_unpin_memory)(void *card_data, const void *addr, size_t size); void * (* card_malloc)(void *card_data, size_t size); int (* card_free)(void *card_data, void *ptr, size_t size); /* statistical information */ int (* dump_statistics)(FILE *fp); pthread_mutex_t slock; unsigned long num_open; unsigned long num_execute; unsigned long num_close; unsigned long time_open; unsigned long time_execute; unsigned long time_close; /* private */ void *priv_data; }; /* * Dump card statistics for debugging and for performance analysis. * * @param [in] card card handle * @param [out] fp filehandle to write the text too */ int accel_dump_statistics(struct ddcb_accel_funcs *accel, FILE *fp); /* * Register accelerator for later usage. This needs ideally be done in * a library constructor. * * @param [in] accel accelerator function table */ int ddcb_register_accelerator(struct ddcb_accel_funcs *accel); #ifdef __cplusplus } #endif #endif /* __LIBDDCB_H__ */ genwqe-user-4.0.18/include/libzHW.h000066400000000000000000000442761303345043000170510ustar00rootroot00000000000000/* * Copyright 2014, 2016 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __LIBZHW_H__ #define __LIBZHW_H__ /* * @brief Compression/decompression supporting RFC1950, RFC1951 and * RFC1952. The data structure is similar to the one described in * zlib.h, but contains some more information required to do the * hardware compression/decompression. * * In addition to the compression/decompression related functions/data * it defines functions to open/operate/close the GenWQE card which is * used to implement the hardware accelerated * compression/decompression. * * IBM Accelerator Family 'GenWQE'/zEDC Compression */ #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif #define DDCB_APPL_ID_GZIP 0x00000000475a4950ull /* The GZIP APPL id */ #define DDCB_APPL_ID_GZIP2 0x00000002475a4950ull /* The GZIP2 APPL id */ /* Different zlib versions used different codes for flush! 
*/ #define ZEDC_NO_FLUSH 0 #define ZEDC_PARTIAL_FLUSH 1 #define ZEDC_SYNC_FLUSH 2 #define ZEDC_FULL_FLUSH 3 #define ZEDC_FINISH 4 #define ZEDC_BLOCK 5 #define ZEDC_NO_COMPRESSION 0 #define ZEDC_BEST_SPEED 1 #define ZEDC_BEST_COMPRESSION 9 #define ZEDC_DEFAULT_COMPRESSION (-1) #define ZEDC_FILTERED 1 #define ZEDC_HUFFMAN_ONLY 2 #define ZEDC_RLE 3 #define ZEDC_FIXED 4 #define ZEDC_DEFAULT_STRATEGY 0 /* Fragile, since return codes might not match local zlib implementation */ #define ZEDC_OK 0 #define ZEDC_STREAM_END 1 #define ZEDC_NEED_DICT 2 #define ZEDC_ERRNO (-1) /* see errno for more details */ #define ZEDC_STREAM_ERROR (-2) /* please see errno or zedc_liberr */ #define ZEDC_DATA_ERROR (-3) /* see zedc_carderr for more details */ #define ZEDC_MEM_ERROR (-4) #define ZEDC_BUF_ERROR (-5) /* zEDC specific enhancements */ #define ZEDC_ERR_CARD (-307) /* see zedc_carderr for details */ #define ZEDC_ERR_INVAL (-308) /* illegal parameters */ #define ZEDC_ERR_RETLEN (-309) /* returned invalid length */ #define ZEDC_ERR_RETOBITS (-310) /* returned invalid output bytes */ #define ZEDC_ERR_TREE_OVERRUN (-311) /* too many tree bits */ #define ZEDC_ERR_ZLIB_HDR (-312) /* illegal zlib header found */ #define ZEDC_ERR_ADLER32 (-313) /* adler32 mismatch */ #define ZEDC_ERR_GZIP_HDR (-314) /* illegal gzip header found */ #define ZEDC_ERR_CRC32 (-315) /* crc32 mismatch */ #define ZEDC_ERR_UNSUPPORTED (-316) /* currently unsupported function */ #define ZEDC_ERR_DICT_OVERRUN (-317) /* dictionary overrun */ #define ZEDC_ERR_INP_MISSING (-318) /* further input data missing */ #define ZEDC_ERR_ILLEGAL_APPID (-319) /* unsupported APP_ID */ #define ZEDC_NULL NULL #define ZEDC_DEFLATED 8 /* The deflate compression method (the only one supported) */ #define ZEDC_FORMAT_DEFL 0 #define ZEDC_FORMAT_ZLIB 1 #define ZEDC_FORMAT_GZIP 2 #define ZEDC_FORMAT_STORAGE 18 /* GZIP/ZLIB header storage */ /* NOTE: Always turn CROSS_CHECK on, otherwise you lose data protection */ #define 
ZEDC_FLG_CROSS_CHECK (1 << 0) /* flag: inflate<->deflate check */ #define ZEDC_FLG_DEBUG_DATA (1 << 1) /* flag: collect debug data */ /* * The SKIP_LAST_DICT flag can be used to omit transmitting the last * dictionary on an inflate/deflate request. If the output buffer is * not large enough the DDCB will be repeated with the SAVE_DICT flag * enabled, such that compression/decompression can properly * continue. It might help to reduce hardware time especially for many * independent small transfers. E.g. 64KiB data will cause an * obsolete 32KiB dictionary transfer with zEDC/zEDCv2 bitstreams. * * For large files the effect is not noticeable. * * Note: This flag cannot be used in verification tools like * genwqe_zcomp, since there we check dictionary consistency by * comparing the hardware dictionary with a private software * maintained dictionary (-z option). */ #define ZEDC_FLG_SKIP_LAST_DICT (1 << 2) /* flag: try to omit last dict */ /** * We might have addresses within the ASIV data. Those need to be * replaced by valid DMA addresses to the buffer, sg-list or * child-block in the kernel driver handling the request. */ enum zedc_mtype { DDCB_DMA_TYPE_MASK = 0x18, /**< mask off type */ DDCB_DMA_TYPE_FLAT = 0x08, /**< contiguous DMA block */ DDCB_DMA_TYPE_SGLIST = 0x10, /**< DMA sg-list */ DDCB_DMA_WRITEABLE = 0x04, /**< memory writeable?
*/ DDCB_DMA_PIN_MEMORY = 0x20, /**< pin sgl memory after allocation */ }; /* Index for zedc_mtype information */ #define ZEDC_IN 0 /* input buffer */ #define ZEDC_OUT 1 /* output buffer */ #define ZEDC_WS 2 /* workspace buffer */ /**< data structure for dict check for integrity check by genwqe_zedc */ struct zedc_dict_ref_s { uint8_t *addr; /* local reference dictionary */ unsigned wr; /* wr offset */ unsigned in_offs; unsigned long last_total; }; typedef enum e_head_state { HEADER_START = 0, /* Enter */ FLAGS_CHECK_EMPTY, /* No Flags set State */ FLAGS_CHECK_EXTRA, FLAGS_GET_EXTRA_LEN1, FLAGS_GET_EXTRA_LEN2, FLAGS_GET_EXTRA, FLAGS_CHECK_FNAME, FLAGS_GET_FNAME, FLAGS_CHECK_FCOMMENT, FLAGS_GET_FCOMMENT, FLAGS_CHECK_FHCRC, FLAGS_GET_FHCRC1, FLAGS_GET_FHCRC2, FLAGS_CHECK_FTEXT, ZLIB_ADLER, /* State for zlib only */ HEADER_DONE } head_state; /* * Gzip header information passed to and from zlib routines. See RFC * 1952 for more details on the meanings of these fields. */ typedef struct gzedc_header_s { int text; unsigned long time; /* modification time */ int xflags; /* extra flags (not used for write) */ int os; /* operating system */ uint8_t *extra; /* pointer to extra field or Z_NULL if none */ unsigned int extra_len; /* extra field len (valid if extra!=Z_NULL) */ unsigned int extra_max; /* space at extra (only when reading hdr) */ char *name; /* ptr to zero-terminated filename or Z_NULL */ unsigned int name_max; /* space at name (only when reading header) */ char *comment; /* ptr to zero-terminated comment or Z_NULL */ unsigned int comm_max; /* space at comment (only when reading hdr) */ int hcrc; /* true if there was or will be a header crc */ int done; /* true when done reading gzip header (not used when writing a gzip file) */ } gzedc_header; typedef gzedc_header *gzedc_headerp; /** * @note Data structure which should match what libz offers plus some * additional changes needed for hardware compression/decompression. 
* * FIXME This data-structure is way too large. Fields are duplicated * with content which is already in the DDCB execution request. We * could define two DDCB request data structures and alternate between * those to keep the amount of copying data small. Also the different * naming between this and the DDCB request data structures causes the * code to become error prone and badly readable. * * FIXME We have here three FIFOs which serve a similar purpose: * * 1) prefx: used to contain the header data which did not fit * into the user output buffer. * 1a) in addition there is the obytes array which contains some more * output data too. * We already removed in this version: obytes[], good * * => Merging those buffers makes a lot of sense, because both create similar * code for the same purpose! * * 2) postfx: used to hold the trailer data e.g. CRC32/ADLER32/LEN * in case of RFC1950, RFC1952 before it is completely read in. * * => Adding the ZLIB/GZ trailers is very similar to adding the EOB, * or FEOB for RFC1951. Merging would make here very much sense * too. 
*/ typedef struct zedc_stream_s { /* parameters for the supported functions */ int level; /**< compression level */ int method; /**< must be Z_DEFLATED for zlib */ int windowBits; /* -15..-8 = raw deflate, window size (2^-n) * 8..15 = zlib window size (2^n) default=15 * 24..31 = gzip encoding */ int memLevel; /**< 1...9 (default=8) */ int strategy; /**< force compression algorithm */ int flush; int data_type; /**< best guess dtype: ascii/binary*/ /* stream data management */ const uint8_t *next_in; /**< next input byte */ unsigned int avail_in; /**< # of bytes available at next_in */ unsigned long total_in; /**< total nb of inp read so far */ uint8_t *next_out; /**< next obyte should be put there */ unsigned int avail_out; /**< remaining free space at next_out*/ unsigned long total_out; /**< total nb of bytes output so far */ uint32_t crc32; /**< data crc32 */ uint32_t adler32; /**< data adler32 */ /* * PRIVATE AREA * * The definitions below are not intended for normal use. We * have them at the moment here, because we liked to dump some * internals for problem determination and were too lazy to * hide them and add access functions. When moving towards a * potential Linux product this might change. */ /* Hardware request specific data */ void *device; /**< ref to compr/decompr device */ struct ddcb_cmd cmd; /* RETC/ATTN/PROGRESS */ uint16_t retc; /**< after DDCB processing */ uint16_t attn; /**< after DDCB processing */ uint32_t progress; /**< after DDCB processing */ /* Parameters for supported formats */ int format; /**< DEFL, GZIP, ZLIB */ int flags; /* control memory handling behavior */ /* Save & Restore values for successive DDCB exchange */ struct zedc_fifo out_fifo; /* FIFO for output data e.g. hdrs */ struct zedc_fifo in_fifo; /* FIFO for read data e.g.
hdrs */ head_state header_state; /* State when decoding Header */ uint16_t gzip_hcrc; /* The value of the header CRC */ int gzip_header_idx; /* Index needed for getting header data */ /* Incomplete output data */ int onumbits; /* remaining bits 0..7 */ uint8_t obyte; /* incomplete byte */ /* Status bits */ int eob_seen; /* inflate: EOB seen */ int eob_added; /* deflate: EOB added */ int header_added; /* deflate: header was added */ int trailer_added; /* deflate: trailer was added */ int havedict; /* inflate/deflate: have dictionary */ /* temporary workspace (dict, tree, scratch) */ struct zedc_wsp *wsp; /* workspace for deflate and inflate */ int wsp_page; /**< toggling workspace page */ enum zedc_mtype dma_type[3]; /* dma types for in, out, ws; indexed by ZEDC_IN, ZEDC_OUT, ZEDC_WS */ /* GZIP/ZLIB specific parameters */ uint32_t file_size; /**< GZIP input file size */ uint32_t file_adler32; /**< checksum from ZLIB Trailer */ uint32_t file_crc32; /**< checksum from GZIP Trailer */ uint32_t dict_adler32; /* expected adler32 for the dict */ struct gzedc_header_s *gzip_head; /* for GZIP only */ /* scratch and tree management */ /* ASIV to DDCB */ uint32_t in_hdr_scratch_len; /**< to DDCB */ uint16_t in_hdr_bits; /**< next valid HDR/TREE */ uint8_t hdr_ib; /**< to DDCB */ uint8_t scratch_ib; /**< ignored bits in scratch */ /* ASV from DDCB */ uint32_t inp_processed; uint32_t outp_returned; uint8_t proc_bits; #define INFL_STAT_PASSED_EOB 0x01 #define INFL_STAT_FINAL_EOB 0x04 #define INFL_STAT_REACHED_EOB 0x08 #define INFL_STAT_HDR_TYPE_MASK 0x60 #define INFL_STAT_HDR_BFINAL 0x80 uint8_t infl_stat; /* 0x01: EOB passed * 0x04: FINAL_EOB reached * 0x60: ...? * 0x08: exactly on eob * 0x80: was final block?
*/ uint32_t hdr_start; /**< offset in input buffer */ uint16_t out_hdr_bits; /**< from DDCB */ uint8_t out_hdr_start_bits; /**< from DDCB */ uint16_t copyblock_len; /* SR variables */ uint32_t tree_bits; /**< valid bits in tree area */ uint32_t pad_bits; /**< padding bits behind tree */ uint32_t scratch_bits; /**< valid bits in scratch area */ uint64_t pre_scratch_bits; /**< scratch part of inp_processed */ uint32_t inp_data_offs; /**< processed bytes from inp-buffer */ uint32_t in_data_used; /* dictionary management */ uint16_t dict_len; /**< previous dictionary length */ uint8_t out_dict_offs; /**< add to INPUT_DICT address */ uint16_t obytes_in_dict; /* FIXME Replace those special purpose buffers with FIFOs */ int prefx_len; /**< GZIP/ZLIB prefix length */ int prefx_idx; /**< GZIP/ZLIB prefix index */ uint8_t prefx[ZEDC_FORMAT_STORAGE]; uint16_t xlen; int postfx_len; /**< GZIP/ZLIB postfix length */ int postfx_idx; /**< GZIP/ZLIB postfix index */ uint8_t postfx[ZEDC_FORMAT_STORAGE]; } zedc_stream; typedef struct zedc_stream_s *zedc_streamp; /**************************************************************************** * Compression/Decompression device - zedc device handle ***************************************************************************/ typedef struct zedc_dev_t *zedc_handle_t; zedc_handle_t zedc_open(int card_no, int card_type, int mode, int *err_code); int zedc_close(zedc_handle_t zedc); void zedc_overwrite_slu_id(zedc_handle_t zedc, uint64_t slu_id); void zedc_overwrite_app_id(zedc_handle_t zedc, uint64_t app_id); /** * @brief Prepare buffer to do DMA transactions. The driver will * create DMA mappings for this buffer and will allocate memory to * hold and sglist which describes the buffer. When executing DDCBs * the driver will use the cached entry before it tries to dynamically * allocate a new one. The intend is to speed up performance. The * resources are freed on device close or when calling the unpin * function. 
* * @param [in] zedc zedc device handle * @param [in] addr user space address of memory buffer * @param [in] size size of user space memory buffer * @param [in] dir 0: read/1: read and write * @return DDCB_LIB_OK on success or negative error code. */ int zedc_pin_memory(zedc_handle_t zedc, const void *addr, size_t size, int dir); /** * @brief Remove the pinning and free the DMA addresses within the driver. * * @param [in] zedc zedc device handle * @param [in] addr user space address of memory buffer * @param [in] size size of user space memory buffer or use 0 if you * don't know the size. * @return DDCB_LIB_OK on success or negative error code. */ int zedc_unpin_memory(zedc_handle_t zedc, const void *addr, size_t size); /* DMA memory allocation/deallocation */ void *zedc_memalign(zedc_handle_t zedc, size_t size, enum zedc_mtype mtype); int zedc_free(zedc_handle_t zedc, void *ptr, size_t size, enum zedc_mtype mtype); /* Error Handling and Information */ int zedc_pstatus(struct zedc_stream_s *strm, const char *task); int zedc_clearerr(zedc_handle_t zedc); const char *zedc_strerror(int errnum); /** * Retrieve error information from low-level components: libzedc, * libcard, libc. During library execution it might happen that we get * errors from multiple sources e.g. libcard or libc. It may also * happen that we notice an error when interpreting the data we got * from libcard, even if the DDCB was executed successfully. * E.g. when the data returned in the DDCB was inconsistent and the * hardware did not notice it because it had a bug itself. Another * case might be if there are programming errors in the data * interpretation itself leading to inconsistent state in our stream * data structure. Same happens if errors are induced for testing * purposes. * * Use card_strerror(errnum) to print the corresponding error message. */ int zedc_carderr(zedc_handle_t zedc); /** * Use this to get a detailed description e.g. in case of * ZLIB_STREAM_ERROR or ZLIB_DATA_ERROR.
In those cases the library * will return the simplified error codes due to compatibility * requirements with libz. Nevertheless the user can use this or the * function above to figure out a more detailed error cause. * * Use zedc_strerror(errnum) to print the corresponding error message. */ int zedc_liberr(zedc_handle_t zedc); struct ddcb_cmd *zedc_last_cmd(struct zedc_stream_s *strm); /**************************************************************************** * Compression ***************************************************************************/ int zedc_deflateInit2(zedc_streamp strm, int level, int method, int windowBits, int memLevel, int strategy); int zedc_deflateParams(zedc_streamp strm, int level, int strategy); int zedc_deflateReset(zedc_streamp strm); int zedc_deflateSetDictionary(zedc_streamp strm, const uint8_t *dictionary, unsigned int dictLength); int zedc_deflatePrime(zedc_streamp strm, int bits, int value); int zedc_deflateCopy(zedc_streamp dest, zedc_streamp source); int zedc_deflatePending(zedc_streamp strm, unsigned *pending, int *bits); int zedc_deflate(zedc_streamp strm, int flush); int zedc_deflateEnd(zedc_streamp strm); int zedc_deflateSetHeader(zedc_streamp strm, gzedc_headerp head); /**************************************************************************** * Decompression ***************************************************************************/ int zedc_inflateInit2(zedc_streamp strm, int windowBits); int zedc_inflateReset(zedc_streamp strm); int zedc_inflateReset2(zedc_streamp strm, int windowBits); int zedc_inflateSetDictionary(zedc_streamp strm, const uint8_t *dictionary, unsigned int dictLength); int zedc_inflateGetDictionary(zedc_streamp strm, uint8_t *dictionary, unsigned int *dictLength); int zedc_inflatePrime(zedc_streamp strm, int bits, int value); int zedc_inflateSync(zedc_streamp strm); int zedc_inflate(zedc_streamp strm, int flush); int zedc_inflateEnd(zedc_streamp strm); int zedc_inflateGetHeader(zedc_streamp strm, 
gzedc_headerp head); /** miscellaneous */ int zedc_inflateSaveBuffers(zedc_streamp strm, const char *prefix); void zedc_lib_debug(int onoff); /* debug outputs on/off */ void zedc_set_logfile(FILE *logfile); int zedc_inflate_pending_output(struct zedc_stream_s *strm); /** * The application can compare zedc_Version and ZEDC_VERSION for * consistency. This check is automatically made by zedc_deflateInit * and zedc_inflateInit. */ const char *zedc_Version(void); #ifdef __cplusplus } #endif #endif /* __LIBZEDC_H__ */ genwqe-user-4.0.18/include/linux/000077500000000000000000000000001303345043000166235ustar00rootroot00000000000000genwqe-user-4.0.18/include/linux/uapi/000077500000000000000000000000001303345043000175615ustar00rootroot00000000000000genwqe-user-4.0.18/include/linux/uapi/linux/000077500000000000000000000000001303345043000207205ustar00rootroot00000000000000genwqe-user-4.0.18/include/linux/uapi/linux/genwqe/000077500000000000000000000000001303345043000222065ustar00rootroot00000000000000genwqe-user-4.0.18/include/linux/uapi/linux/genwqe/genwqe_card.h000066400000000000000000000432631303345043000246460ustar00rootroot00000000000000/* * Copyright 2014,2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __GENWQE_CARD_H__ #define __GENWQE_CARD_H__ /* * User-space API for the GenWQE card. For debugging and test purposes * the register addresses are included here too. 
*/ #include #include /* Basename of sysfs, debugfs and /dev interfaces */ #define GENWQE_DEVNAME "genwqe" #define GENWQE_TYPE_ALTERA_230 0x00 /* GenWQE4 Stratix-IV-230 */ #define GENWQE_TYPE_ALTERA_530 0x01 /* GenWQE4 Stratix-IV-530 */ #define GENWQE_TYPE_ALTERA_A4 0x02 /* GenWQE5 A4 Stratix-V-A4 */ #define GENWQE_TYPE_ALTERA_A7 0x03 /* GenWQE5 A7 Stratix-V-A7 */ /* MMIO Unit offsets: Each UnitID occupies a defined address range */ #define GENWQE_UID_OFFS(uid) ((uid) << 24) #define GENWQE_SLU_OFFS GENWQE_UID_OFFS(0) #define GENWQE_HSU_OFFS GENWQE_UID_OFFS(1) #define GENWQE_APP_OFFS GENWQE_UID_OFFS(2) #define GENWQE_MAX_UNITS 3 /* Common offsets per UnitID */ #define IO_EXTENDED_ERROR_POINTER 0x00000048 #define IO_ERROR_INJECT_SELECTOR 0x00000060 #define IO_EXTENDED_DIAG_SELECTOR 0x00000070 #define IO_EXTENDED_DIAG_READ_MBX 0x00000078 #define IO_EXTENDED_DIAG_MAP(ring) (0x00000500 | ((ring) << 3)) #define GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace) (((ring) << 8) | (trace)) /* UnitID 0: Service Layer Unit (SLU) */ /* SLU: Unit Configuration Register */ #define IO_SLU_UNITCFG 0x00000000 #define IO_SLU_UNITCFG_TYPE_MASK 0x000000000ff00000 /* 27:20 */ /* SLU: Fault Isolation Register (FIR) (ac_slu_fir) */ #define IO_SLU_FIR 0x00000008 /* read only, wr direct */ #define IO_SLU_FIR_CLR 0x00000010 /* read and clear */ /* SLU: First Error Capture Register (FEC/WOF) */ #define IO_SLU_FEC 0x00000018 #define IO_SLU_ERR_ACT_MASK 0x00000020 #define IO_SLU_ERR_ATTN_MASK 0x00000028 #define IO_SLU_FIRX1_ACT_MASK 0x00000030 #define IO_SLU_FIRX0_ACT_MASK 0x00000038 #define IO_SLU_SEC_LEM_DEBUG_OVR 0x00000040 #define IO_SLU_EXTENDED_ERR_PTR 0x00000048 #define IO_SLU_COMMON_CONFIG 0x00000060 #define IO_SLU_FLASH_FIR 0x00000108 #define IO_SLU_SLC_FIR 0x00000110 #define IO_SLU_RIU_TRAP 0x00000280 #define IO_SLU_FLASH_FEC 0x00000308 #define IO_SLU_SLC_FEC 0x00000310 /* * The Virtual Function's Access is from offset 0x00010000 * The Physical Function's Access is from offset 0x00050000 * 
Single Shared Registers exist only at offset 0x00060000 * * SLC: Queue Virtual Window: Window for accessing into a specific VF * queue. When accessing the 0x10000 space using the 0x50000 address * segment, the value indicated here is used to specify which VF * register is decoded. This register, and the 0x50000 register space * can only be accessed by the PF. Example, if this register is set to * 0x2, then a read from 0x50000 is the same as a read from 0x10000 * from VF=2. */ /* SLC: Queue Segment */ #define IO_SLC_QUEUE_SEGMENT 0x00010000 #define IO_SLC_VF_QUEUE_SEGMENT 0x00050000 /* SLC: Queue Offset */ #define IO_SLC_QUEUE_OFFSET 0x00010008 #define IO_SLC_VF_QUEUE_OFFSET 0x00050008 /* SLC: Queue Configuration */ #define IO_SLC_QUEUE_CONFIG 0x00010010 #define IO_SLC_VF_QUEUE_CONFIG 0x00050010 /* SLC: Job Timeout/Only accessible for the PF */ #define IO_SLC_APPJOB_TIMEOUT 0x00010018 #define IO_SLC_VF_APPJOB_TIMEOUT 0x00050018 #define TIMEOUT_250MS 0x0000000f #define HEARTBEAT_DISABLE 0x0000ff00 /* SLC: Queue InitSequence Register */ #define IO_SLC_QUEUE_INITSQN 0x00010020 #define IO_SLC_VF_QUEUE_INITSQN 0x00050020 /* SLC: Queue Wrap */ #define IO_SLC_QUEUE_WRAP 0x00010028 #define IO_SLC_VF_QUEUE_WRAP 0x00050028 /* SLC: Queue Status */ #define IO_SLC_QUEUE_STATUS 0x00010100 #define IO_SLC_VF_QUEUE_STATUS 0x00050100 /* SLC: Queue Working Time */ #define IO_SLC_QUEUE_WTIME 0x00010030 #define IO_SLC_VF_QUEUE_WTIME 0x00050030 /* SLC: Queue Error Counts */ #define IO_SLC_QUEUE_ERRCNTS 0x00010038 #define IO_SLC_VF_QUEUE_ERRCNTS 0x00050038 /* SLC: Queue Last Response Word */ #define IO_SLC_QUEUE_LRW 0x00010040 #define IO_SLC_VF_QUEUE_LRW 0x00050040 /* SLC: Freerunning Timer */ #define IO_SLC_FREE_RUNNING_TIMER 0x00010108 #define IO_SLC_VF_FREE_RUNNING_TIMER 0x00050108 /* SLC: Queue Virtual Access Region */ #define IO_PF_SLC_VIRTUAL_REGION 0x00050000 /* SLC: Queue Virtual Window */ #define IO_PF_SLC_VIRTUAL_WINDOW 0x00060000 /* SLC: DDCB Application Job Pending [n]
(n=0:63) */ #define IO_PF_SLC_JOBPEND(n) (0x00061000 + 8*(n)) #define IO_SLC_JOBPEND(n) IO_PF_SLC_JOBPEND(n) /* SLC: Parser Trap RAM [n] (n=0:31) */ #define IO_SLU_SLC_PARSE_TRAP(n) (0x00011000 + 8*(n)) /* SLC: Dispatcher Trap RAM [n] (n=0:31) */ #define IO_SLU_SLC_DISP_TRAP(n) (0x00011200 + 8*(n)) /* Global Fault Isolation Register (GFIR) */ #define IO_SLC_CFGREG_GFIR 0x00020000 #define GFIR_ERR_TRIGGER 0x0000ffff /* SLU: Soft Reset Register */ #define IO_SLC_CFGREG_SOFTRESET 0x00020018 /* SLU: Misc Debug Register */ #define IO_SLC_MISC_DEBUG 0x00020060 #define IO_SLC_MISC_DEBUG_CLR 0x00020068 #define IO_SLC_MISC_DEBUG_SET 0x00020070 /* Temperature Sensor Reading */ #define IO_SLU_TEMPERATURE_SENSOR 0x00030000 #define IO_SLU_TEMPERATURE_CONFIG 0x00030008 /* Voltage Margining Control */ #define IO_SLU_VOLTAGE_CONTROL 0x00030080 #define IO_SLU_VOLTAGE_NOMINAL 0x00000000 #define IO_SLU_VOLTAGE_DOWN5 0x00000006 #define IO_SLU_VOLTAGE_UP5 0x00000007 /* Direct LED Control Register */ #define IO_SLU_LEDCONTROL 0x00030100 /* SLU: Flashbus Direct Access -A5 */ #define IO_SLU_FLASH_DIRECTACCESS 0x00040010 /* SLU: Flashbus Direct Access2 -A5 */ #define IO_SLU_FLASH_DIRECTACCESS2 0x00040020 /* SLU: Flashbus Command Interface -A5 */ #define IO_SLU_FLASH_CMDINTF 0x00040030 /* SLU: BitStream Loaded */ #define IO_SLU_BITSTREAM 0x00040040 /* This Register has a switch which will change the CAs to UR */ #define IO_HSU_ERR_BEHAVIOR 0x01001010 #define IO_SLC2_SQB_TRAP 0x00062000 #define IO_SLC2_QUEUE_MANAGER_TRAP 0x00062008 #define IO_SLC2_FLS_MASTER_TRAP 0x00062010 /* UnitID 1: HSU Registers */ #define IO_HSU_UNITCFG 0x01000000 #define IO_HSU_FIR 0x01000008 #define IO_HSU_FIR_CLR 0x01000010 #define IO_HSU_FEC 0x01000018 #define IO_HSU_ERR_ACT_MASK 0x01000020 #define IO_HSU_ERR_ATTN_MASK 0x01000028 #define IO_HSU_FIRX1_ACT_MASK 0x01000030 #define IO_HSU_FIRX0_ACT_MASK 0x01000038 #define IO_HSU_SEC_LEM_DEBUG_OVR 0x01000040 #define IO_HSU_EXTENDED_ERR_PTR 0x01000048 #define 
IO_HSU_COMMON_CONFIG 0x01000060 /* UnitID 2: Application Unit (APP) */ #define IO_APP_UNITCFG 0x02000000 #define IO_APP_FIR 0x02000008 #define IO_APP_FIR_CLR 0x02000010 #define IO_APP_FEC 0x02000018 #define IO_APP_ERR_ACT_MASK 0x02000020 #define IO_APP_ERR_ATTN_MASK 0x02000028 #define IO_APP_FIRX1_ACT_MASK 0x02000030 #define IO_APP_FIRX0_ACT_MASK 0x02000038 #define IO_APP_SEC_LEM_DEBUG_OVR 0x02000040 #define IO_APP_EXTENDED_ERR_PTR 0x02000048 #define IO_APP_COMMON_CONFIG 0x02000060 #define IO_APP_DEBUG_REG_01 0x02010000 #define IO_APP_DEBUG_REG_02 0x02010008 #define IO_APP_DEBUG_REG_03 0x02010010 #define IO_APP_DEBUG_REG_04 0x02010018 #define IO_APP_DEBUG_REG_05 0x02010020 #define IO_APP_DEBUG_REG_06 0x02010028 #define IO_APP_DEBUG_REG_07 0x02010030 #define IO_APP_DEBUG_REG_08 0x02010038 #define IO_APP_DEBUG_REG_09 0x02010040 #define IO_APP_DEBUG_REG_10 0x02010048 #define IO_APP_DEBUG_REG_11 0x02010050 #define IO_APP_DEBUG_REG_12 0x02010058 #define IO_APP_DEBUG_REG_13 0x02010060 #define IO_APP_DEBUG_REG_14 0x02010068 #define IO_APP_DEBUG_REG_15 0x02010070 #define IO_APP_DEBUG_REG_16 0x02010078 #define IO_APP_DEBUG_REG_17 0x02010080 #define IO_APP_DEBUG_REG_18 0x02010088 /* Read/write from/to registers */ struct genwqe_reg_io { __u64 num; /* register offset/address */ __u64 val64; }; /* * All registers of our card will return values not equal this values. * If we see IO_ILLEGAL_VALUE on any of our MMIO register reads, the * card can be considered as unusable. It will need recovery. */ #define IO_ILLEGAL_VALUE 0xffffffffffffffffull /* * Generic DDCB execution interface. * * This interface is a first prototype resulting from discussions we * had with other teams which wanted to use the Genwqe card. It allows * to issue a DDCB request in a generic way. The request will block * until it finishes or time out with error. * * Some DDCBs require DMA addresses to be specified in the ASIV * block. 
The interface provides the capability to let the kernel * driver know where those addresses are by specifying the ATS field, * such that it can replace the user-space addresses with appropriate * DMA addresses or DMA addresses of a scatter gather list which is * dynamically created. * * Our hardware will refuse DDCB execution if the ATS field is not as * expected. That means the DDCB execution engine in the chip knows * where it expects DMA addresses within the ASIV part of the DDCB and * will check that against the ATS field definition. Any invalid or * unknown ATS content will lead to DDCB refusal. */ /* Genwqe chip Units */ #define DDCB_ACFUNC_SLU 0x00 /* chip service layer unit */ #define DDCB_ACFUNC_APP 0x01 /* chip application */ /* DDCB return codes (RETC) */ #define DDCB_RETC_IDLE 0x0000 /* Unexecuted/DDCB created */ #define DDCB_RETC_PENDING 0x0101 /* Pending Execution */ #define DDCB_RETC_COMPLETE 0x0102 /* Cmd complete. No error */ #define DDCB_RETC_FAULT 0x0104 /* App Err, recoverable */ #define DDCB_RETC_ERROR 0x0108 /* App Err, non-recoverable */ #define DDCB_RETC_FORCED_ERROR 0x01ff /* overwritten by driver */ #define DDCB_RETC_UNEXEC 0x0110 /* Unexecuted/Removed from queue */ #define DDCB_RETC_TERM 0x0120 /* Terminated */ #define DDCB_RETC_RES0 0x0140 /* Reserved */ #define DDCB_RETC_RES1 0x0180 /* Reserved */ /* DDCB Command Options (CMDOPT) */ #define DDCB_OPT_ECHO_FORCE_NO 0x0000 /* ECHO DDCB */ #define DDCB_OPT_ECHO_FORCE_102 0x0001 /* force return code */ #define DDCB_OPT_ECHO_FORCE_104 0x0002 #define DDCB_OPT_ECHO_FORCE_108 0x0003 #define DDCB_OPT_ECHO_FORCE_110 0x0004 /* only on PF !
*/ #define DDCB_OPT_ECHO_FORCE_120 0x0005 #define DDCB_OPT_ECHO_FORCE_140 0x0006 #define DDCB_OPT_ECHO_FORCE_180 0x0007 #define DDCB_OPT_ECHO_COPY_NONE (0 << 5) #define DDCB_OPT_ECHO_COPY_ALL (1 << 5) /* Definitions of Service Layer Commands */ #define SLCMD_ECHO_SYNC 0x00 /* PF/VF */ #define SLCMD_MOVE_FLASH 0x06 /* PF only */ #define SLCMD_MOVE_FLASH_FLAGS_MODE 0x03 /* bit 0 and 1 used for mode */ #define SLCMD_MOVE_FLASH_FLAGS_DLOAD 0 /* mode: download */ #define SLCMD_MOVE_FLASH_FLAGS_EMUL 1 /* mode: emulation */ #define SLCMD_MOVE_FLASH_FLAGS_UPLOAD 2 /* mode: upload */ #define SLCMD_MOVE_FLASH_FLAGS_VERIFY 3 /* mode: verify */ #define SLCMD_MOVE_FLASH_FLAG_NOTAP (1 << 2)/* just dump DDCB and exit */ #define SLCMD_MOVE_FLASH_FLAG_POLL (1 << 3)/* wait for RETC >= 0102 */ #define SLCMD_MOVE_FLASH_FLAG_PARTITION (1 << 4) #define SLCMD_MOVE_FLASH_FLAG_ERASE (1 << 5) enum genwqe_card_state { GENWQE_CARD_UNUSED = 0, GENWQE_CARD_USED = 1, GENWQE_CARD_FATAL_ERROR = 2, GENWQE_CARD_RELOAD_BITSTREAM = 3, GENWQE_CARD_STATE_MAX, }; /* common struct for chip image exchange */ struct genwqe_bitstream { __u64 data_addr; /* pointer to image data */ __u32 size; /* size of image file */ __u32 crc; /* crc of this image */ __u64 target_addr; /* starting address in Flash */ __u32 partition; /* '0', '1', or 'v' */ __u32 uid; /* 1=host/x=dram */ __u64 slu_id; /* informational/sim: SluID */ __u64 app_id; /* informational/sim: AppID */ __u16 retc; /* returned from processing */ __u16 attn; /* attention code from processing */ __u32 progress; /* progress code from processing */ }; /* Issuing a specific DDCB command */ #define DDCB_LENGTH 256 /* for debug data */ #define DDCB_ASIV_LENGTH 104 /* len of the DDCB ASIV array */ #define DDCB_ASIV_LENGTH_ATS 96 /* ASIV in ATS architecture */ #define DDCB_ASV_LENGTH 64 /* len of the DDCB ASV array */ #define DDCB_FIXUPS 12 /* maximum number of fixups */ struct genwqe_debug_data { char driver_version[64]; __u64 slu_unitcfg; __u64 app_unitcfg; 
__u8 ddcb_before[DDCB_LENGTH]; __u8 ddcb_prev[DDCB_LENGTH]; __u8 ddcb_finished[DDCB_LENGTH]; }; /* * Address Translation Specification (ATS) definitions * * Each 4 bit within the ATS 64-bit word specify the required address * translation at the defined offset. * * 63 LSB * 6666.5555.5555.5544.4444.4443.3333.3333 ... 11 * 3210.9876.5432.1098.7654.3210.9876.5432 ... 1098.7654.3210 * * offset: 0x00 0x08 0x10 0x18 0x20 0x28 0x30 0x38 ... 0x68 0x70 0x78 * res res res res ASIV ... * The first 4 entries in the ATS word are reserved. The following nibbles * each describe at an 8 byte offset the format of the required data. */ #define ATS_TYPE_DATA 0x0ull /* data */ #define ATS_TYPE_FLAT_RD 0x4ull /* flat buffer read only */ #define ATS_TYPE_FLAT_RDWR 0x5ull /* flat buffer read/write */ #define ATS_TYPE_SGL_RD 0x6ull /* sgl read only */ #define ATS_TYPE_SGL_RDWR 0x7ull /* sgl read/write */ /* ATS extensions */ /* * Super Child Block allocation/deallocation * * The SCB is build up as follows: * ATS[n] - Address Translation Specification * DATA - Data or pointers according to ATS[n] information */ #define ATS_TYPE_SCB_RD 0x8ull /* super child block ronly */ #define ATS_TYPE_SCB_RDWR 0x9ull /* super child block rd/wr */ #define ATS_TYPE_SGL2_RD 0xcull /* sglv2 ronly */ #define ATS_TYPE_SGL2_RDWR 0xdull /* sglv2 rd/wr */ #define ATS_SET_FLAGS(_struct, _field, _flags) \ (((_flags) & 0xf) << (44 - (4 * (offsetof(_struct, _field) / 8)))) #define ATS_GET_FLAGS(_ats, _byte_offs) \ (((_ats) >> (44 - (4 * ((_byte_offs) / 8)))) & 0xf) /** * struct genwqe_ddcb_cmd - User parameter for generic DDCB commands * * On the way into the kernel the driver will read the whole data * structure. On the way out the driver will not copy the ASIV data * back to user-space. 
*/ struct genwqe_ddcb_cmd { /* START of data copied to/from driver */ __u64 next_addr; /* chaining genwqe_ddcb_cmd */ __u64 flags; /* reserved */ __u8 acfunc; /* accelerators functional unit */ __u8 cmd; /* command to execute */ __u8 asiv_length; /* used parameter length */ __u8 asv_length; /* length of valid return values */ __u16 cmdopts; /* command options */ __u16 retc; /* return code from processing */ __u16 attn; /* attention code from processing */ __u16 vcrc; /* variant crc16 */ __u32 progress; /* progress code from processing */ __u64 deque_ts; /* dequeue time stamp */ __u64 cmplt_ts; /* completion time stamp */ __u64 disp_ts; /* SW processing start */ /* move to end and avoid copy-back */ __u64 ddata_addr; /* collect debug data */ /* command specific values */ __u8 asv[DDCB_ASV_LENGTH]; /* END of data copied from driver */ /* Two views of the same ASIV area: ats (8 bytes) followed by asiv[DDCB_ASIV_LENGTH_ATS], or the flat __asiv[DDCB_ASIV_LENGTH] */ union { struct { __u64 ats; __u8 asiv[DDCB_ASIV_LENGTH_ATS]; }; /* used for flash update to keep it backward compatible */ __u8 __asiv[DDCB_ASIV_LENGTH]; }; /* END of data copied to driver */ }; #define GENWQE_IOC_CODE 0xa5 /* Access functions */ #define GENWQE_READ_REG64 _IOR(GENWQE_IOC_CODE, 30, struct genwqe_reg_io) #define GENWQE_WRITE_REG64 _IOW(GENWQE_IOC_CODE, 31, struct genwqe_reg_io) #define GENWQE_READ_REG32 _IOR(GENWQE_IOC_CODE, 32, struct genwqe_reg_io) #define GENWQE_WRITE_REG32 _IOW(GENWQE_IOC_CODE, 33, struct genwqe_reg_io) #define GENWQE_READ_REG16 _IOR(GENWQE_IOC_CODE, 34, struct genwqe_reg_io) #define GENWQE_WRITE_REG16 _IOW(GENWQE_IOC_CODE, 35, struct genwqe_reg_io) #define GENWQE_GET_CARD_STATE _IOR(GENWQE_IOC_CODE, 36, enum genwqe_card_state) /** * struct genwqe_mem - Memory pinning/unpinning information * @addr: virtual user space address * @size: size of the area pin/dma-map/unmap * @direction: 0: read/1: read and write * * Avoid pinning and unpinning of memory pages dynamically. Instead * the idea is to pin the whole buffer space required for DDCB * operations in advance.
The driver will reuse this pinning and the * memory associated with it to setup the sglists for the DDCB * requests without the need to allocate and free memory or map and * unmap to get the DMA addresses. * * The inverse operation needs to be called after the pinning is not * needed anymore. Otherwise the pinnings will get removed * after the device is closed. Note that pinnings will require * memory. */ struct genwqe_mem { __u64 addr; __u64 size; __u64 direction; __u64 flags; }; #define GENWQE_PIN_MEM _IOWR(GENWQE_IOC_CODE, 40, struct genwqe_mem) #define GENWQE_UNPIN_MEM _IOWR(GENWQE_IOC_CODE, 41, struct genwqe_mem) /* * Generic synchronous DDCB execution interface. * Synchronously execute a DDCB. * * Return: 0 on success or negative error code. * -EINVAL: Invalid parameters (ASIV_LEN, ASV_LEN, illegal fixups), * no mappings found/could not create mappings * -EFAULT: illegal addresses in fixups, purging failed * -EBADMSG: enqueuing failed, retc != DDCB_RETC_COMPLETE */ #define GENWQE_EXECUTE_DDCB \ _IOWR(GENWQE_IOC_CODE, 50, struct genwqe_ddcb_cmd) #define GENWQE_EXECUTE_RAW_DDCB \ _IOWR(GENWQE_IOC_CODE, 51, struct genwqe_ddcb_cmd) /* Service Layer functions (PF only) */ #define GENWQE_SLU_UPDATE _IOWR(GENWQE_IOC_CODE, 80, struct genwqe_bitstream) #define GENWQE_SLU_READ _IOWR(GENWQE_IOC_CODE, 81, struct genwqe_bitstream) #endif /* __GENWQE_CARD_H__ */ genwqe-user-4.0.18/include/memcopy_ddcb.h000066400000000000000000000045021303345043000202630ustar00rootroot00000000000000/* * Copyright 2014,2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __MEMCOPY_DDCB_H__ #define __MEMCOPY_DDCB_H__ #include #include #ifdef __cplusplus extern "C" { #endif /**< DDCB commands */ #define ZCOMP_CMD_ZEDC_MEMCOPY 0x03 /* memcopy is available for different APPS/AFUs */ #ifndef GENWQE_APPL_ID_GZIP # define GENWQE_APPL_ID_GZIP 0x00000000475a4950 /* The GZIP APPL id */ #endif #ifndef GENWQE_APPL_ID_GZIP2 # define GENWQE_APPL_ID_GZIP2 0x00000002475a4950 /* The GZIP 2 APPL id */ #endif /** * application specific invariant part of the DDCB (96 bytes: 0x20...0x7f) * see ZCOMP Data Compression HLD spec 0.96: 5.3.3 Memcopy CMD */ struct asiv_memcpy { uint64_t inp_buff; /**< 0x20 input buffer address */ uint32_t inp_buff_len; /**< 0x28 */ uint32_t in_crc32; /**< 0x2c only used for zEDC */ uint64_t outp_buff; /**< 0x30 output buffer address */ uint32_t outp_buff_len; /**< 0x38 */ uint32_t in_adler32; /**< 0x3c only used for zEDC */ uint64_t res0[4]; /**< 0x40 0x48 0x50 0x58 */ uint16_t res1; /**< 0x60 */ uint16_t input_lists; /**< 0x62 */ uint32_t res2; /**< 0x64 */ uint64_t res3[3]; /**< 0x68 ... 0x7f */ } __attribute__((__packed__)) __attribute__((__may_alias__)); /** * application specific variant part of the DDCB (56 bytes: 0x80...0xb7) * see ZCOMP Data Compression HLD spec 0.96: 5.3.3 Memcopy CMD * NOTE(review): the packed fields below add up to 64 bytes (0x80...0xbf), * not 56 - confirm which size is intended. */ struct asv_memcpy { uint64_t res0[2]; /**< 0x80 ... 0x8f */ uint32_t out_crc32; /**< 0x90 only used for zEDC */ uint32_t out_adler32; /**< 0x94 only used for zEDC */ uint32_t inp_processed; /**< 0x98 */ uint32_t outp_returned; /**< 0x9c */ uint64_t res1[4]; /**< 0xa0 ...
0xbf */ } __attribute__((__packed__)) __attribute__((__may_alias__)); #ifdef __cplusplus } #endif #endif /* __MEMCOPY_DDCB_H__ */ genwqe-user-4.0.18/include/zaddons.h000066400000000000000000000047711303345043000173100ustar00rootroot00000000000000/* * Copyright 2014,2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __ZADDONS_H__ #define __ZADDONS_H__ /* * Extensions of our hardware accelerated zlib implementation. Use * with care, since they are not part of the official zlib.h * interface. */ enum zlib_impl { ZLIB_SW_IMPL = 0x00, ZLIB_HW_IMPL = 0x01, ZLIB_MAX_IMPL = 0x02, ZLIB_IMPL_MASK = 0x0f, /* Flags which influence special optimization behavior */ ZLIB_FLAG_USE_FLAT_BUFFERS = 0x10, /* GenWQE only */ ZLIB_FLAG_CACHE_HANDLES = 0x20, /* GenWQE only, fails for CAPI */ ZLIB_FLAG_OMIT_LAST_DICT = 0x40, /* Useful for cases like Genomics */ ZLIB_FLAG_USE_POLLING = 0x80, /* Use polling mode only for CAPI */ ZLIB_FLAG_DISABLE_CV_FOR_Z_STREAM_END = 0x100, }; /** * zlib_set_inflate_impl() - Set default implementation for inflate * * @impl: Either ZLIB_SW_IMPL or ZLIB_HW_IMPL. * * We can enforce trying hardware usage by setting * ZLIB_HW_IMPL. Nevertheless if there is no hardware available * e.g. driver not installed, no card plugged, or access rights wrong, * the software version will be used as fallback. 
*/ void zlib_set_inflate_impl(enum zlib_impl impl); /** * zlib_set_deflate_impl() - Set default implementation for deflate * * @impl: Either ZLIB_SW_IMPL or ZLIB_HW_IMPL. * * We can enforce trying hardware usage by setting * ZLIB_HW_IMPL. Nevertheless if there is no hardware available * e.g. driver not installed, no card plugged, or access rights wrong, * the software version will be used as fallback. */ void zlib_set_deflate_impl(enum zlib_impl impl); /** * zlib_set_accelerator() - Set accelerator type to be used * * @accel: GENWQE or CAPI * @card_no: card id or -1 for automatic card selection * * We support different types of hardware acceleration * devices. Examples are our PCIe based GenWQE accelerator or the CAPI * implementation for IBM System p. */ void zlib_set_accelerator(const char *accel, int card_no); #endif /* __ZADDONS_H__ */ genwqe-user-4.0.18/init/000077500000000000000000000000001303345043000150045ustar00rootroot00000000000000genwqe-user-4.0.18/init/Makefile000066400000000000000000000014611303345043000164460ustar00rootroot00000000000000# # Copyright 2016, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# SYSTEMD_UNIT_DIR?=$(DESTDIR)/lib/systemd/system all: clean: install: install -D -m 644 genwqe_maint.service \ $(SYSTEMD_UNIT_DIR)/genwqe_maint.service uninstall: $(RM) $(SYSTEMD_UNIT_DIR)/genwqe_maint.service genwqe-user-4.0.18/init/genwqe_maint.service000066400000000000000000000002241303345043000210420ustar00rootroot00000000000000[Unit] Description=IBM CAPI GenWQE health daemon [Service] Type=forking ExecStart=/bin/genwqe_maint --daemon [Install] WantedBy=multi-user.target genwqe-user-4.0.18/lib/000077500000000000000000000000001303345043000146075ustar00rootroot00000000000000genwqe-user-4.0.18/lib/Makefile000066400000000000000000000135411303345043000162530ustar00rootroot00000000000000# # Copyright 2015, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# include ../config.mk zlibver=1.2.8 libversion = $(VERSION) CFLAGS += -fPIC -fno-strict-aliasing LDLIBS += -lpthread -ldl -lrt # If we use libcxl, we need to reflect that to avoid underlinkage ifeq ($(WITH_LIBCXL),1) ifneq ($(BUNDLE_LIBCXL),1) LDLIBS += -lcxl endif endif ### Accelerated libz implementation (Accelerated Data Compression/ADC) libname=libzADC projA = $(libname).a \ $(libname).so \ $(libname).so.$(MAJOR_VERS) \ $(libname).so.$(libversion) src = wrapper.c hardware.c software.c objs = __libzHW.o __libcard.o __libDDCB.o $(src:.c=.o) ### libzHW src0 = libzHW.c inflate.c deflate.c libname0 = libzHW proj0 = $(libname0).a $(libname0).so.$(libversion) $(libname0).so objs0 = $(src0:.c=.o) ### libcard src1 = libcard.c libname1 = libcard proj1 = $(libname1).a $(libname1).so.$(libversion) $(libname1).so objs1 = $(src1:.c=.o) ### libDDCB requires libcxl for CAPI support src2 += libddcb.c ddcb_card.c # ddcb_capi is only used with LIBCXL support. ifdef WITH_LIBCXL src2 += ddcb_capi.c endif libname2 = libDDCB proj2 = $(libname2).a $(libname2).so.$(libversion) $(libname2).so objs2 = $(src2:.c=.o) projs += $(projA) $(proj0) $(proj1) $(proj2) all: $(projs) ifdef WITH_LIBCXL ddcb_capi.o: $(libcxl_a) $(libcxl_a): ..check_libcxl $(MAKE) -C $(CONFIG_LIBCXL_PATH) .PHONY: ..check_libcxl ..check_libcxl: @if [ $(BUNDLE_LIBCXL) -eq 1 ]; then \ if [ ! -d $(CONFIG_LIBCXL_PATH) ]; then \ printf "\t[ERROR]\t$(CONFIG_LIBCXL_PATH) not found!\n"; \ echo; \ echo " Please clone libcxl git before continuing:"; \ echo " git clone https://github.com/ibm-capi/libcxl"; \ echo; \ exit 1; \ fi; \ if [ ! 
-f $(CONFIG_LIBCXL_PATH)/include/misc/cxl.h ]; then \ printf "\t[ERROR]\t$(CONFIG_LIBCXL_PATH)/include/misc/cxl.h not found!\n";\ echo; \ echo " Please ensure you are having up-todate"; \ echo " kernel-headers when building the code."; \ echo " When using pslse, get a recent copy of the file"; \ echo " from kernel.org and copy the file manually to"; \ echo " $(CONFIG_LIBCXL_PATH)/include/misc/cxl.h."; \ echo; \ exit 1; \ fi \ fi endif # WITH_LIBCXL ### libzHW __$(libname0).o: $(objs0) $(LD) $(XLDFLAGS) -r -o $@ $^ $(libname0).a: __$(libname0).o $(AR) rcs $(ARFLAGS) $@ $^ $(libname0).so: $(libname0).so.$(libversion) ln -sf $< $@ $(libname0).so.$(libversion): __$(libname0).o $(CC) $(LDFLAGS) -shared -Wl,-soname,$@ -o $@ $^ ### libcard __$(libname1).o: $(objs1) $(LD) $(XLDFLAGS) -r -o $@ $^ $(libname1).a: __$(libname1).o $(AR) rcs $(ARFLAGS) $@ $^ $(libname1).so: $(libname1).so.$(libversion) ln -sf $< $@ $(libname1).so.$(libversion): __$(libname1).o $(CC) $(LDFLAGS) -shared -Wl,-soname,$@ -o $@ $^ ### libDDCB __$(libname2).o: $(objs2) $(libcxl_a) $(LD) $(XLDFLAGS) -r -o $@ $^ $(libname2).a: __$(libname2).o $(AR) rcs $(ARFLAGS) $@ $^ $(libname2).so: $(libname2).so.$(libversion) ln -sf $< $@ $(libname2).so.$(libversion): __$(libname2).o $(CC) $(LDFLAGS) -shared -Wl,-soname,$@ -o $@ $^ ### libzADC __$(libname).o: $(objs) $(libz_a) $(LD) $(XLDFLAGS) -r -o $@ $^ $(libname).a: __$(libname).o $(AR) rcs $@ $^ $(libname).so: $(libname).so.$(libversion) ln -sf $< $@ $(libname).so.$(MAJOR_VERS): $(libname).so.$(libversion) ln -sf $< $@ $(libname).so.$(libversion): __$(libname).o $(CC) $(LDFLAGS) -shared -Wl,-soname,$(libname).so.$(MAJOR_VERS) \ -Wl,--version-script=libzADC.map -o $@ $^ $(LDLIBS) # Produce z_ prefixed version of software zlib. We need this when we # want to include libz statially instead of using dlopen/dlsym to use # it. # # Special version of libz.a which has z_ prefixed function # names. 
Required for software zlib fallback in case of small buffers # and accelerator unavailability. # libz_prefixed.o: zlib_objs = $(CONFIG_LIBZ_PATH)/adler32.lo \ $(CONFIG_LIBZ_PATH)/infback.lo \ $(CONFIG_LIBZ_PATH)/compress.lo \ $(CONFIG_LIBZ_PATH)/gzclose.lo \ $(CONFIG_LIBZ_PATH)/inffast.lo \ $(CONFIG_LIBZ_PATH)/trees.lo \ $(CONFIG_LIBZ_PATH)/crc32.lo \ $(CONFIG_LIBZ_PATH)/gzlib.lo \ $(CONFIG_LIBZ_PATH)/inflate.lo \ $(CONFIG_LIBZ_PATH)/uncompr.lo \ $(CONFIG_LIBZ_PATH)/deflate.lo \ $(CONFIG_LIBZ_PATH)/gzread.lo \ $(CONFIG_LIBZ_PATH)/inftrees.lo \ $(CONFIG_LIBZ_PATH)/zutil.lo \ $(CONFIG_LIBZ_PATH)/gzwrite.lo libz_prefixed.o: libz.o $(OBJCOPY) --prefix-symbols=z_ $< $@ libz.o: $(CONFIG_LIBZ_PATH)/libz.so $(LD) $(XLDFLAGS) -r -o $@ $(zlib_objs) install: install_zlib install_zlib: all mkdir -p $(LIB_INSTALL_PATH) cp -auv $(projA) $(LIB_INSTALL_PATH) (cd $(LIB_INSTALL_PATH) && \ ln -sf $(libname).a libz.a && \ ln -sf $(libname).so.$(libversion) libz.so && \ ln -sf $(libname).so.$(libversion) libz.so.1) mkdir -p $(INCLUDE_INSTALL_PATH) cp -uv ../include/zaddons.h $(INCLUDE_INSTALL_PATH) uninstall: @echo "removing $(INCLUDE_INSTALL_PATH) $(LIB_INSTALL_PATH)" $(RM) -r $(INCLUDE_INSTALL_PATH) $(LIB_INSTALL_PATH) # general things %.o: %.c $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ $(CC) -MM $(CPPFLAGS) $(CFLAGS) $< > $*.d clean distclean: $(RM) *.o *.d $(projs) *.so *.so.* ifneq ($(CONFIG_LIBCXL_PATH),) $(MAKE) -C $(CONFIG_LIBCXL_PATH) $@ endif -include $(objs:.o=.d) $(objs0:.o=.d) $(objs1:.o=.d) $(objs2:.o=.d) genwqe-user-4.0.18/lib/README000066400000000000000000000004031303345043000154640ustar00rootroot00000000000000README ====== The code in the lib directory contains the main library code for the genwqe accellerator. There are a handful of functions that allow the application to use the zlib accelerator in the system. 
The code replaces "zlib" by using the new libzADC genwqe-user-4.0.18/lib/card_defs.h000066400000000000000000000033101303345043000166670ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __LIB_CARD_H__ #define __LIB_CARD_H__ /** * @file card_defs.h * * @brief Common defines for libraries. Local definitions which are * not exported to the outside. * * IBM Accelerator Family 'GenWQE' */ #include #include #include #include #include /* For SYS_xxx definitions */ static inline pid_t gettid(void) { return (pid_t)syscall(SYS_gettid); } #define pr_err(fmt, ...) \ fprintf(stderr, "%08x.%08x %s:%u: Error: " fmt, \ getpid(), gettid(), __FILE__, __LINE__, ## __VA_ARGS__) #define pr_warn(fmt, ...) do { \ if (_dbg_flag) \ fprintf(stderr, "%08x.%08x %s:%u: Warn: " fmt, \ getpid(), gettid(), __FILE__, __LINE__, \ ## __VA_ARGS__); \ } while (0) #define pr_dbg(fmt, ...) do { \ if (_dbg_flag) \ fprintf(stderr, fmt, ## __VA_ARGS__); \ } while (0) #define pr_info(fmt, ...) 
do { \ if (_dbg_flag) \ fprintf(stderr, "%08x.%08x %s:%u: Info: " fmt, \ getpid(), gettid(), __FILE__, __LINE__, \ ## __VA_ARGS__); \ } while (0) #endif /* __CARD_DEFS_H__ */ genwqe-user-4.0.18/lib/ddcb_capi.c000066400000000000000000001210161303345043000166440ustar00rootroot00000000000000/* * Copyright 2015, 2016, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Specialized DDCB execution implementation. * * ToDo: Create version which can transparently support multiple cards * - Make sure that the appid is the same ... */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "afu_regs.h" #define CONFIG_DDCB_TIMEOUT 5 /* max time for a DDCB to be executed */ #define NUM_DDCBS 4 /* DDCB queue length */ extern int libddcb_verbose; extern FILE *libddcb_fd_out; #include /* For SYS_xxx definitions */ static inline pid_t gettid(void) { return (pid_t)syscall(SYS_gettid); } #define VERBOSE0(fmt, ...) do { \ if (libddcb_fd_out) \ fprintf(libddcb_fd_out, "%08x.%08x: " fmt, \ getpid(), gettid(), ## __VA_ARGS__); \ } while (0) #define VERBOSE1(fmt, ...) do { \ if (libddcb_fd_out && (libddcb_verbose > 0)) \ fprintf(libddcb_fd_out, "%08x.%08x: " fmt, \ getpid(), gettid(), ## __VA_ARGS__); \ } while (0) #define VERBOSE2(fmt, ...) 
do { \ if (libddcb_fd_out && (libddcb_verbose > 1)) \ fprintf(libddcb_fd_out, "%08x.%08x: " fmt, \ getpid(), gettid(), ## __VA_ARGS__); \ } while (0) #define VERBOSE3(fmt, ...) do { \ if (libddcb_fd_out && (libddcb_verbose > 3)) \ fprintf(libddcb_fd_out, "%08x.%08x: " fmt, \ getpid(), gettid(), ## __VA_ARGS__); \ } while (0) #define __free(ptr) free((ptr)) static void *__ddcb_done_thread(void *card_data); /** * Each CAPI compression card has one AFU, which provides one ddcb * queue per process. Multiple threads within one process share the * ddcb queue. Locking is needed to ensure that this works race free. */ struct ttxs { struct dev_ctx *ctx; /* Pointer to Card Context */ int compl_code; /* Completion Code */ sem_t wait_sem; int seqnum; /* Seq Number when done */ int card_no; /* Card number from Open */ int card_next; /* Next card to try in redundant mode */ unsigned int mode; uint64_t app_id; /* a copy of MMIO_APP_VERSION_REG */ uint64_t app_id_mask; /* used when opening the handle */ struct ttxs *verify; }; /* Thread wait Queue, allocate one entry per ddcb */ enum waitq_status { DDCB_FREE, DDCB_IN, DDCB_OUT, DDCB_ERR }; struct tx_waitq { enum waitq_status status; struct ddcb_cmd *cmd; struct ttxs *ttx; /* back Pointer to active ttx */ int seqnum; /* a copy of ddcb_seqnum at start time */ bool thread_wait; /* A thread is waiting to */ uint64_t q_in_time; /* Time in msec when i added this ddcb */ }; /** * A a device context is normally bound to a card which provides a * ddcb queue. Whenever a new context is created a queue is attached * to it. Whenever it is removed the queue is removed too. There can * be multiple contexts using just one card. 
*/ struct dev_ctx { ddcb_t *ddcb; /* ddcb queue */ struct tx_waitq waitq[NUM_DDCBS]; unsigned int completed_tasks[NUM_DDCBS+1]; /* used for DDCB_DEBUG=1 */ unsigned int completed_ddcbs; /* used for DDCB_DEBUG=1 */ unsigned int process_irqs; /* used for DDCB_DEBUG=1 */ int card_no; /* Same card number as in ttx */ unsigned int mode; pthread_mutex_t lock; int clients; /* Thread open counter */ struct cxl_afu_h *afu_h; /* afu_h != NULL device is open */ int afu_fd; /* fd from cxl_afu_fd() */ int afu_rc; /* rc from __afu_open() */ long cr_device; /* config record device id */ long cr_vendor; /* config record vendor id */ long api_version_compatible; uint16_t ddcb_seqnum; uint16_t ddcb_free1; /* Not used */ unsigned int ddcb_num; /* How deep is my ddcb queue */ int ddcb_out; /* ddcb Output (done) index */ int ddcb_in; /* ddcb Input index */ struct cxl_event event; /* last AFU event */ int tout; /* Timeout Value for compeltion */ pthread_t ddcb_done_tid; sem_t open_done_sem; /* open done */ uint64_t app_id; /* a copy of MMIO_APP_VERSION_REG */ int cid_id; /* cid id from MMIO_DDCBQ_CID_REG */ sem_t free_sem; /* Sem to wait for free ddcb */ struct dev_ctx *verify; /* Verify field */ }; #define NUM_CARDS 4 /* max number of CAPI cards in system */ static ddcb_t my_ddcbs[NUM_CARDS][NUM_DDCBS] __attribute__((aligned(64*1024))); static struct dev_ctx my_ctx[NUM_CARDS]; static inline uint64_t get_msec(void) { struct timeval t; gettimeofday(&t, NULL); return t.tv_sec * 1000 + t.tv_usec/1000; } /* Add trace function by setting RT_TRACE */ //#define RT_TRACE #ifdef RT_TRACE #define RT_TRACE_SIZE 1000 struct trc_stru { uint32_t tok; uint32_t tid; uint32_t n1; uint32_t n2; void *p; }; static int trc_idx = 0, trc_wrap = 0; static struct trc_stru trc_buff[RT_TRACE_SIZE]; static pthread_mutex_t trc_lock; static void rt_trace_init(void) { pthread_mutex_init(&trc_lock, NULL); } static void rt_trace(uint32_t tok, uint32_t n1, uint32_t n2, void *p) { int i; pthread_mutex_lock(&trc_lock); i 
= trc_idx; trc_buff[i].tid = (uint32_t)get_msec(); trc_buff[i].tok = tok; trc_buff[i].n1 = n1; trc_buff[i].n2= n2; trc_buff[i].p = p; i++; if (i == RT_TRACE_SIZE) { i = 0; trc_wrap++; } trc_idx = i; pthread_mutex_unlock(&trc_lock); } static void rt_trace_dump(void) { int i; pthread_mutex_lock(&trc_lock); VERBOSE0("Index: %d Warp: %d\n", trc_idx, trc_wrap); for (i = 0; i < RT_TRACE_SIZE; i++) { if (0 == trc_buff[i].tok) break; VERBOSE0("%03d: %04d : %04x - %04x - %04x - %p\n", i, trc_buff[i].tid, trc_buff[i].tok, trc_buff[i].n1, trc_buff[i].n2, trc_buff[i].p); } trc_idx = 0; pthread_mutex_unlock(&trc_lock); } #else static void rt_trace_init(void) {} static void rt_trace(uint32_t tok __attribute__((unused)), uint32_t n1 __attribute__((unused)), uint32_t n2 __attribute__((unused)), void *p __attribute__((unused))) {} static void rt_trace_dump(void) {} #endif /* Command to ddcb */ static inline void cmd_2_ddcb(ddcb_t *pddcb, struct ddcb_cmd *cmd, uint16_t seqnum, bool use_irq) { pddcb->pre = DDCB_PRESET_PRE; pddcb->cmdopts_16 = __cpu_to_be16(cmd->cmdopts); pddcb->cmd = cmd->cmd; pddcb->acfunc = cmd->acfunc; /* functional unit */ pddcb->psp = (((cmd->asiv_length / 8) << 4) | ((cmd->asv_length / 8))); pddcb->n.ats_64 = __cpu_to_be64(cmd->ats); memcpy(&pddcb->n.asiv[0], &cmd->asiv[0], DDCB_ASIV_LENGTH_ATS); pddcb->icrc_hsi_shi_32 = __cpu_to_be32(0x00000000); /* for crc */ /* Write seqnum into reserved area, check for this seqnum is done in ddcb_2_cmd() */ pddcb->rsvd_0e = __cpu_to_be16(seqnum); /* DDCB completion irq */ if (use_irq) pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32; pddcb->seqnum = __cpu_to_be16(seqnum); pddcb->retc_16 = 0; if (libddcb_verbose > 3) { VERBOSE0("DDCB [%016llx] Seqnum 0x%x before execution:\n", (long long)(unsigned long)(void *)pddcb, seqnum); ddcb_hexdump(libddcb_fd_out, pddcb, sizeof(ddcb_t)); } } /** * Copy DDCB ASV to request struct. 
There is no endian conversion * made, since data structure in ASV is still unknown here * return true if the receiced ddcb is good. */ static bool ddcb_2_cmd(ddcb_t *ddcb, struct ddcb_cmd *cmd) { memcpy(&cmd->asv[0], (void *) &ddcb->asv[0], cmd->asv_length); /* copy status flags of the variant part */ cmd->vcrc = __be16_to_cpu(ddcb->vcrc_16); cmd->deque_ts = __be64_to_cpu(ddcb->deque_ts_64); cmd->cmplt_ts = __be64_to_cpu(ddcb->cmplt_ts_64); cmd->attn = __be16_to_cpu(ddcb->attn_16); cmd->progress = __be32_to_cpu(ddcb->progress_32); cmd->retc = __be16_to_cpu(ddcb->retc_16); /* Check received seqnum here (this will become a copy from rsvd_0e field) */ if (ddcb->rsvd_0e != ddcb->rsvd_c0) return false; return true; } static void afu_print_status(struct cxl_afu_h *afu_h, FILE *fp) { int i; uint64_t addr, reg; long cr_device = -1, cr_vendor = -1, cr_class = -1; if (fp == NULL) return; cxl_get_cr_device(afu_h, 0, &cr_device); cxl_get_cr_vendor(afu_h, 0, &cr_vendor); cxl_get_cr_class(afu_h, 0, &cr_class); fprintf(fp, " cr_device: 0x%016lx\n" " cr_vendor: 0x%016lx\n" " cr_class: 0x%016lx\n", cr_device, cr_vendor, cr_class); cxl_mmio_read64(afu_h, MMIO_IMP_VERSION_REG, ®); fprintf(fp, " Version Reg: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_APP_VERSION_REG, ®); fprintf(fp, " Appl. 
Reg: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_AFU_CONFIG_REG, ®); fprintf(fp, " Afu Config Reg: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_AFU_STATUS_REG, ®); fprintf(fp, " Afu Status Reg: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_AFU_COMMAND_REG, ®); fprintf(fp, " Afu Cmd Reg: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_FRT_REG, ®); fprintf(fp, " Free Run Timer: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_DDCBQ_START_REG, ®); fprintf(fp, " DDCBQ Start Reg: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_DDCBQ_CONFIG_REG, ®); fprintf(fp, " DDCBQ Conf Reg: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_DDCBQ_COMMAND_REG, ®); fprintf(fp, " DDCBQ Cmd Reg: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_DDCBQ_STATUS_REG, ®); fprintf(fp, " DDCBQ Stat Reg: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_DDCBQ_CID_REG, ®); fprintf(fp, " DDCBQ Context ID: 0x%016llx\n", (long long)reg); cxl_mmio_read64(afu_h, MMIO_DDCBQ_WT_REG, ®); fprintf(fp, " DDCBQ WT Reg: 0x%016llx\n", (long long)reg); for (i = 0; i < MMIO_FIR_REGS_NUM; i++) { addr = MMIO_FIR_REGS_BASE + (uint64_t)(i * 8); cxl_mmio_read64(afu_h, addr, ®); fprintf(fp, " FIR Reg [%08llx]: 0x%016llx\n", (long long)addr, (long long)reg); } } /* Init Thread Wait Queue */ static void __setup_waitq(struct dev_ctx *ctx) { unsigned int i; struct tx_waitq *q; for (i = 0, q = &ctx->waitq[0]; i < ctx->ddcb_num; i++, q++) { q->status = DDCB_FREE; q->cmd = NULL; q->ttx = NULL; q->thread_wait = false; } } /** * NOTE: ctx->lock must be held when entering this function. 
* o Open afu device
 * o Map MMIO registers
 * o Allocate and setup ddcb queue
 * o Initialize queue hardware to become operational
 *
 * Returns DDCB_OK if the AFU is (or already was) open. On failure the
 * afu handle is released, ctx->afu_h is reset to NULL, free_sem is
 * destroyed again and a DDCB_ERR_* code is returned.
 */
static int __afu_open(struct dev_ctx *ctx)
{
	int rc = DDCB_OK;
	char device[64];
	uint64_t mmio_dat = 0;
	int rc0;

	/* Do not do anything if afu should have already been opened */
	if (ctx->afu_h)
		return DDCB_OK;

	/* Master and slave contexts use different device nodes */
	snprintf(device, sizeof(device), "/dev/cxl/afu%d.0%c", ctx->card_no,
		 (DDCB_MODE_MASTER & ctx->mode) ? 'm' : 's');

	VERBOSE1(" [%s] AFU[%d] Enter Open: %s DDCBs @ %p\n", __func__,
		 ctx->card_no, device, (void *)&ctx->ddcb[0]);

	ctx->ddcb_num = NUM_DDCBS;
	ctx->ddcb_seqnum = 0xf00d;	/* Starting Seq */
	ctx->ddcb_in = 0;		/* ddcb Input Index */
	ctx->ddcb_out = 0;		/* ddcb Output Index */

	/* One token per DDCB slot */
	rc0 = sem_init(&ctx->free_sem, 0, ctx->ddcb_num);
	if (rc0 != 0) {
		VERBOSE0(" [%s] initializing free_sem failed %d %s ...\n",
			 __func__, rc0, strerror(errno));
		return DDCB_ERRNO;
	}

	if (!(DDCB_MODE_MASTER & ctx->mode))
		__setup_waitq(ctx);

	ctx->afu_h = cxl_afu_open_dev(device);
	if (NULL == ctx->afu_h) {
		rc = DDCB_ERR_CARD;
		goto err_exit;
	}

	/* Check if the compiled in API version is compatible with the
	   one reported by the kernel driver */
	rc = cxl_get_api_version_compatible(ctx->afu_h,
					    &ctx->api_version_compatible);
	if ((rc != 0) ||
	    (ctx->api_version_compatible != CXL_KERNEL_API_VERSION)) {
		VERBOSE0(" [%s] ERR: incompatible API version: %ld/%d rc=%d\n",
			 __func__, ctx->api_version_compatible,
			 CXL_KERNEL_API_VERSION, rc);
		rc = DDCB_ERR_VERS_MISMATCH;
		goto err_afu_free;
	}

	/* FIXME This is still keeping the backwards compatibility */
	/* Check vendor id; a failing query is only warned about */
	rc = cxl_get_cr_vendor(ctx->afu_h, 0, &ctx->cr_vendor);
	if (rc == 0) {
		if (ctx->cr_vendor != CGZIP_CR_VENDOR) {
			VERBOSE0(" [%s] ERR: vendor_id: %ld/%d\n", __func__,
				 ctx->cr_vendor, CGZIP_CR_VENDOR);
			rc = DDCB_ERR_VERS_MISMATCH;
			goto err_afu_free;
		}
	} else
		VERBOSE0(" [%s] WARNING: checking vendor id: %08lx/%d\n",
			 __func__, (unsigned long)ctx->cr_vendor, rc);

	/* Check device id; a failing query is only warned about */
	rc = cxl_get_cr_device(ctx->afu_h, 0, &ctx->cr_device);
	if (rc == 0) {
		if (ctx->cr_device != CGZIP_CR_DEVICE) {
			/* Report the expected *device* id, not the vendor id */
			VERBOSE0(" [%s] ERR: device_id: %ld/%d\n", __func__,
				 ctx->cr_device, CGZIP_CR_DEVICE);
			rc = DDCB_ERR_CARD;
			goto err_afu_free;
		}
	} else
		VERBOSE0(" [%s] WARNING: checking device id: %08lx/%d\n",
			 __func__, (unsigned long)ctx->cr_device, rc);

	ctx->afu_fd = cxl_afu_fd(ctx->afu_h);

	rc = cxl_afu_attach(ctx->afu_h,
			    (__u64)(unsigned long)(void *)ctx->ddcb);
	if (0 != rc) {
		rc = DDCB_ERR_CARD;
		goto err_afu_free;
	}

	if (cxl_mmio_map(ctx->afu_h, CXL_MMIO_BIG_ENDIAN) == -1) {
		rc = DDCB_ERR_CARD;
		goto err_afu_free;
	}

	if (!(DDCB_MODE_MASTER & ctx->mode)) {
		/* Only slaves can configure a Context for DMA */
		rc = cxl_mmio_write64(ctx->afu_h, MMIO_DDCBQ_START_REG,
				(uint64_t)(unsigned long)(void *)ctx->ddcb);
		if (rc != 0) {
			rc = DDCB_ERR_CARD;
			goto err_mmio_unmap;
		}

		/* | 63..48 | 47....32 | 31........24 | 23....16 | 15.....0 | */
		/* | Seqnum | Reserved | 1st ddcb num | max ddcb | Reserved | */
		mmio_dat = (((uint64_t)ctx->ddcb_seqnum << 48) |
			    ((uint64_t)ctx->ddcb_in << 24) |
			    ((uint64_t)(ctx->ddcb_num - 1) << 16));
		rc = cxl_mmio_write64(ctx->afu_h, MMIO_DDCBQ_CONFIG_REG,
				      mmio_dat);
		if (rc != 0) {
			rc = DDCB_ERR_CARD;
			goto err_mmio_unmap;
		}
	}

	/* Get MMIO_APP_VERSION_REG */
	cxl_mmio_read64(ctx->afu_h, MMIO_APP_VERSION_REG, &mmio_dat);
	ctx->app_id = mmio_dat;		/* Save it */

	/* Get Context ID Register */
	cxl_mmio_read64(ctx->afu_h, MMIO_DDCBQ_CID_REG, &mmio_dat);
	ctx->cid_id = (int)mmio_dat & 0xffff;	/* only need my context */

	if (libddcb_verbose > 1)
		afu_print_status(ctx->afu_h, libddcb_fd_out);

	/* __afu_close() refuses contexts whose verify field is not set */
	ctx->verify = ctx;
	VERBOSE1(" [%s] AFU[%d:%d] Exit rc: %d\n", __func__,
		 ctx->card_no, ctx->cid_id, rc);
	return DDCB_OK;

 err_mmio_unmap:
	cxl_mmio_unmap(ctx->afu_h);
 err_afu_free:
	cxl_afu_free(ctx->afu_h);
	ctx->afu_h = NULL;
 err_exit:
	VERBOSE1(" [%s] AFU[%d] ERROR: rc: %d errno: %d %s\n", __func__,
		 ctx->card_no, rc, errno, strerror(errno));
	/* A later open attempt calls sem_init() on free_sem again */
	sem_destroy(&ctx->free_sem);
	return rc;
}

/**
 * NOTE: ctx->lock must be held when entering this function.
* ctx->afu_h must be valid * ctx->clients must be 0 */ static inline int __afu_close(struct dev_ctx *ctx, bool force) { struct cxl_afu_h *afu_h; uint64_t mmio_dat; int i = 0; int rc = DDCB_OK; if (NULL == ctx) return DDCB_ERR_INVAL; if (ctx->verify != ctx) return DDCB_ERR_INVAL; afu_h = ctx->afu_h; if (NULL == afu_h) { VERBOSE0("[%s] WARNING: Trying to close inactive AFU!\n", __func__); return DDCB_ERR_INVAL; } if (0 != ctx->clients) { /* * Enable this warning only in verbose mode. We have a * testcase which does not close the afu handles * properly, but just does exit(). This can cause the * usage count still be != 0. Force is applied when * the library destructor is being called. That should * be fine. */ VERBOSE1("[%s] AFU[%d:%d] Error clients: %d\n", __func__, ctx->card_no, ctx->cid_id, ctx->clients); if (!force) return DDCB_ERR_INVAL; } VERBOSE1(" [%s] AFU[%d:%d] Enter Open Clients: %d\n", __func__, ctx->card_no, ctx->cid_id, ctx->clients); while (1) { cxl_mmio_read64(afu_h, MMIO_DDCBQ_STATUS_REG, &mmio_dat); if (0x0ull == (mmio_dat & 0x10)) break; usleep(100); i++; if (1000 == i) { VERBOSE0("[%s] AFU[%d:%d] Error Timeout wait_afu_stop " "STATUS_REG: 0x%016llx\n", __func__, ctx->card_no, ctx->cid_id, (long long)mmio_dat); rc = DDCB_ERR_CARD; break; } } if (libddcb_verbose > 1) afu_print_status(ctx->afu_h, libddcb_fd_out); cxl_mmio_unmap(afu_h); cxl_afu_free(afu_h); ctx->afu_h = NULL; VERBOSE1(" [%s] AFU[%d:%d] Exit rc: %d\n", __func__, ctx->card_no, ctx->cid_id, rc); return rc; } static void afu_dump_queue(struct dev_ctx *ctx) { unsigned int i; ddcb_t *ddcb; for (i = 0, ddcb = &ctx->ddcb[0]; i < ctx->ddcb_num; i++, ddcb++) { VERBOSE0("DDCB %d [%016llx]\n", i, (long long)ddcb); ddcb_hexdump(libddcb_fd_out, ddcb, sizeof(ddcb_t)); } } /** * NOTE: ctx->lock must be held when entering this function. * * This needs to be executed only if the device is not * yet open. The Card (AFU) will be attaced in the done thread. 
*/
static int card_dev_open(struct dev_ctx *ctx)
{
	int rc = DDCB_OK;
	void *res = NULL;

	VERBOSE1(" [%s] AFU[%d] Enter clients: %d open_done_sem: %p\n",
		 __func__, ctx->card_no, ctx->clients,
		 (void *)&ctx->open_done_sem);

	if (ctx->ddcb_done_tid != 0)	/* already in use!! */
		return DDCB_OK;

	/* Create a semaphore to wait until afu Open is done */
	rc = sem_init(&ctx->open_done_sem, 0, 0);
	if (0 != rc) {
		VERBOSE0("ERROR: initializing open_done_sem %p %d %s!\n",
			 (void *)&ctx->open_done_sem, rc, strerror(errno));
		return DDCB_ERRNO;
	}

	/* Now create the worker thread which opens the afu */
	rc = pthread_create(&ctx->ddcb_done_tid, NULL,
			    &__ddcb_done_thread, ctx);
	if (0 != rc) {
		VERBOSE1(" [%s] ERROR: pthread_create rc: %d\n",
			 __func__, rc);
		/*
		 * The thread id is undefined after a failed create.
		 * Reset it so the next call tries again, and release
		 * the semaphore since it is initialized on every attempt.
		 */
		ctx->ddcb_done_tid = 0;
		sem_destroy(&ctx->open_done_sem);
		return DDCB_ERR_ENOMEM;
	}

	/* The worker posts open_done_sem once ctx->afu_rc is set */
	TEMP_FAILURE_RETRY(sem_wait(&ctx->open_done_sem));
	rc = ctx->afu_rc;		/* Get RC */
	if (DDCB_OK != rc) {
		/* The thread was not able to open or init the AFU */
		VERBOSE1(" [%s] AFU[%d] ERROR: rc: %d\n", __func__,
			 ctx->card_no, rc);

		/* Wait for done thread to join */
		pthread_join(ctx->ddcb_done_tid, &res);
		ctx->ddcb_done_tid = 0;

		/* Nobody uses the semaphore once the worker is gone */
		sem_destroy(&ctx->open_done_sem);
	}

	VERBOSE1(" [%s] AFU[%d:%d] Exit rc: %d\n", __func__,
		 ctx->card_no, ctx->cid_id, rc);
	return rc;
}

/**
 * NOTE: ctx->lock must be held when entering this function.
*/ static int card_dev_close(struct dev_ctx *ctx) { int rc = DDCB_OK; void *res = NULL; VERBOSE1(" [%s] AFU[%d:%d] Enter clients: %d\n", __func__, ctx->card_no, ctx->cid_id, ctx->clients); if (ctx->ddcb_done_tid) { rc = pthread_cancel(ctx->ddcb_done_tid); VERBOSE1(" [%s] AFU[%d:%d] Wait done_thread to join rc: %d\n", __func__, ctx->card_no, ctx->cid_id, rc); rc = pthread_join(ctx->ddcb_done_tid, &res); VERBOSE1(" [%s] AFU[%d:%d] clients: %d rc: %d\n", __func__, ctx->card_no, ctx->cid_id, ctx->clients, rc); ctx->ddcb_done_tid = 0; } VERBOSE1(" [%s] AFU[%d:%d] Exit clients: %d\n", __func__, ctx->card_no, ctx->cid_id, ctx->clients); return rc; } static int __client_inc(struct dev_ctx *ctx, unsigned int mode) { int rc = DDCB_OK; pthread_mutex_lock(&ctx->lock); VERBOSE1(" [%s] AFU[%d:%d] Enter clients: %d\n", __func__, ctx->card_no, ctx->cid_id, ctx->clients); if (ctx->clients == 0) { ctx->mode = mode; rc = card_dev_open(ctx); if (DDCB_OK == rc) ctx->clients++; /* increment clients only if good */ } else { if (mode != ctx->mode) rc = DDCB_ERRNO; else ctx->clients++; /* increment clients */ } VERBOSE1(" [%s] AFU[%d:%d] Exit clients: %d rc: %d\n", __func__, ctx->card_no, ctx->cid_id, ctx->clients, rc); pthread_mutex_unlock(&ctx->lock); return rc; } static void __client_dec(struct dev_ctx *ctx) { pthread_mutex_lock(&ctx->lock); VERBOSE1(" [%s] AFU[%d:%d] Enter Clients: %d\n", __func__, ctx->card_no, ctx->cid_id, ctx->clients); ctx->clients--; /* * Since closing the AFU is so expensive, we keep the afu * handle and the allocating thread alive until the * application exits. 
* * if (0 == ctx->clients) * card_dev_close(ctx); */ VERBOSE1(" [%s] AFU[%d:%d] Exit Clients: %d\n", __func__, ctx->card_no, ctx->cid_id, ctx->clients); pthread_mutex_unlock(&ctx->lock); } static void *card_open(int card_no, unsigned int mode, int *card_rc, uint64_t appl_id __attribute__((unused)), uint64_t appl_id_mask __attribute__((unused))) { unsigned int i; int rc = DDCB_OK; struct ttxs *ttx = NULL; VERBOSE1("[%s] AFU[%d] Enter mode: 0x%x\n", __func__, card_no, mode); if ((card_no != ACCEL_REDUNDANT) && ((card_no < 0) || (card_no >= NUM_CARDS))) { rc = DDCB_ERR_INVAL; goto card_open_exit; } /* Allocate Thread Context */ ttx = calloc(1, sizeof(*ttx)); if (!ttx) { rc = DDCB_ERR_ENOMEM; goto card_open_exit; } /* Inc use count and initialize AFU on first open */ sem_init(&ttx->wait_sem, 0, 0); ttx->card_no = card_no; /* Save only right now */ ttx->app_id = appl_id; ttx->app_id_mask = appl_id_mask; ttx->card_next = rand() % NUM_CARDS; /* start always random */ ttx->mode = mode; ttx->verify = ttx; /* * We bind the client to the card in open for single card mode * and to any card in redundant mode. 
*/ if (ttx->card_no != ACCEL_REDUNDANT) { ttx->ctx = &my_ctx[card_no]; /* select card context */ rc = __client_inc(ttx->ctx, mode); if (rc != DDCB_OK) { free(ttx); ttx = NULL; } } else { /* open all possible cards */ for (i = 0; i < NUM_CARDS; i++) { rc = __client_inc(&my_ctx[ttx->card_next], mode); if (rc == DDCB_OK) /* remember last one which is ok */ ttx->ctx = &my_ctx[ttx->card_next]; ttx->card_next = (ttx->card_next + 1) % NUM_CARDS; } } card_open_exit: if (card_rc) *card_rc = rc; VERBOSE1("[%s] AFU[%d] Exit ttx: %p\n", __func__, card_no, ttx); return ttx; } static int card_close(void *card_data) { unsigned int i; struct ttxs *ttx = (struct ttxs*)card_data; VERBOSE1("[%s] Enter ttx: %p\n", __func__, ttx); if (NULL == ttx) return DDCB_ERR_INVAL; if (ttx->verify != ttx) return DDCB_ERR_INVAL; rt_trace(0xdeaf, 0, 0, ttx); if (ttx->card_no != ACCEL_REDUNDANT) __client_dec(ttx->ctx); else for (i = 0; i < NUM_CARDS; i++) __client_dec(&my_ctx[i]); ttx->verify = NULL; free(ttx); rt_trace_dump(); VERBOSE1("[%s] Exit ttx: %p\n", __func__, ttx); return DDCB_OK; } static void start_ddcb(struct cxl_afu_h *afu_h, int seq) { uint64_t reg; reg = (uint64_t)seq << 48 | 1; /* Set Seq. 
Number + Start Bit */ cxl_mmio_write64(afu_h, MMIO_DDCBQ_COMMAND_REG, reg); } /** * Set command into next DDCB Slot */ static int __ddcb_execute_multi(void *card_data, struct ddcb_cmd *cmd) { struct ttxs *ttx = (struct ttxs*)card_data; struct dev_ctx *ctx = NULL; struct tx_waitq *txq = NULL; ddcb_t *ddcb; int idx = 0; int seq, val; struct ddcb_cmd *my_cmd; if (NULL == ttx) return DDCB_ERR_INVAL; if (ttx->verify != ttx) return DDCB_ERR_INVAL; if (NULL == cmd) return DDCB_ERR_INVAL; ctx = ttx->ctx; /* get card Context */ if (DDCB_MODE_MASTER & ctx->mode) /* no DMA in Master Mode */ return DDCB_ERR_INVAL; my_cmd = cmd; while (my_cmd) { sem_getvalue(&ctx->free_sem, &val); TEMP_FAILURE_RETRY(sem_wait(&ctx->free_sem)); pthread_mutex_lock(&ctx->lock); idx = ctx->ddcb_in; ddcb = &ctx->ddcb[idx]; txq = &ctx->waitq[idx]; txq->ttx = ttx; /* set ttx pointer into txq */ txq->status = DDCB_IN; seq = (int)ctx->ddcb_seqnum; /* Get seq */ txq->cmd = my_cmd; /* my command to txq */ txq->seqnum = ctx->ddcb_seqnum; /* Save seq Number */ txq->q_in_time = get_msec(); /* Save now time in msec */ ctx->ddcb_seqnum++; /* Next seq */ rt_trace(0x00a0, seq, idx, ttx); VERBOSE1("[%s] AFU[%d:%d] seq: 0x%x slot: %d cmd: %p\n", __func__, ctx->card_no, ctx->cid_id, seq, idx, my_cmd); /* Increment ddcb_in and warp back to 0 */ ctx->ddcb_in = (ctx->ddcb_in + 1) % ctx->ddcb_num; cmd_2_ddcb(ddcb, my_cmd, seq, (ctx->mode & DDCB_MODE_POLLING) ? 
false : true); start_ddcb(ctx->afu_h, seq); /* Get Next cmd and continue if there is one */ my_cmd = (struct ddcb_cmd *)my_cmd->next_addr; if (NULL == my_cmd) txq->thread_wait = true; pthread_mutex_unlock(&ctx->lock); } /* Block Caller */ VERBOSE2("[%s] Wait ttx: %p\n", __func__, ttx); TEMP_FAILURE_RETRY(sem_wait(&ttx->wait_sem)); rt_trace(0x00af, ttx->seqnum, idx, ttx); VERBOSE2("[%s] return ttx: %p\n", __func__, ttx); return ttx->compl_code; /* Give Completion code back to caller */ } static int ddcb_execute(void *card_data, struct ddcb_cmd *cmd) { int rc; unsigned int i; struct ttxs *ttx = (struct ttxs*)card_data; if (ttx->card_no == ACCEL_REDUNDANT) { for (i = 0; i < NUM_CARDS; i++) { ttx->card_next = (ttx->card_next + 1) % NUM_CARDS; if (my_ctx[ttx->card_next].afu_h != NULL) { ttx->ctx = &my_ctx[ttx->card_next]; break; } } } rc = __ddcb_execute_multi(card_data, cmd); if (DDCB_OK != rc) errno = EBADF; /* Return Invalid exchange */ return rc; } static bool __ddcb_done_post(struct dev_ctx *ctx, int compl_code) { int idx, elapsed_time; ddcb_t *ddcb; struct tx_waitq *txq; struct ttxs *ttx = NULL; pthread_mutex_lock(&ctx->lock); idx = ctx->ddcb_out; ddcb = &ctx->ddcb[idx]; txq = &ctx->waitq[idx]; /* Check if Nothing to do, goto exit and wait again */ if (DDCB_IN != txq->status) goto post_exit_stop; elapsed_time = (int)(get_msec() - txq->q_in_time); if (DDCB_ERR_IRQTIMEOUT == compl_code) { if (ddcb->retc_16) VERBOSE2("\t[%s] AFU[%d:%d] seq: 0x%x slot: %d compl_code: %d" " retc: %4.4x after %d msec. 
wait 4 IRQ\n", __func__, ctx->card_no, ctx->cid_id, txq->seqnum, idx, compl_code, ddcb->retc_16, elapsed_time); /* Select Timeout and no data received */ if (elapsed_time < (ctx->tout * 1000)) goto post_exit_cont; /* Continue until timeout */ VERBOSE2("\t[%s] AFU[%d:%d] seq: 0x%x slot: %d timeout " "after %d msec\n", __func__, ctx->card_no, ctx->cid_id, txq->seqnum, idx, elapsed_time); } if ((DDCB_OK == compl_code) && (0 == ddcb->retc_16)) { /* Still waiting for retc to be set */ rt_trace(0x001a, ddcb->retc_16, idx, 0); VERBOSE2("\t[%s] AFU[%d:%d] seq: 0x%x slot: %d " "retc: 0 wait\n", __func__, ctx->card_no, ctx->cid_id, txq->seqnum, idx); goto post_exit_stop; } if (libddcb_verbose > 3) { /* For debug only */ VERBOSE0("AFU[%d:%d] DDCB %d [%016llx] after execution " "compl_code: %d retc16: %4.4x\n", ctx->card_no, ctx->cid_id, idx, (long long)ddcb, compl_code, ddcb->retc_16); ddcb_hexdump(libddcb_fd_out, ddcb, sizeof(ddcb_t)); } /* Copy the ddcb back to cmd, and check for error */ if (false == ddcb_2_cmd(ddcb, txq->cmd)) { /* Overwrite compl_code only if not set before */ if (DDCB_OK != compl_code) compl_code = DDCB_ERR_EXEC_DDCB; } if (DDCB_OK != compl_code) VERBOSE0("\t[%s] AFU[%d:%d] seq: 0x%x slot: %d compl_code: %d" " retc: %x after: %d msec\n", __func__, ctx->card_no, ctx->cid_id, txq->seqnum, idx, compl_code, ddcb->retc_16, elapsed_time); else VERBOSE1("\t[%s] AFU[%d:%d] seq: 0x%x slot: %d compl_code: %d" " retc: %x after: %d msec\n", __func__, ctx->card_no, ctx->cid_id, txq->seqnum, idx, compl_code, ddcb->retc_16, elapsed_time); ttx = txq->ttx; ttx->compl_code = compl_code; rt_trace(0x0011, txq->seqnum, idx, ttx); sem_post(&ctx->free_sem); if (txq->thread_wait) { rt_trace(0x0012, txq->seqnum, idx, ttx); VERBOSE1("\t[%s] AFU[%d:%d] Post: %p\n", __func__, ctx->card_no, ctx->cid_id, ttx); sem_post(&ttx->wait_sem); txq->thread_wait = false; } /* Increment and wrap back to start */ ctx->ddcb_out = (ctx->ddcb_out + 1) % ctx->ddcb_num; txq->status = DDCB_FREE; 
post_exit_cont: pthread_mutex_unlock(&ctx->lock); return true; /* Continue Loop */ post_exit_stop: pthread_mutex_unlock(&ctx->lock); return false; /* Stop Loop */ } /** * The cleanup function gets invoked after the thread was canceld by * sending card_dev_close(). This function was intended to close the * AFU. But it turned out that closing it has sigificant performance * impact. So we decided to keep the afu resource opened until the * application terminates. This will absorb one file descriptor plus * the memory associated to the afu handle. * * Note: Open and Close the AFU must handled by the same thread id. * ctx->lock must be held when entering this function. */ static void __ddcb_done_thread_cleanup(void *arg __attribute__((unused))) { struct dev_ctx *ctx = (struct dev_ctx *)arg; VERBOSE1("\t[%s]\n", __func__); __afu_close(ctx, true); } /** * Process DDCB queue results using polling for completion. This * implementation might not yet be perfect from an error isolation * standpoint. E.g. how to handle error interrupt conditions without * impacting performance? We still do it to figure possible * performance differentces between interrupt and polling driven * operation. */ static int __ddcb_process_polling(struct dev_ctx *ctx) { int tasks; VERBOSE1("[%s] AFU[%d:%d] Enter polling work loop\n", __func__, ctx->card_no, ctx->cid_id); while (1) { /* * Using trylock in combination with testcancel to * avoid deadlock situations when competing for the * ctx->lock on shutdown ... 
/**
 * Process DDCB queue results using completion processing with
 * interrupt.
 *
 * Endless work loop of the completion thread in interrupt mode: wait
 * with select() on the AFU file descriptor, read the pending CXL event
 * and post completions (or errors) through __ddcb_done_post() until it
 * reports that nothing is left to post.
 *
 * The loop has no exit condition; the trailing "return 0" is never
 * reached.
 *
 * @param ctx	device context of the card to serve
 * @return	0 (formally; see note above)
 */
static int __ddcb_process_irqs(struct dev_ctx *ctx)
{
	int rc;

	VERBOSE1("[%s] AFU[%d:%d] Enter interrupt work loop\n",
		 __func__, ctx->card_no, ctx->cid_id);

	while (1) {
		fd_set set;
		struct timeval timeout;

		/* select() modifies the set and the timeout, rebuild both */
		FD_ZERO(&set);
		FD_SET(ctx->afu_fd, &set);

		/*
		 * Fixed 100 msec timeout. The configurable ctx->tout
		 * (seconds) is currently not used here.
		 */
		timeout.tv_sec = 0;	// ctx->tout;
		timeout.tv_usec = 100 * 1000;	/* 100 msec */
		rc = select(ctx->afu_fd + 1, &set, NULL, NULL, &timeout);
		if (0 == rc) {
			/* Timeout will Post error code only if context is active */
			__ddcb_done_post(ctx, DDCB_ERR_IRQTIMEOUT);
			continue;
		}
		/* Interrupted by a signal: harmless, simply wait again */
		if ((rc == -1) && (errno == EINTR)) {
			VERBOSE0("WARNING: select returned -1 "
				 "and errno was EINTR, retrying\n");
			continue;
		}

		rt_trace(0x0010, 0, 0, 0);

		/*
		 * FIXME I wonder if we must exit in this
		 * case. select() returning a negative value is
		 * clearly a critical issue. Only if errno == EINTR,
		 * we should retry.
		 *
		 * At least we should wakeup potential DDCB execution
		 * requestors, such that the error will be passed to
		 * the layers above and the application can be stopped
		 * if needed.
		 */
		if (rc < 0) {
			VERBOSE0("ERROR: waiting for interrupt! rc: %d\n", rc);
			afu_print_status(ctx->afu_h, libddcb_fd_out);

			/* Post the error until nothing is left to post */
			while (__ddcb_done_post(ctx, DDCB_ERR_SELECTFAIL)) {
				/* empty */
			}
			continue;
		}

		ctx->process_irqs++;	/* Increment statistics counter */

		rc = cxl_read_event(ctx->afu_h, &ctx->event);
		if (0 != rc) {
			/* Event could not be read: nothing is posted here */
			VERBOSE0("\tERROR: cxl_read_event rc: %d errno: %d\n",
				 rc, errno);
			continue;
		}
		VERBOSE2("\tcxl_read_event(...) = %d for context: %d "
			 "type: %d size: %d\n", rc, ctx->cid_id,
			 ctx->event.header.type, ctx->event.header.size);

		switch (ctx->event.header.type) {
		case CXL_EVENT_AFU_INTERRUPT: {
			unsigned int tasks = 0;

			/* Process all ddcb's */
			VERBOSE2("\tCXL_EVENT_AFU_INTERRUPT: flags: 0x%x "
				 "irq: 0x%x\n",
				 ctx->event.irq.flags,
				 ctx->event.irq.irq);

			while (__ddcb_done_post(ctx, DDCB_OK))
				tasks++;

			/*
			 * Statistics: total completions plus a histogram
			 * of completions per interrupt; the last bucket
			 * collects everything >= NUM_DDCBS.
			 */
			ctx->completed_ddcbs += tasks;
			if (tasks < NUM_DDCBS)
				ctx->completed_tasks[tasks]++;
			else
				ctx->completed_tasks[NUM_DDCBS]++;
			break;
		}
		case CXL_EVENT_DATA_STORAGE:
			/* Translation fault: dump diagnostics, fail the posts */
			rt_trace(0xbbbb, ctx->ddcb_out, ctx->ddcb_in, NULL);
			VERBOSE0("\tCXL_EVENT_DATA_STORAGE: flags: 0x%x "
				 "addr: 0x%016llx dsisr: 0x%016llx\n",
				 ctx->event.fault.flags,
				 (long long)ctx->event.fault.addr,
				 (long long)ctx->event.fault.dsisr);
			afu_print_status(ctx->afu_h, libddcb_fd_out);
			afu_dump_queue(ctx);
			rt_trace_dump();

			while (__ddcb_done_post(ctx, DDCB_ERR_EVENTFAIL)) {
				/* empty */
			}
			break;
		case CXL_EVENT_AFU_ERROR:
			/* AFU reported an error: print status, fail the posts */
			VERBOSE0("\tCXL_EVENT_AFU_ERROR: flags: 0x%x "
				 "error: 0x%016llx\n",
				 ctx->event.afu_error.flags,
				 (long long)ctx->event.afu_error.error);
			afu_print_status(ctx->afu_h, libddcb_fd_out);

			while (__ddcb_done_post(ctx, DDCB_ERR_EVENTFAIL)) {
				/* empty */
			}
			break;
		default:
			/* Unknown event type: a single error post only */
			VERBOSE0("\tcxl_read_event() %d unknown header type\n",
				 ctx->event.header.type);

			__ddcb_done_post(ctx, DDCB_ERR_EVENTFAIL);
			break;
		}
	}

	return 0;
}
static void *__ddcb_done_thread(void *card_data)
{
	/*
	 * Thread entry point. card_data is the struct dev_ctx of the
	 * card. The function opens the AFU, reports the result through
	 * ctx->afu_rc and ctx->open_done_sem, and then runs one of the
	 * work loops. It always returns NULL.
	 */
	int rc = 0, rc0;
	struct dev_ctx *ctx = (struct dev_ctx *)card_data;

	VERBOSE1("[%s] AFU[%d] Enter\n", __func__, ctx->card_no);
	rc = __afu_open(ctx);
	ctx->afu_rc = rc;		/* Save rc */

	/*
	 * Signal that the open attempt is finished; afu_rc must be
	 * stored before this point so that the waiter can read it.
	 */
	rc0 = sem_post(&ctx->open_done_sem);	/* Post card_dev_open() */
	if (rc0 != 0) {
		VERBOSE0("[%s] AFU[%d] ERROR: %d %s\n", __func__,
			 ctx->card_no, rc0, strerror(errno));
		/*
		 * NOTE(review): __afu_close() is called here even when
		 * __afu_open() failed above - confirm that it copes
		 * with a context which was never opened.
		 */
		__afu_close(ctx, false);
		ctx->afu_rc = -1;
		return NULL;
	}
	if (DDCB_OK != rc) {
		/* Error Exit here in case open the AFU failed */
		VERBOSE1("[%s] AFU[%d:%d] ERROR: %d Thread Exit\n",
			 __func__, ctx->card_no, ctx->cid_id, rc);

		/* Join in card_dev_open() */
		return NULL;
	}

	/*
	 * Push the Cleanup Handler to close the AFU. The matching
	 * pthread_cleanup_pop() below must stay in the same lexical
	 * scope.
	 */
	pthread_cleanup_push(__ddcb_done_thread_cleanup, ctx);

	if (DDCB_MODE_MASTER & ctx->mode) {
		/* We do not have any code to execute when the master was opened */
		/* Master will be only used for peek and poke */
		/*
		 * This loop has no exit condition; presumably the thread
		 * is ended by cancellation - TODO confirm.
		 */
		while (1) {
			sleep(1);
		}
	}

	/* Both work loops are endless as well */
	if (DDCB_MODE_POLLING & ctx->mode)
		__ddcb_process_polling(ctx);
	else
		__ddcb_process_irqs(ctx);

	/* Non-zero argument: pop the handler and execute it */
	pthread_cleanup_pop(1);
	return NULL;
}
return data; } } if (card_rc) *card_rc = DDCB_ERR_INVAL; return 0; } static int card_write_reg64(void *card_data, uint32_t offs, uint64_t data) { struct ttxs *ttx = (struct ttxs*)card_data; struct dev_ctx *ctx; if (ttx && (ttx->verify == ttx)) { ctx = ttx->ctx; if (ctx->afu_h) return cxl_mmio_write64(ctx->afu_h, offs, data); } return DDCB_ERR_INVAL; } static int card_write_reg32(void *card_data, uint32_t offs, uint32_t data) { struct ttxs *ttx = (struct ttxs*)card_data; struct dev_ctx *ctx; if (ttx && (ttx->verify == ttx)) { ctx = ttx->ctx; if (ctx->afu_h) return cxl_mmio_write32(ctx->afu_h, offs, data); } return DDCB_ERR_INVAL; } /** * The CAPI card implementation is always matching the zEDCv2 * compressor implementation. It is complicated to return the right * version in case of multicard mode, since the DDCB execution is * altering through the cards. The right solution here is to enhance * the appl_id_mask, such that the version bits are considered and * only cards with the same id are being used. */ static uint64_t _card_get_app_id(void *card_data) { struct ttxs *ttx = (struct ttxs *)card_data; if (ttx == NULL) return DDCB_ERR_INVAL; return ttx->app_id; } /** * The Queue worktimer increments every 4 cycles. */ static uint64_t _card_get_queue_work_time(void *card_data) { int rc; struct ttxs *ttx = (struct ttxs*)card_data; struct dev_ctx *ctx; uint64_t data; if (ttx && (ttx->verify == ttx)) { ctx = ttx->ctx; if (!ctx) return 0; rc = cxl_mmio_read64(ctx->afu_h, MMIO_DDCBQ_WT_REG, &data); if (rc != 0) return 0; /* FIXME New versions do not need masking. */ return data & 0x00ffffffffffffffull; } return 0; } /** * Our CAPI version runs with 250 MHz. */ static uint64_t _card_get_frequency(void *card_data __attribute__((unused))) { /* FIXME Version register contains that info. 
/**
 * Allocate a buffer aligned to the system page size.
 *
 * The memory comes from memalign() and is released with free(), see
 * card_free().
 *
 * @param card_data	not used
 * @param size		number of bytes to allocate
 * @return		page aligned buffer, or NULL if the page size
 *			could not be determined or the allocation failed
 */
static void *card_malloc(void *card_data __attribute__((unused)),
			 size_t size)
{
	long page_size = sysconf(_SC_PAGESIZE);

	/*
	 * sysconf() reports errors as -1. Do not let that value be
	 * converted silently into a gigantic size_t alignment.
	 */
	if (page_size <= 0)
		return NULL;

	return memalign((size_t)page_size, size);
}
.ddcb_execute = ddcb_execute, .card_strerror = _card_strerror, .card_read_reg64 = card_read_reg64, .card_read_reg32 = card_read_reg32, .card_write_reg64 = card_write_reg64, .card_write_reg32 = card_write_reg32, .card_get_app_id = _card_get_app_id, .card_get_queue_work_time = _card_get_queue_work_time, .card_get_frequency = _card_get_frequency, .card_dump_hardware_version = card_dump_hardware_version, .card_pin_memory = card_pin_memory, .card_unpin_memory = card_unpin_memory, .card_malloc = card_malloc, .card_free = card_free, /* statistics */ .dump_statistics = _accel_dump_statistics, .num_open = 0, .num_close = 0, .num_execute = 0, .time_open = 0, .time_execute = 0, .time_close = 0, .priv_data = NULL, }; static void capi_card_init(void) __attribute__((constructor)); static void capi_card_init(void) { int rc, tout = CONFIG_DDCB_TIMEOUT; unsigned int card_no; const char *ttt = getenv("DDCB_TIMEOUT"); rt_trace_init(); if (ttt) tout = strtoul(ttt, (char **) NULL, 0); for (card_no = 0; card_no < NUM_CARDS; card_no++) { struct dev_ctx *ctx = &my_ctx[card_no]; ctx->card_no = card_no; ctx->app_id = 0x0; ctx->afu_h = NULL; ctx->ddcb_done_tid = 0; ctx->ddcb = my_ddcbs[card_no]; ctx->ddcb_num = NUM_DDCBS; ctx->tout = tout; rc = pthread_mutex_init(&ctx->lock, NULL); if (0 != rc) { VERBOSE0("ERROR: initializing mutex failed!\n"); return; } } ddcb_register_accelerator(&accel_funcs); } static void capi_card_exit(void) __attribute__((destructor)); static void capi_card_exit(void) { unsigned int card_no; for (card_no = 0; card_no < NUM_CARDS; card_no++) { struct dev_ctx *ctx = &my_ctx[card_no]; card_dev_close(ctx); } } genwqe-user-4.0.18/lib/ddcb_card.c000066400000000000000000000122171303345043000166430ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
/**
 * Execute one DDCB request on a GenWQE card.
 *
 * Thin adapter for the accelerator function table: the generic
 * request is reinterpreted as the GenWQE request type and passed on
 * to genwqe_card_execute_ddcb().
 *
 * @param card_data	card handle as returned by card_open()
 * @param req		request to execute
 * @return		whatever genwqe_card_execute_ddcb() returns
 */
static int ddcb_execute(void *card_data, struct ddcb_cmd *req)
{
	struct genwqe_ddcb_cmd *genwqe_req = (struct genwqe_ddcb_cmd *)req;

	return genwqe_card_execute_ddcb(card_data, genwqe_req);
}
MHz */ static const int speed_grade[] = { 250, 200, 166, 175 }; uint64_t slu_unitcfg; slu_unitcfg = card_read_reg64(card_data, IO_SLU_UNITCFG, NULL); speed = (uint16_t)((slu_unitcfg >> 28) & 0x0full); if (speed >= ARRAY_SIZE(speed_grade)) return 0; /* illegal value */ return speed_grade[speed] * 1000000; /* in Hz */ } static void card_dump_hardware_version(void *card_data, FILE *fp) { uint64_t slu_unitcfg; uint64_t app_unitcfg; slu_unitcfg = card_read_reg64(card_data, IO_SLU_UNITCFG, NULL); app_unitcfg = card_read_reg64(card_data, IO_APP_UNITCFG, NULL); fprintf(fp, " Version Reg: 0x%016llx\n" " Appl. Reg: 0x%016llx\n", (long long)slu_unitcfg, (long long)app_unitcfg); } /** * Special formular is required to get the right time for our GenWQE * implementation. */ static uint64_t _card_get_queue_work_time(void *card_data) { uint64_t queue_wtime; queue_wtime = card_read_reg64(card_data, IO_SLC_QUEUE_WTIME, NULL); return queue_wtime * 8; } static int card_pin_memory(void *card_data, const void *addr, size_t size, int dir) { return genwqe_pin_memory(card_data, addr, size, dir); } static int card_unpin_memory(void *card_data, const void *addr, size_t size) { return genwqe_unpin_memory(card_data, addr, size); } static void *card_malloc(void *card_data, size_t size) { return genwqe_card_malloc(card_data, size); } static int card_free(void *card_data, void *ptr, size_t size) { return genwqe_card_free(card_data, ptr, size); } static int _card_dump_statistics(FILE *fp) { return genwqe_dump_statistics(fp); } static struct ddcb_accel_funcs accel_funcs = { .card_type = DDCB_TYPE_GENWQE, .card_name = "GENWQE", /* functions */ .card_open = card_open, .card_close = card_close, .ddcb_execute = ddcb_execute, .card_strerror = _card_strerror, .card_read_reg64 = card_read_reg64, .card_read_reg32 = card_read_reg32, .card_write_reg64 = card_write_reg64, .card_write_reg32 = card_write_reg32, .card_get_app_id = _card_get_app_id, .card_get_queue_work_time = _card_get_queue_work_time, 
static void genwqe_card_init(void) __attribute__((constructor));

/**
 * Constructor of the GenWQE backend.
 *
 * Declared with __attribute__((constructor)) above, so it is invoked
 * automatically and not called explicitly. Its only job is to register
 * this file's accel_funcs table via ddcb_register_accelerator().
 */
static void genwqe_card_init(void)
{
	ddcb_register_accelerator(&accel_funcs);
}
* * IBM Accelerator Family 'GenWQE'/zEDC */ /**************************************************************************** * Compression (Deflate) ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hw_defs.h" static inline int output_data_avail(struct zedc_stream_s *strm) { return strm->avail_out; } static inline int input_data_avail(struct zedc_stream_s *strm) { return strm->avail_in != 0; } static inline int push_output_byte(struct zedc_stream_s *strm, uint8_t data) { if (strm->avail_out == 0) return 0; /* no byte written */ *strm->next_out = data; strm->total_out++; strm->next_out++; strm->avail_out--; return 1; /* one byte written */ } /** * @brief Prepare and insert format specific header bytes * (RFC1950 / RFC1952) * @param strm userspace job context */ static int deflate_add_header(struct zedc_stream_s *strm) { struct zedc_fifo *f = &strm->out_fifo; switch (strm->format) { case ZEDC_FORMAT_DEFL: return 0; /* no extra header for DEFLATE */ case ZEDC_FORMAT_GZIP: { uint8_t flg = 0x00, os = 0xff; uint8_t xfl = 0x04; /* XFL 4: fastest algorithm */ unsigned int i, name_len = 0, c_len = 0, e_len = 0; uint32_t mt = (uint32_t)time(NULL); /* Note: ZEDC_FIFO_SIZE = 256 */ fifo_push(f, 0x1f); /* ID1 */ fifo_push(f, 0x8b); /* ID2 */ fifo_push(f, 0x08); /* CM */ struct gzedc_header_s *gz_h = strm->gzip_head; if (gz_h) { if (gz_h->name) name_len = strlen(gz_h->name); if (name_len) flg |= FNAME; if (gz_h->comment) c_len = strlen(gz_h->comment); if (c_len) flg |= FCOMMENT; if (gz_h->extra) { e_len = gz_h->extra_len; flg |= FEXTRA; } os = gz_h->os; mt = gz_h->time; //xfl = gz_h->xflags; /* Check if we are not going to overflow the FIFO */ if ((name_len + c_len + e_len) > 240) return 1; if (gz_h->xflags & 0x01) flg |= FTEXT; if (gz_h->xflags & 0x02) flg |= FHCRC; } fifo_push(f, flg); /* FLG */ fifo_push32(f, 
__cpu_to_le32(mt)); /* MT */ fifo_push(f, xfl); fifo_push(f, os); /* OS */ if (flg & FEXTRA) { fifo_push(f, e_len & 0xff); fifo_push(f, (e_len >> 8) & 0xff); for (i = 0; i < e_len; i++) fifo_push(f, gz_h->extra[i]); } if (flg & FNAME) for (i = 0; i <= name_len; i++) fifo_push(f, strm->gzip_head->name[i]); if (flg & FCOMMENT) for (i = 0; i <= c_len; i++) fifo_push(f, strm->gzip_head->comment[i]); if (flg & FHCRC) { if (gz_h) { /* insert some dummy CRC for now , add code later */ fifo_push(f, 0xde); fifo_push(f, 0xef); } } break; } case ZEDC_FORMAT_ZLIB: { /* * A zlib stream has the following structure: * * 0 1 * +---+---+ * |CMF|FLG| (more-->) * +---+---+ * * (if FLG.FDICT set) * * 0 1 2 3 * +---+---+---+---+ * | DICTID | (more-->) * +---+---+---+---+ * * +=====================+---+---+---+---+ * |...compressed data...| ADLER32 | * +=====================+---+---+---+---+ */ if (strm->havedict) { fifo_push(f, 0x78); /* CMF */ fifo_push(f, 0xbb); /* FLG with FDICT set */ fifo_push32(f, __cpu_to_be32(strm->dict_adler32)); } else { fifo_push(f, 0x78); /* CMF */ fifo_push(f, 0x9c); /* FLG */ } break; } } strm->header_added = 1; return 0; } /** * Write to the output stream. 
*/ static void deflate_write_out_fifo(struct zedc_stream_s *strm) { uint8_t data; struct zedc_fifo *f = &strm->out_fifo; while (output_data_avail(strm) && fifo_pop(f, &data) == 1) { push_output_byte(strm, data); } return; } static void __deflateInit_state(zedc_streamp strm) { /* zedc_handle_t zedc = (zedc_handle_t)strm->device; */ fifo_init(&strm->out_fifo); fifo_init(&strm->in_fifo); strm->total_in = strm->total_out = 0; /* initialize workspace */ strm->wsp_page = 0; /* reset toggle input / output area */ strm->dict_len = 0; /* ensure empty dictionary */ strm->obytes_in_dict = 0; strm->out_dict_offs = 0; /* initialize Save & Restore */ strm->obyte = HDR_BTYPE_FIXED; /* deflate header */ strm->onumbits = 3; /* deflate header = 3 bits */ strm->crc32 = 0; strm->adler32 = 1; strm->dict_adler32 = 0; strm->header_added = 0; /* status flags */ strm->eob_added = 0; strm->trailer_added = 0; strm->havedict = 0; strm->in_hdr_scratch_len = 0; strm->in_hdr_bits = 0; strm->hdr_ib = 0; strm->scratch_ib = 0; strm->inp_processed = 0; strm->outp_returned = 0; strm->proc_bits = 0; strm->infl_stat = 0; strm->hdr_start = 0; strm->out_hdr_bits = 0; strm->out_hdr_start_bits = 0; strm->copyblock_len = 0; strm->tree_bits = 0; strm->pad_bits = 0; strm->inp_data_offs = 0; strm->in_data_used = 0; } /** * @brief initialize subsequent zedc_deflate() calls * @param strm common zedc parameter set * @param level compression level */ int zedc_deflateInit2(zedc_streamp strm, int level, int method, int windowBits, int memLevel, int strategy) { int rc; zedc_handle_t zedc; if (!strm) return ZEDC_STREAM_ERROR; zedc = (zedc_handle_t)strm->device; if (!zedc) return ZEDC_STREAM_ERROR; if (!is_zedc(zedc)) return ZEDC_ERR_ILLEGAL_APPID; rc = zedc_alloc_workspace(strm); if (rc != ZEDC_OK) return rc; strm->windowBits = windowBits; strm->level = level; strm->method = method; strm->memLevel = memLevel; strm->strategy = strategy; __deflateInit_state(strm); rc = zedc_format_init(strm); if (rc != ZEDC_OK) { /* 
presets for DEFLATE, GZIP, ZLIB */ zedc_free_workspace(strm); return rc; } return ZEDC_OK; } int zedc_deflateSetDictionary(zedc_streamp strm, const uint8_t *dictionary, unsigned int dictLength) { if (!strm) return ZEDC_STREAM_ERROR; /* We cannot set the dictionary after we have already written the zlib header! */ if (strm->header_added == 1) return ZEDC_STREAM_ERROR; if (dictLength > ZEDC_DICT_LEN) return ZEDC_STREAM_ERROR; memcpy(&strm->wsp->dict[0], dictionary, dictLength); strm->dict_len = dictLength; strm->dict_adler32 = __adler32(1, dictionary, dictLength); strm->havedict = 1; return ZEDC_OK; } int zedc_deflateCopy(zedc_streamp dest, zedc_streamp source) { int rc; memcpy(dest, source, sizeof(*dest)); rc = zedc_alloc_workspace(dest); if (rc != ZEDC_OK) return rc; /* Try only to copy what is really needed ... */ unsigned int p = dest->wsp_page; memcpy(dest->wsp->tree, source->wsp->tree, sizeof(*dest->wsp->tree)); memcpy(dest->wsp->dict[p], source->wsp->dict[p], sizeof(dest->wsp->dict[p])); return ZEDC_OK; } int zedc_deflateReset(zedc_streamp strm) { int rc; if (!strm) return ZEDC_STREAM_ERROR; __deflateInit_state(strm); rc = zedc_format_init(strm); if (rc != ZEDC_OK) /* presets for DEFLATE, GZIP, ZLIB */ return rc; return ZEDC_OK; } /** * @brief add trailer for gzip coding (RFC1952) or * add trailer for zlib coding (RFC1950) * depending on 'windowBits' * @param strm userspace job context */ static int deflate_add_trailer(struct zedc_stream_s *strm) { struct zedc_fifo *f = &strm->out_fifo; if (!strm->eob_added) return 0; /* EOB must be written first */ if (strm->trailer_added) return 0; /* Don't add it multiple times */ switch (strm->format) { case ZEDC_FORMAT_DEFL: break; /* no extra trailer for DEFLATE */ case ZEDC_FORMAT_GZIP: /* prepare GZIP trailer */ fifo_push32(f, __cpu_to_le32(strm->crc32)); fifo_push32(f, __cpu_to_le32(strm->total_in)); break; case ZEDC_FORMAT_ZLIB: /* prepare ZLIB trailer */ fifo_push32(f, __cpu_to_be32(strm->adler32)); break; } 
strm->trailer_added = 1; return 1; } /* bitmask to isolate valid bits from deflate */ static const uint8_t bmsk[8] = { 0xff, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f }; /** * We are at the end of compression (no input data available) An extra * zero byte must be appended as end-of-block marker if this was the * last block in the compressed stream. * * (RFC1951 End-Of-Block Marker = %000_0000). * * To sync up the stream at the end we like to write this pattern: * [F_EOB, F_HDR(BFINAL), F_EOB] = 0000_000.0_11.00_0000_0. * = { 0x00, 0x03, 0x00 } * = 7 + 3 + 7 = 17 bits */ static int deflate_write_eob(struct zedc_stream_s *strm) { struct zedc_fifo *f = &strm->out_fifo; /* const uint8_t eob_sync[3] = { 0x80, 0x01, 0x00 }; urg, bit-order */ /* Avoid adding EOBs multiple times */ if (strm->eob_added == 1) return 0; /* If we have remaining single bits, we cannot add the EOB yet */ if (strm->onumbits >= 8) return 0; if (strm->onumbits == 0) { fifo_push(f, 0x80); /*0b10000000 */ fifo_push(f, 0x01); /*0b00000001 */ fifo_push(f, 0x00); /*0b00000000 */ } else { fifo_push(f, strm->obyte & bmsk[strm->onumbits]); fifo_push(f, 0x03 << (strm->onumbits - 1)); /*0b00000011 ... 
*/ fifo_push(f, 0x00); /*0b00000000 */ } strm->onumbits = 0; strm->eob_added = 1; return 1; /* EOB stored to FIFO */ } /* * Add sync flush for rfc1951: * 7 bits for End of Block * 1 bit for BFINAL * 2 bits for End of Fix Hufman block * 16 Bits with 0....0 for Length * 16 Bits with 1....1 for (not)Length */ static void deflate_sync_flush(struct zedc_stream_s *strm) { struct zedc_fifo *f = &strm->out_fifo; uint8_t data = 0; if (strm->onumbits == 0) { fifo_push(f, 0); fifo_push(f, 0); } else { data = strm->obyte & bmsk[strm->onumbits]; fifo_push(f, data); fifo_push(f,0); if (strm->onumbits > 6) /* if data is more than 6 bits */ fifo_push(f, 0); /* add 1 or 2 more in the next byte */ strm->onumbits = 0; } fifo_push(f, 0); /* Add Len */ fifo_push(f, 0); /* Add Len */ fifo_push(f, 0xff); /* Add n_Len */ fifo_push(f, 0xff); /* Add n_Len */ strm->obyte = HDR_BTYPE_FIXED; /* deflate header */ strm->onumbits = 3; /* deflate header = 3 bits */ return; } /** * @brief Post-process for deflate (RFC 1951) * - store remaining data if output buffer is full * - mask valid bits of last byte * @param strm userspace job context * @param asv pointer to ASV area of processed DDCB * @return 0 if successful * < 0 if failed */ static int deflate_process_results(struct zedc_stream_s *strm, struct zedc_asv_defl *asv) { unsigned int len, i; zedc_handle_t zedc = (zedc_handle_t)strm->device; struct zedc_fifo *f = &strm->out_fifo; len = strm->inp_processed = __be32_to_cpu(asv->inp_processed); strm->outp_returned = __be32_to_cpu(asv->outp_returned); /* sum of uncompressed bytes used for RFC 1952) */ if (len > strm->avail_in) { pr_err("inp_processed=%d avail_in=%d invalid: " " retc=%x attn=%x progress=%x\n", strm->inp_processed, strm->avail_in, strm->retc, strm->attn, strm->progress); /* Now become really verbose ... Let's see what happens. 
*/ zedc_asiv_defl_print(strm, 1); zedc_asv_defl_print(strm, 1); zedc->zedc_rc = ZEDC_ERR_RETLEN; return zedc->zedc_rc; } strm->avail_in -= len; strm->next_in += len; strm->total_in += len; /* get length of output data */ len = strm->outp_returned; /* Sanity check */ if ((len == 0) || (len > strm->avail_out)) { pr_err("outp_returned=%u inp_processed=%d " "avail_in=%d avail_out=%d invalid: " " retc=%x attn=%x progress=%x\n", strm->outp_returned, strm->inp_processed, strm->avail_in, strm->avail_out, strm->retc, strm->attn, strm->progress); /* Now become really verbose ... Let's see what happens. */ zedc_asiv_defl_print(strm, 1); zedc_asv_defl_print(strm, 1); zedc->zedc_rc = ZEDC_ERR_RETLEN; return zedc->zedc_rc; } /* Check if onumbits are valid for new or for old hardware */ if (dyn_huffman_supported(zedc)) { if (asv->onumbits > (ZEDC_ONUMBYTES_v1 + ZEDC_ONUMBYTES_EXTRA) * 8) { pr_err("onumbits %d too large (O)\n", asv->onumbits); zedc->zedc_rc = ZEDC_ERR_RETOBITS; return zedc->zedc_rc; } } else { if (asv->onumbits > ZEDC_ONUMBYTES_v0 * 8) { pr_err("onumbits %d too large (N)\n", asv->onumbits); zedc->zedc_rc = ZEDC_ERR_RETOBITS; return zedc->zedc_rc; } } strm->next_out += len; strm->avail_out -= len; strm->total_out += len; /* * Store onumbits for next DDCB. * * if ONUMBITS == 0: * - Output buffer contains all bits on a byte boundary. * if ONUMBITS == 1...7: * - there are partial bits which must be appended in the * output buffer * if ONUMBITS > 7: * - there are bytes provided in OBYTES/OBYTES_EXTRA which * could not be stored due to a completely filled output * buffer. This must be done in a subsequent cycle after * emptied the output buffer. 
*/ /* Sanity check: Hardware put not all required bits into output buf */ if ((strm->avail_out != 0) && (asv->onumbits > 7)) { pr_err("** err: unstored data bytes **\n"); zedc->zedc_rc = ZEDC_ERR_RETOBITS; return zedc->zedc_rc; } /* Push remaining bytes into output FIFO */ if (dyn_huffman_supported(zedc)) { /* * For the new format we can get more bytes than * originally expected. In the v1 buffer there is one * more byter and there is one byte in the middle of * the DDCB data, which has a different meaning * (out_dict_offs). We need to jump over it. */ for (i = 0, strm->onumbits = asv->onumbits; (strm->onumbits > 7) && (i < ZEDC_ONUMBYTES_v1); i++, strm->onumbits -= 8) { fifo_push(f, asv->obits[i]); } if ((strm->onumbits) && (i < ZEDC_ONUMBYTES_v1)) { strm->obyte = asv->obits[i]; return 0; } for (i = 0; (strm->onumbits > 7) && (i < ZEDC_ONUMBYTES_EXTRA); i++, strm->onumbits -= 8) { fifo_push(f, asv->obits_extra[i]); } if ((strm->onumbits) && (i < ZEDC_ONUMBYTES_EXTRA)) { strm->obyte = asv->obits_extra[i]; return 0; } } else { for (i = 0, strm->onumbits = asv->onumbits; (strm->onumbits > 7) && (i < ZEDC_ONUMBYTES_v0); i++, strm->onumbits -= 8) { fifo_push(f, asv->obits[i]); } /* copy the incomplete remaining byte */ if (strm->onumbits) strm->obyte = asv->obits[i]; } return 0; } /** * @brief do deflate (compress) * @param strm common zedc parameter set * @param flush flag if pending output data should be written */ int zedc_deflate(zedc_streamp strm, int flush) { int rc, p; struct zedc_asiv_defl *asiv; struct zedc_asv_defl *asv; zedc_handle_t zedc; struct ddcb_cmd *cmd; struct zedc_fifo *f; unsigned int i, tries = 1; uint64_t out_dict = 0x0; uint32_t out_dict_len = 0x0; if (!strm) return ZEDC_STREAM_ERROR; f = &strm->out_fifo; zedc = (zedc_handle_t)strm->device; if (!zedc) return ZEDC_STREAM_ERROR; strm->flush = flush; cmd = &strm->cmd; ddcb_cmd_init(cmd); /* Add ZLIB/GZIP prefix if needed */ if (0 == strm->header_added) { if (deflate_add_header(strm)) return 
ZEDC_STREAM_ERROR; } /* Ensure that output FIFO gets written first */ deflate_write_out_fifo(strm); if (!output_data_avail(strm)) return ZEDC_OK; /* Instructed to finish and no input data: write EOB and trailer */ if ((strm->flush == ZEDC_FINISH) && !input_data_avail(strm)) { deflate_write_eob(strm); deflate_add_trailer(strm); deflate_write_out_fifo(strm); } /* End-Of-Block added, and written out */ if ((strm->eob_added) && (strm->trailer_added) && fifo_empty(f)) return ZEDC_STREAM_END; /* done */ /* Don't ask hardware if we have no output space */ if (!output_data_avail(strm)) return ZEDC_OK; /* Don't ask hardware if we have nothing to process */ if (!input_data_avail(strm)) return ZEDC_OK; /* Prepare Deflate DDCB */ cmd->cmd = ZEDC_CMD_DEFLATE; cmd->acfunc = DDCB_ACFUNC_APP; cmd->cmdopts = DDCB_OPT_DEFL_SAVE_DICT; /* SAVE_DICT */ if (strm->flags & ZEDC_FLG_CROSS_CHECK) cmd->cmdopts |= DDCB_OPT_DEFL_RAS_CHECK;/* RAS */ /* Set DYNAMIC_HUFFMAN */ if (dyn_huffman_supported(zedc) && (strm->strategy != ZEDC_FIXED)) cmd->cmdopts |= DDCB_OPT_DEFL_IBUF_INDIR; cmd->asiv_length = 0x70 - 0x18; /* range for crc protection */ cmd->asv_length = 0xc0 - 0x80; cmd->ats = 0; /* input buffer */ if ((strm->dma_type[ZEDC_IN] & DDCB_DMA_TYPE_MASK) == DDCB_DMA_TYPE_FLAT) cmd->ats |= ATS_SET_FLAGS(struct zedc_asiv_defl, in_buff, ATS_TYPE_FLAT_RD); else cmd->ats |= ATS_SET_FLAGS(struct zedc_asiv_defl, in_buff, ATS_TYPE_SGL_RD); /* output buffer */ if ((strm->dma_type[ZEDC_OUT] & DDCB_DMA_TYPE_MASK) == DDCB_DMA_TYPE_FLAT) cmd->ats |= ATS_SET_FLAGS(struct zedc_asiv_defl, out_buff, ATS_TYPE_FLAT_RDWR); else cmd->ats |= ATS_SET_FLAGS(struct zedc_asiv_defl, out_buff, ATS_TYPE_SGL_RDWR); /* workspace */ if ((strm->dma_type[ZEDC_WS] & DDCB_DMA_TYPE_MASK) == DDCB_DMA_TYPE_FLAT) { cmd->ats |= (ATS_SET_FLAGS(struct zedc_asiv_defl, in_dict, ATS_TYPE_FLAT_RD) | ATS_SET_FLAGS(struct zedc_asiv_defl, out_dict, ATS_TYPE_FLAT_RDWR)); } else { cmd->ats |= (ATS_SET_FLAGS(struct zedc_asiv_defl, in_dict, 
ATS_TYPE_SGL_RD) | ATS_SET_FLAGS(struct zedc_asiv_defl, out_dict, ATS_TYPE_SGL_RDWR)); } /* Setup ASIV part (provided in big endian byteorder) */ asiv = (struct zedc_asiv_defl *)&cmd->asiv; asv = (struct zedc_asv_defl *)&cmd->asv; asiv->in_buff = __cpu_to_be64((unsigned long)strm->next_in); asiv->in_buff_len = __cpu_to_be32(strm->avail_in); asiv->out_buff = __cpu_to_be64((unsigned long)strm->next_out); asiv->out_buff_len = __cpu_to_be32(strm->avail_out); /* Toggle workspace page (in <-> out) */ p = strm->wsp_page; asiv->in_dict = __cpu_to_be64((unsigned long)strm->wsp->dict[p] + strm->out_dict_offs); asiv->out_dict = __cpu_to_be64((unsigned long)strm->wsp->dict[p ^ 1]); strm->wsp_page ^= 1; asiv->in_dict_len = __cpu_to_be32(strm->dict_len); asiv->out_dict_len = __cpu_to_be32(ZEDC_DICT_LEN); asiv->ibits[0] = strm->obyte; asiv->inumbits = strm->onumbits; asiv->in_crc32 = __cpu_to_be32(strm->crc32); asiv->in_adler32 = __cpu_to_be32(strm->adler32); /* * Optimization attempt: If we are called with Z_FINISH, and * we assume that the data will fit into the provided output * buffer, we try to run the hardware without dictionary save * function. If we do not see all data absorbed and all * available output written, we need to restart with * dictionary save option. * * The desire is to keep small transfers efficient. It will * not have significant effect if we deal with huge data * streams. 
*/ cmd->cmdopts |= DDCB_OPT_DEFL_SAVE_DICT; tries = 1; if ((strm->flags & ZEDC_FLG_SKIP_LAST_DICT) && (((flush == ZEDC_FINISH) || (flush == ZEDC_FULL_FLUSH)) && (strm->avail_out >= strm->avail_in))) { out_dict = asiv->out_dict; out_dict_len = asiv->out_dict_len; cmd->cmdopts &= ~DDCB_OPT_DEFL_SAVE_DICT; asiv->out_dict = 0x0; asiv->out_dict_len = 0x0; tries = 2; } for (i = 0; i < tries; i++) { zedc_asiv_defl_print(strm, zedc_dbg); rc = zedc_execute_request(zedc, cmd); zedc_asv_defl_print(strm, zedc_dbg); strm->retc = cmd->retc; strm->attn = cmd->attn; strm->progress = cmd->progress; /* Check for unexecuted DDCBs too, where RETC is 0x000. */ if ((rc < 0) || (cmd->retc == 0x000)) { struct ddcb_cmd *cmd = &strm->cmd; pr_err("deflate failed rc=%d card_rc=%d\n" " DDCB returned " "(RETC=%03x ATTN=%04x PROGR=%x) %s\n", rc, zedc->card_rc, cmd->retc, cmd->attn, cmd->progress, cmd->retc == 0x102 ? "" : "ERR"); return ZEDC_STREAM_ERROR; } /* Great, all data absorbed and all data fitted into output */ if ((strm->avail_in == __be32_to_cpu(asv->inp_processed)) && (strm->avail_out >= __be32_to_cpu(asv->outp_returned))) break; /* What a pity, need to repeat to get back dictionary */ if ((strm->flags & ZEDC_FLG_SKIP_LAST_DICT) && (((flush == ZEDC_FINISH) || (flush == ZEDC_FULL_FLUSH)) && (strm->avail_out >= strm->avail_in))) { cmd->cmdopts |= DDCB_OPT_DEFL_SAVE_DICT; asiv->out_dict = out_dict; asiv->out_dict_len = out_dict_len; pr_warn("[%s] What a pity, optimization did " "not work\n" " (RETC=%03x ATTN=%04x PROGR=%x)\n", __func__, cmd->retc, cmd->attn, cmd->progress); } } /* Analyze ASV part (provided in big endian byteorder!) 
*/ strm->crc32 = __be32_to_cpu(asv->out_crc32); strm->adler32 = __be32_to_cpu(asv->out_adler32); strm->dict_len = __be16_to_cpu(asv->out_dict_used); strm->out_dict_offs = asv->out_dict_offs; if (strm->out_dict_offs >= 16) { pr_err("DICT_OFFSET too large (%u)\n", strm->out_dict_offs); return ZEDC_STREAM_ERROR; } /* Post-processing of DDCB status */ rc = deflate_process_results(strm, asv); if (rc < 0) return ZEDC_STREAM_ERROR; /* Instructed to finish and no input data, write EOB and trailer */ if ((strm->flush == ZEDC_FINISH) && !input_data_avail(strm)) { deflate_write_eob(strm); /* Add EOB */ deflate_add_trailer(strm); /* ZLIB/GZIP postfix */ deflate_write_out_fifo(strm); } /* Handle ZEDC_SYNC_FLUSH + ZEDC_PARTIAL_FLUSH the same way Testcase CDHF_03 */ if ((strm->flush == ZEDC_SYNC_FLUSH) || (strm->flush == ZEDC_PARTIAL_FLUSH)) { deflate_sync_flush(strm); deflate_write_out_fifo(strm); } /* FIX for HW290108 Testcase CDHF_06 */ if (strm->flush == ZEDC_FULL_FLUSH) { deflate_sync_flush(strm); deflate_write_out_fifo(strm); strm->dict_len = 0; } /* End-Of-Block added, and written out */ if ((strm->eob_added) && (strm->trailer_added) && fifo_empty(f)) return ZEDC_STREAM_END; /* done */ return ZEDC_OK; } /** * @brief end deflate (compress) * @param strm common zedc parameter set */ int zedc_deflateEnd(zedc_streamp strm) { zedc_handle_t zedc; struct zedc_fifo *f; if (!strm) return ZEDC_STREAM_ERROR; f = &strm->out_fifo; zedc = (zedc_handle_t)strm->device; if (!zedc) return ZEDC_STREAM_ERROR; while (!fifo_empty(f)) { uint8_t data; fifo_pop(f, &data); pr_err("FIFO not empty: %02x\n", data); } zedc_free_workspace(strm); return ZEDC_OK; } int zedc_deflateSetHeader(zedc_streamp strm, gzedc_headerp head) { strm->gzip_head = head; return ZEDC_OK; } genwqe-user-4.0.18/lib/hardware.c000066400000000000000000001300511303345043000165500ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * 
you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include
#include
#include
#include
#include
#include
#include
#include

/**
 * Hardware zlib implementation. This code is using the libzHW library
 * to do hardware supported inflate and deflate. To overcome
 * performance degradation by using small buffers the deflate
 * functionality is using sufficiently large buffers for input and
 * output.
 */

#undef CONFIG_DEBUG
#undef CONFIG_USE_PINNING	/* FIXME Driver has problems with
				   get_user_pages_fast not pinning all
				   requested pages. Need to work on a fix
				   for that before we can enable this. */

/*
 * BUF_SIZE of 0 is used to avoid buffering. Env-variables can
 * overwrite those defaults.
 */
#define CONFIG_INFLATE_BUF_SIZE	 (128 * 1024)
#define CONFIG_DEFLATE_BUF_SIZE	 (768 * 1024)

/*
 * FIXME Ensure values are really the same for newer/older zlib versions.
 * Both conversions are currently the identity: zedc and zlib return
 * codes are passed through unchanged.
 */
#define rc_zedc_to_libz(x)	((x))
#define rc_libz_to_zedc(x)	((x))

/**
 * Per-stream hardware state; a pointer to it is stored in
 * z_stream.state by the h_*Init2_ functions.
 */
struct hw_state {
	int card_no;		/* card number handed to zedc_open() */
	int card_type;		/* card type handed to zedc_open() */
	unsigned int mode;	/* DDCB_MODE_* flags handed to zedc_open() */

	zedc_stream h;		/* hardware compression context */
	int rc;			/* hardware return code e.g. Z_STREAM_END */
	unsigned int page_size;	/* sysconf(_SC_PAGESIZE), set at deflate init */

	/* buffering for the moment only for compression */
	size_t   ibuf_total;	/* total_size of ibuf_base */
	size_t   ibuf_avail;	/* available bytes in ibuf */
	uint8_t  *ibuf_base;	/* buffer for input data */
	uint8_t  *ibuf;		/* current position in ibuf to put data */

	size_t   obuf_total;	/* total_size of obuf_base */
	size_t   obuf_avail;	/* available bytes in obuf */
	uint8_t  *obuf_base;	/* buffer for output data */
	uint8_t  *obuf;		/* current position in obuf to put data */
	uint8_t  *obuf_next;	/* next position to read data */

	unsigned int inflate_req;  /* # of inflates */
	unsigned int deflate_req;  /* # of deflates */
};

/**
 * @param s	hardware state to inspect
 * @return	True if output buffer is empty, else False. "Empty" means
 *		obuf_avail equals obuf_total.
 */
static int output_buffer_empty(struct hw_state *s)
{
	return s->obuf_avail == s->obuf_total;
}

/**
 * @param s	hardware state to inspect
 * @return	Remaining bytes in obuf, i.e. the distance between the
 *		write position (obuf) and the read position (obuf_next).
 */
static int output_buffer_bytes(struct hw_state *s)
{
	return s->obuf - s->obuf_next;
}

#define ZEDC_VERBOSE_LIBCARD_MASK 0x0000ff00  /* debug flags for libcard */
#define ZEDC_VERBOSE_LIBZEDC_MASK 0x000000ff  /* debug flags for libzedc */
#define ZEDC_VERBOSE_DDCB	  0x00010000  /* dump DDCBs if requested */

static int zedc_verbose = 0x00000000; /* verbosity flag */
static int zlib_xcheck = 1;	/* non-zero: streams get ZEDC_FLG_CROSS_CHECK */

/* Default buffer sizes: deflate buffers its input, inflate its output */
static unsigned int zlib_ibuf_total = CONFIG_DEFLATE_BUF_SIZE;
static unsigned int zlib_obuf_total = CONFIG_INFLATE_BUF_SIZE;

/* Try to cache filehandles for faster access. Do not close them.
*/ static zedc_handle_t zedc_cards[128 + 1]; static zedc_handle_t __zedc_open(int card_no, int card_type, int mode, int *err_code) { int flags = (zlib_inflate_flags | zlib_deflate_flags); if ((flags & ZLIB_FLAG_CACHE_HANDLES) == 0x0) return zedc_open(card_no, card_type, mode, err_code); if (card_no == -1) { if (zedc_cards[128]) return zedc_cards[128]; zedc_cards[128] = zedc_open(card_no, card_type, mode, err_code); return zedc_cards[128]; } if (card_no < 0 || card_no >= 128) return NULL; if (zedc_cards[card_no] != NULL) { return zedc_cards[card_no]; } zedc_cards[card_no] = zedc_open(card_no, card_type, mode, err_code); return zedc_cards[card_no]; } static int __zedc_close(zedc_handle_t zedc __unused) { int flags = (zlib_inflate_flags | zlib_deflate_flags); if ((flags & ZLIB_FLAG_CACHE_HANDLES) == 0x0) return zedc_close(zedc); /* Ignore close in cached fd mode ... */ return ZEDC_OK; } static void stream_zedc_to_zlib(z_streamp s, zedc_streamp h) { s->next_in = (uint8_t *)h->next_in; /* next input byte */ s->avail_in = h->avail_in; /* number of bytes available at next_in */ s->total_in = h->total_in; /* total nb of input bytes read so far */ s->next_out = h->next_out; /* next output byte should be put there */ s->avail_out = h->avail_out; /* remaining free space at next_out */ s->total_out = h->total_out; /* total nb of bytes output so far */ } static void stream_zlib_to_zedc(zedc_streamp h, z_streamp s) { h->next_in = s->next_in; /* next input byte */ h->avail_in = s->avail_in; /* number of bytes available at next_in */ h->total_in = s->total_in; /* total nb of input bytes read so far */ h->next_out = s->next_out; /* next output byte should be put there */ h->avail_out = s->avail_out; /* remaining free space at next_out */ h->total_out = s->total_out; /* total nb of bytes output so far */ } /** * Take care CRC/ADLER is correctly reported to the upper levels. 
*/ static void __fixup_crc_or_adler( z_streamp s, zedc_streamp h) { s->adler = (h->format == ZEDC_FORMAT_GZIP) ? h->crc32 : h->adler32; } static void __free(void *ptr) { if (ptr == NULL) return; free(ptr); } /** * Theoretical maximum size of the data is worst case of 9/8 * of the input buffer. We add one page more because our * hardware encoder is sometimes storing some left-over bytes. * * zLib documentation: "The worst case choice of * parameters can result in an expansion of at most * 13.5%, plus eleven bytes." * * zEDC was better here than zEDCv2. zEDCv2 requires * us to increase the factor to 15/8, which wastes * some memory in most cases. What a pity. */ uLong h_deflateBound(z_streamp strm __attribute__((unused)), uLong sourceLen) { unsigned int page_size = sysconf(_SC_PAGESIZE); return sourceLen * 15/8 + page_size; } int h_deflateInit2_(z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy, const char *version __unused, int stream_size __unused) { int rc, err_code = 0; struct hw_state *s; zedc_handle_t zedc; unsigned int page_size = sysconf(_SC_PAGESIZE); strm->total_in = 0; strm->total_out = 0; s = calloc(1, sizeof(*s)); if (s == NULL) return Z_MEM_ERROR; s->card_type = zlib_accelerator; s->card_no = zlib_card; s->mode = DDCB_MODE_ASYNC | DDCB_MODE_RDWR; if (zlib_deflate_flags & ZLIB_FLAG_USE_POLLING) s->mode |= DDCB_MODE_POLLING; zedc = __zedc_open(s->card_no, s->card_type, s->mode, &err_code); if (!zedc) { rc = Z_STREAM_ERROR; goto free_hw_state; } s->h.device = zedc; s->deflate_req = 0; s->page_size = page_size; /* Default is SGLIST */ s->h.dma_type[ZEDC_IN] = DDCB_DMA_TYPE_SGLIST; s->h.dma_type[ZEDC_OUT] = DDCB_DMA_TYPE_SGLIST; s->h.dma_type[ZEDC_WS] = DDCB_DMA_TYPE_SGLIST; if (zlib_deflate_flags & ZLIB_FLAG_USE_FLAT_BUFFERS) { if (zlib_ibuf_total != 0) { s->h.dma_type[ZEDC_IN] = DDCB_DMA_TYPE_FLAT; s->h.dma_type[ZEDC_OUT] = DDCB_DMA_TYPE_FLAT; } s->h.dma_type[ZEDC_WS] = DDCB_DMA_TYPE_FLAT; } #if defined(CONFIG_USE_PINNING) 
s->h.dma_type[ZEDC_IN] |= DDCB_DMA_PIN_MEMORY; s->h.dma_type[ZEDC_OUT] |= DDCB_DMA_PIN_MEMORY; s->h.dma_type[ZEDC_WS] |= DDCB_DMA_PIN_MEMORY; #endif if (zlib_xcheck) s->h.flags |= ZEDC_FLG_CROSS_CHECK; if (zedc_verbose & ZEDC_VERBOSE_DDCB) s->h.flags |= ZEDC_FLG_DEBUG_DATA; if (zlib_deflate_flags & ZLIB_FLAG_OMIT_LAST_DICT) s->h.flags |= ZEDC_FLG_SKIP_LAST_DICT; if (zlib_ibuf_total) { s->ibuf_total = s->ibuf_avail = zlib_ibuf_total; s->ibuf_base = s->ibuf = zedc_memalign(zedc, s->ibuf_total, s->h.dma_type[ZEDC_IN]); if (s->ibuf_base == NULL) { rc = Z_MEM_ERROR; goto close_card; } s->obuf_total = s->obuf_avail = h_deflateBound(strm, zlib_ibuf_total); s->obuf_base = s->obuf = s->obuf_next = zedc_memalign(zedc, s->obuf_total, s->h.dma_type[ZEDC_OUT]); if (s->obuf_base == NULL) { rc = Z_MEM_ERROR; goto free_ibuf; } } hw_trace("[%p] h_deflateInit2_: card_type=%d card_no=%d " "zlib_ibuf_total=%d\n", strm, s->card_type, s->card_no, zlib_ibuf_total); rc = zedc_deflateInit2(&s->h, level, method, windowBits, memLevel, strategy); __fixup_crc_or_adler(strm, &s->h); if (rc != ZEDC_OK) { rc = rc_zedc_to_libz(rc); goto free_obuf; } strm->state = (void *)s; /* remember hardware state */ return rc_zedc_to_libz(rc); free_obuf: zedc_free(zedc, s->obuf_base, s->obuf_total, s->h.dma_type[ZEDC_OUT]); free_ibuf: zedc_free(zedc, s->ibuf_base, s->ibuf_total, s->h.dma_type[ZEDC_IN]); close_card: __zedc_close(zedc); free_hw_state: __free(s); return rc; } /** * Implementation note: This mechanism will not work, if the caller is * using driver allocated memory. Currently only the device driver * keeps track of the allocated buffers. The library does not and can * therefore not initiate the a copy. This will cause the mechanism * only to work, if users use self allocated memory together with * hardware sglists. 
*/ int h_deflateCopy(z_streamp dest, z_streamp source) { struct hw_state *s_source; struct hw_state *s_dest; zedc_handle_t zedc; int rc = Z_OK, err_code; s_source = (struct hw_state *)source->state; s_dest = calloc(1, sizeof(*s_dest)); if (s_dest == NULL) { pr_err("Cannot get destination buffer\n"); return Z_MEM_ERROR; } memcpy(s_dest, s_source, sizeof(*s_dest)); rc = rc_zedc_to_libz(zedc_deflateCopy(&s_dest->h, &s_source->h)); if (rc != Z_OK) { pr_err("zEDC deflateCopy returned %d\n", rc); goto err_free_s_dest; } zedc = __zedc_open(s_dest->card_no, s_dest->card_type, s_dest->mode, &err_code); if (!zedc) { pr_err("Cannot open accelerator handle\n"); rc = Z_STREAM_ERROR; goto err_zedc_close; } s_dest->h.device = zedc; hw_trace(" Allocated zedc device %p\n", zedc); /* * FIXME ... check if all that stuff below is really correct ... * * We need to allocate space for the buffers and make sure * that the pointers point to the right addresses depending on * the fill-level. Furthermore we need to copy the data over * to the new buffers. 
*/ if (s_source->ibuf_total) { s_dest->ibuf_total = s_source->ibuf_total; s_dest->ibuf_avail = s_source->ibuf_avail; s_dest->ibuf_base = zedc_memalign(zedc, s_dest->ibuf_total, s_dest->h.dma_type[ZEDC_IN]); if (s_dest->ibuf_base == NULL) { rc = Z_MEM_ERROR; goto err_zedc_close; } s_dest->ibuf = s_dest->ibuf_base + (s_source->ibuf - s_source->ibuf_base); memcpy(s_dest->ibuf_base, s_source->ibuf_base, s_source->ibuf - s_source->ibuf_base); } if (s_source->obuf_total) { s_dest->obuf_total = s_source->obuf_total; s_dest->obuf_avail = s_source->obuf_avail; s_dest->obuf_base = zedc_memalign(zedc, s_dest->obuf_total, s_dest->h.dma_type[ZEDC_OUT]); if (s_dest->obuf_base == NULL) { rc = Z_MEM_ERROR; goto err_free_ibuf_base; } s_dest->obuf = s_dest->obuf_base + (s_source->obuf - s_source->obuf_base); s_dest->obuf_next = s_dest->obuf_base + (s_source->obuf_next - s_source->obuf_base); memcpy(s_dest->obuf_next, s_source->obuf_next, s_dest->obuf_total - s_dest->obuf_avail); } dest->state = (void *)s_dest; return Z_OK; err_free_ibuf_base: free(s_dest->ibuf_base); s_dest->ibuf_base = NULL; err_zedc_close: __zedc_close(zedc); err_free_s_dest: free(s_dest); return rc; } int h_deflateReset(z_streamp strm) { int rc; zedc_stream *h; struct hw_state *s; hw_trace("[%p] h_deflateReset\n", strm); if (strm == NULL) return Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) return Z_STREAM_ERROR; h = &s->h; /* reset buffers */ strm->total_in = 0; strm->total_out = 0; s->deflate_req = 0; s->ibuf_avail = s->ibuf_total; s->ibuf = s->ibuf_base; s->obuf_avail = s->obuf_total; s->obuf = s->obuf_base; s->obuf_next = s->obuf_base; s->rc = Z_OK; rc = zedc_deflateReset(h); __fixup_crc_or_adler(strm, h); return rc_zedc_to_libz(rc); } int h_deflateSetDictionary(z_streamp strm, const uint8_t *dictionary, unsigned int dictLength) { int rc; zedc_stream *h; struct hw_state *s; hw_trace("[%p] h_deflateSetDictionary dictionary=%p dictLength=%d\n", strm, dictionary, dictLength); if (strm == 
NULL) return Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) return Z_STREAM_ERROR; h = &s->h; rc = zedc_deflateSetDictionary(h, dictionary, dictLength); return rc_zedc_to_libz(rc); } int h_deflateSetHeader(z_streamp strm, gz_headerp head) { int rc; zedc_stream *h; struct hw_state *s; hw_trace("[%p] h_deflateSetHeader headerp=%p\n", strm, head); if (strm == NULL) return Z_STREAM_ERROR; if (sizeof(*head) != sizeof(gzedc_header)) return Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) return Z_STREAM_ERROR; h = &s->h; rc = zedc_deflateSetHeader(h, (gzedc_header *)head); return rc_zedc_to_libz(rc); } static inline int __deflate(z_streamp strm, struct hw_state *s, int flush) { int rc; zedc_stream *h = &s->h; hw_trace("[%p] h_deflate (%d): flush=%s next_in=%p avail_in=%d " "next_out=%p avail_out=%d\n", strm, s->deflate_req, flush_to_str(flush), h->next_in, h->avail_in, h->next_out, h->avail_out); rc = zedc_deflate(h, flush); __fixup_crc_or_adler(strm, h); s->deflate_req++; hw_trace("[%p] flush=%s next_in=%p avail_in=%d " "next_out=%p avail_out=%d rc=%d\n", strm, flush_to_str(flush), h->next_in, h->avail_in, h->next_out, h->avail_out, rc); return rc; } /** * Collect input data */ static int h_read_ibuf(z_streamp strm) { int tocopy; struct hw_state *s = (struct hw_state *)strm->state; if ((s->ibuf_avail == 0) || /* no input buffer space */ (strm->avail_in == 0)) /* or no input data */ return 0; tocopy = MIN(strm->avail_in, s->ibuf_avail); hw_trace("[%p] *** collecting %d bytes ...\n", strm, tocopy); memcpy(s->ibuf, strm->next_in, tocopy); s->ibuf_avail -= tocopy; s->ibuf += tocopy; /* book-keeping for input buffer */ strm->avail_in -= tocopy; strm->next_in += tocopy; strm->total_in += tocopy; return tocopy; } /** * Flush available output bytes to given stream. * * @strm Compression stream used to push out data. * @return Remaining bytes in internal output buffer. 
*/
static unsigned int h_flush_obuf(z_streamp strm)
{
	struct hw_state *s = (struct hw_state *)strm->state;
	unsigned int pending = output_buffer_bytes(s);	/* bytes in obuf */
	int n;

	/* Nothing to do without output space or without buffered data */
	if ((strm->avail_out == 0) || (pending == 0))
		return pending;

	n = MIN(strm->avail_out, pending);

	hw_trace("[%p] *** giving out %d bytes, "
		 "remaining %d bytes in temporary, "
		 "%d in internal buffer\n",
		 strm, n, pending - n,
		 zedc_inflate_pending_output(&s->h));

	memcpy(strm->next_out, s->obuf_next, n);

	/* book-keeping for the caller's output buffer */
	strm->next_out += n;
	strm->avail_out -= n;
	strm->total_out += n;

	/* book-keeping for the internal buffer: bytes were given out */
	s->obuf_next += n;
	s->obuf_avail += n;

	return output_buffer_bytes(s);	/* what is still left in obuf */
}

/**
 * Optimization Remarks
 *
 * If ibuf_total is not 0 we use the allocated input and output
 * buffers instead of the user buffers. We collect the data into our
 * pre-pinnned buffers and compress when we have enough data or if
 * !Z_NO_FLUSH is true. When flushing is desired we ensure that we
 * always fill the available output buffer with data. The output data
 * comes from the pre-pinnned output buffer into the user buffer.
 *
 * We observed so far that using a 1 MiB buffer helps to improve
 * performance a lot if the input data is e.g. around 16 KiB per
 * request (zpipe.c defaults).
*/ int h_deflate(z_streamp strm, int flush) { int rc = Z_OK, loops = 0; struct hw_state *s; zedc_stream *h; unsigned int obuf_bytes, ibuf_bytes; if (strm == NULL) return Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) return Z_STREAM_ERROR; h = &s->h; if (s->ibuf_total == 0) { /* Special case: buffering fully disabled */ stream_zlib_to_zedc(h, strm); s->rc = rc_zedc_to_libz(__deflate(strm, s, flush)); stream_zedc_to_zlib(strm, h); return s->rc; } hw_trace("[%p] h_deflate: flush=%s avail_in=%d avail_out=%d " "ibuf_avail=%d obuf_avail=%d\n", strm, flush_to_str(flush), strm->avail_in, strm->avail_out, (int)s->ibuf_avail, (int)s->obuf_avail); do { hw_trace("[%p] *** loop=%d flush=%s\n", strm, loops, flush_to_str(flush)); /* Collect input data ... */ h_read_ibuf(strm); /* Give out what is already there */ h_flush_obuf(strm); if (strm->avail_out == 0) /* need more output space */ return Z_OK; /* * Here we start the hardware to do the compression * job, user likes to flush or no more ibuf space * avail. */ if ((flush != Z_NO_FLUSH) || (s->ibuf_avail == 0)) { ibuf_bytes = s->ibuf - s->ibuf_base; /* input bytes */ hw_trace("[%p] *** sending %d bytes to hardware ...\n", strm, ibuf_bytes); s->obuf_next = h->next_out = s->obuf_base; /* start */ s->obuf_avail = s->obuf_total; h->next_in = s->ibuf_base; h->avail_in = ibuf_bytes; h->avail_out = s->obuf_total; /* * If we still have more input data we must * not tell hardware to finish/flush the * compression stream. This happens if our * buffer is smaller than the data the user * provides. */ s->rc = rc_zedc_to_libz(__deflate(strm, s, (strm->avail_in != 0) ? Z_NO_FLUSH : flush)); s->obuf = h->next_out; /* end of output data */ s->obuf_avail = h->avail_out; if (h->avail_in == 0) { /* good: all input absorbed */ s->ibuf = s->ibuf_base; s->ibuf_avail = s->ibuf_total; } else { pr_err("not all input absorbed! 
" "avail_in is still %d bytes\n", h->avail_in); return Z_STREAM_ERROR; } /* Sanity checking: obuf too small but input pending */ if ((h->avail_in != 0) && (h->avail_out == 0)) { pr_err("obuf was not large enough!\n"); return Z_STREAM_ERROR; } } if (strm->avail_in != 0) hw_trace("[%p] Not yet finished (avail_in=%d)\n", strm, strm->avail_in); /* Give out what is already there */ h_flush_obuf(strm); if (strm->avail_out == 0) /* need more output space */ return Z_OK; ibuf_bytes = s->ibuf - s->ibuf_base; /* accumulated input */ obuf_bytes = s->obuf - s->obuf_next; /* bytes in obuf */ if ((flush == Z_FINISH) && /* finishing desired */ (s->rc == Z_STREAM_END) && /* hardware saw FEOB */ (strm->avail_in == 0) && /* no more input from caller */ (ibuf_bytes == 0) && /* no more input in buf */ (obuf_bytes == 0)) /* no more outp data in buf */ return Z_STREAM_END; /* nothing to do anymore */ loops++; } while (strm->avail_in != 0); return rc; } int h_deflateEnd(z_streamp strm) { int rc; zedc_stream *h; struct hw_state *s; zedc_handle_t zedc; if (strm == NULL) return Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) return Z_STREAM_ERROR; h = &s->h; zedc = (zedc_handle_t)h->device; rc = zedc_deflateEnd(h); zedc_free(zedc, s->obuf_base, s->obuf_total, s->h.dma_type[ZEDC_OUT]); zedc_free(zedc, s->ibuf_base, s->ibuf_total, s->h.dma_type[ZEDC_IN]); __zedc_close(zedc); __free(s); return rc_zedc_to_libz(rc); } int h_inflateInit2_(z_streamp strm, int windowBits, const char *version __unused, int stream_size __unused) { int rc, err_code = 0; struct hw_state *s; zedc_handle_t zedc; strm->total_in = 0; strm->total_out = 0; s = calloc(1, sizeof(*s)); if (s == NULL) return Z_MEM_ERROR; s->card_type = zlib_accelerator; s->card_no = zlib_card; s->mode = DDCB_MODE_ASYNC | DDCB_MODE_RDWR; if (zlib_inflate_flags & ZLIB_FLAG_USE_POLLING) s->mode |= DDCB_MODE_POLLING; hw_trace("[%p] h_inflateInit2_: card_type=%d card_no=%d " "zlib_obuf_total=%d\n", strm, s->card_type, s->card_no, 
zlib_obuf_total); zedc = __zedc_open(s->card_no, s->card_type, s->mode, &err_code); if (!zedc) { rc = Z_STREAM_ERROR; goto free_hw_state; } s->inflate_req = 0; s->h.avail_in = 0; s->h.next_in = ZEDC_NULL; s->h.device = zedc; /* Default is using SGLISTs */ s->h.dma_type[ZEDC_IN] = DDCB_DMA_TYPE_SGLIST; s->h.dma_type[ZEDC_OUT] = DDCB_DMA_TYPE_SGLIST; s->h.dma_type[ZEDC_WS] = DDCB_DMA_TYPE_SGLIST; if (zlib_inflate_flags & ZLIB_FLAG_USE_FLAT_BUFFERS) { s->h.dma_type[ZEDC_IN] = DDCB_DMA_TYPE_SGLIST; if (zlib_obuf_total != 0) s->h.dma_type[ZEDC_OUT] = DDCB_DMA_TYPE_FLAT; /* FIXME FIXME */ pr_err(" NOTE: Potential hardware bug. We might get DDCBs\n" " with timeouts: RETC=0x110, ATTN=0xe004\n"); s->h.dma_type[ZEDC_WS] = DDCB_DMA_TYPE_FLAT; } #if defined(CONFIG_USE_PINNING) s->h.dma_type[ZEDC_IN] |= DDCB_DMA_PIN_MEMORY; s->h.dma_type[ZEDC_OUT] |= DDCB_DMA_PIN_MEMORY; s->h.dma_type[ZEDC_WS] |= DDCB_DMA_PIN_MEMORY; #endif if (zlib_xcheck) /* FIXME Not needed/supported for inflate */ s->h.flags |= ZEDC_FLG_CROSS_CHECK; if (zedc_verbose & ZEDC_VERBOSE_DDCB) s->h.flags |= ZEDC_FLG_DEBUG_DATA; if (zlib_inflate_flags & ZLIB_FLAG_OMIT_LAST_DICT) s->h.flags |= ZEDC_FLG_SKIP_LAST_DICT; /* We only use output buffering for inflate */ if (zlib_obuf_total) { s->obuf_total = s->obuf_avail = zlib_obuf_total; s->obuf_base = s->obuf = s->obuf_next = zedc_memalign(zedc, s->obuf_total, s->h.dma_type[ZEDC_OUT]); if (s->obuf_base == NULL) { rc = Z_MEM_ERROR; goto close_card; } } rc = zedc_inflateInit2(&s->h, windowBits); __fixup_crc_or_adler(strm, &s->h); if (rc != ZEDC_OK) { rc = rc_zedc_to_libz(rc); goto free_obuf; } strm->state = (void *)s; /* remember hardware state */ return rc_zedc_to_libz(rc); free_obuf: zedc_free(zedc, s->obuf_base, s->obuf_total, s->h.dma_type[ZEDC_OUT]); close_card: __zedc_close(zedc); free_hw_state: __free(s); return rc; } int h_inflateReset(z_streamp strm) { int rc; zedc_stream *h; struct hw_state *s; hw_trace("[%p] h_inflateReset\n", strm); if (strm == NULL) return 
Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) return Z_STREAM_ERROR; h = &s->h; /* reset buffers */ strm->total_in = 0; strm->total_out = 0; s->inflate_req = 0; s->obuf_avail = s->obuf_total; s->obuf = s->obuf_base; s->obuf_next = s->obuf_base; s->rc = Z_OK; if (h->tree_bits + h->pad_bits + h->scratch_ib + h->scratch_bits) hw_trace("[%p] warn: (0x%x 0x%x 0x%x 0x%x)\n", strm, (unsigned int)h->tree_bits, (unsigned int)h->pad_bits, (unsigned int)h->scratch_ib, (unsigned int)h->scratch_bits); rc = zedc_inflateReset(h); __fixup_crc_or_adler(strm, h); return rc_zedc_to_libz(rc); } int h_inflateReset2(z_streamp strm, int windowBits) { int rc; zedc_stream *h; struct hw_state *s; hw_trace("[%p] h_inflateReset2(windowBits=%d)\n", strm, windowBits); if (strm == NULL) return Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) return Z_STREAM_ERROR; h = &s->h; /* reset buffers */ strm->total_in = 0; strm->total_out = 0; s->inflate_req = 0; s->obuf_avail = s->obuf_total; s->obuf = s->obuf_base; s->obuf_next = s->obuf_base; s->rc = Z_OK; if (h->tree_bits + h->pad_bits + h->scratch_ib + h->scratch_bits) hw_trace("[%p] warn: (0x%x 0x%x 0x%x 0x%x)\n", strm, (unsigned int)h->tree_bits, (unsigned int)h->pad_bits, (unsigned int)h->scratch_ib, (unsigned int)h->scratch_bits); rc = zedc_inflateReset2(h, windowBits); __fixup_crc_or_adler(strm, h); return rc_zedc_to_libz(rc); } int h_inflateSetDictionary(z_streamp strm, const uint8_t *dictionary, unsigned int dictLength) { int rc; zedc_stream *h; struct hw_state *s; hw_trace("[%p] h_inflateSetDictionary dictionary=%p dictLength=%d\n", strm, dictionary, dictLength); if (strm == NULL) return Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) return Z_STREAM_ERROR; h = &s->h; rc = zedc_inflateSetDictionary(h, dictionary, dictLength); return rc_zedc_to_libz(rc); } int h_inflateGetDictionary(z_streamp strm, uint8_t *dictionary, unsigned int *dictLength) { int rc; zedc_stream *h; struct hw_state 
*s; hw_trace("[%p] h_inflateGetDictionary dictionary=%p &dictLength=%p\n", strm, dictionary, dictLength); if (strm == NULL) return Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) { return Z_STREAM_ERROR; } h = &s->h; rc = zedc_inflateGetDictionary(h, dictionary, dictLength); return rc_zedc_to_libz(rc); } int h_inflateGetHeader(z_streamp strm, gz_headerp head) { int rc; zedc_stream *h; struct hw_state *s; hw_trace("[%p] h_inflateGetHeader headerp=%p\n", strm, head); if (strm == NULL) return Z_STREAM_ERROR; if (sizeof(*head) != sizeof(gzedc_header)) return Z_STREAM_ERROR; s = (struct hw_state *)strm->state; if (s == NULL) return Z_STREAM_ERROR; h = &s->h; rc = zedc_inflateGetHeader(h, (gzedc_header *)head); return rc_zedc_to_libz(rc); } static inline int __inflate(z_streamp strm, struct hw_state *s, int flush) { int rc; zedc_stream *h = &s->h; hw_trace("[%p] __inflate (%d): flush=%s next_in=%p avail_in=%d " "next_out=%p avail_out=%d total_in=%ld total_out=%ld " "crc/adler=%08x/%08x\n", strm, s->inflate_req, flush_to_str(flush), h->next_in, h->avail_in, h->next_out, h->avail_out, h->total_in, h->total_out, h->crc32, h->adler32); rc = zedc_inflate(h, flush); __fixup_crc_or_adler(strm, h); hw_trace("[%p] ________h (%d) flush=%s next_in=%p avail_in=%d " "next_out=%p avail_out=%d total_in=%ld total_out=%ld " "crc/adler=%08x/%08x rc=%s\n", strm, s->inflate_req, flush_to_str(flush), h->next_in, h->avail_in, h->next_out, h->avail_out, h->total_in, h->total_out, h->crc32, h->adler32, ret_to_str(rc)); s->inflate_req++; return rc; } /** * FIXME Circumvention for hardware deficiency * * Our hardware does not continue processing input bytes, once it has * no output bytes anymore. This causes our hardware missing the FEOB * information which can be in empty blocks which follow the regular * data. Software would return Z_STREAM_END in those cases and not * Z_OK, which is expected by some applications e.g. the MongoDB zlib * compression engine. 
*
 * It is possible recall hardware inflate with at least one output
 * byte, to get the desired Z_STREAM_END information from the
 * hardware, at the cost of an additional DDCB, which is itself
 * expensive too.
 *
 * Empty blocks are added by hardware support code and the software
 * implementation in different fashions. Z_SYNC_FLUSH does similar
 * things too. Hardware support code adds an empty fixed huffman block
 * followed by another empty fixed huffman block with the BFINAL bit
 * on. Software uses just the latter.
 */
#define CONFIG_CIRCUMVENTION_FOR_Z_STREAM_END

/* Parser states used when scanning for trailing empty blocks */
enum stream_state {
	READ_HDR,
	COPY_BLOCK,
	FIXED_HUFFMAN,
	DYN_HUFFMAN,
};

/* Printable names, indexed by enum stream_state */
static const char *state_str[] = {
	"READ_HDR", "COPY_BLOCK", "FIXED_HUFFMAN", "DYN_HUFFMAN"
};

/* Small window of input bytes plus the current bit position in it */
struct stream_ending {
	uint8_t d[16];
	unsigned int proc_bits;	/* processed bits in current byte */
	unsigned int remaining_bytes;
	unsigned int avail_in;
	unsigned int idx;
	unsigned int in_hdr_scratch_len;
	enum stream_state state;
};

/**
 * Peek at the next @bits bits of @e; the position is left untouched.
 *
 * Bits are taken starting at bit e->proc_bits of byte e->idx, lowest
 * bit of a byte first. Each bit is shifted into the result from the
 * right, i.e. the bit read first ends up most significant. Bits beyond
 * e->avail_in are delivered as 0.
 *
 * @param e	input window and position
 * @param bits	number of bits to fetch
 * @param d	result
 * @return	0 if all bits were available, 1 if the window was too short
 */
static inline int get_bits(struct stream_ending *e, unsigned int bits,
			   uint64_t *d)
{
	uint64_t acc = 0ull;
	unsigned int bitpos = e->proc_bits;
	unsigned int byte = e->idx;
	unsigned int got = 0;
	int short_read = 0;

	while (got < bits) {
		/* consume the rest of the current byte */
		while ((bitpos < 8) && (got < bits)) {
			acc <<= 1ull;
			if (byte >= e->avail_in)
				short_read = 1;	/* no valid bytes anymore */
			else if (e->d[byte] & (1 << bitpos))
				acc |= 1ull;
			bitpos++;
			got++;
		}
		bitpos = 0;	/* start new byte at bit offset 0 */
		byte++;
	}

	*d = acc;
	return short_read;
}

/**
 * Move the position forward by @bits bits.
*/ static inline int drop_bits(struct stream_ending *e, unsigned int bits) { unsigned int idx; /* hw_trace("proc_bits=%d idx=%d ---> ", e->proc_bits, e->idx); */ idx = e->idx + (e->proc_bits + bits) / 8; if (idx >= e->avail_in) { /* hw_trace("EOF\n"); */ return 1; /* we do not have such many bits */ } e->idx = idx; e->proc_bits = (e->proc_bits + bits) % 8; /* hw_trace("proc_bits=%d idx=%d\n", e->proc_bits, e->idx); */ return 0; } /** * Copy blocks have their length information synched on a byte * boundary. We need this to move the stream forward to a byte * position. */ static inline int sync_to_byte(struct stream_ending *e) { if (e->proc_bits == 0) return 0; e->proc_bits = 0; e->idx++; return 0; } /** * There can be leftover input bytes in the scratch section. This is * used to figure out how many bytes are there to be considered. */ static inline unsigned int __in_hdr_scratch_len(zedc_streamp strm) { unsigned int len; len = strm->hdr_ib + strm->tree_bits + strm->pad_bits + strm->scratch_ib + strm->scratch_bits; return (uint32_t)(len / 8ULL); } /** * I think we should be able to derive the info if we are in a dynamic * huffman block via the 3 header bits. But anyways ... * * If there are tree bits defined, we are for sure in a dynamic * huffman block. In this case we do not know the dynamic huffman end * of block symbol, which prevents software parsing the information in * the remaining bytes. Do not apply the BFINAL dectection * circumvention in this case. 
*
 * BTYPE specifies how the data are compressed, as follows:
 *   00 - no compression
 *   01 - compressed with fixed Huffman codes
 *   10 - compressed with dynamic Huffman codes
 *   11 - reserved (error)
 */
static inline int __in_hdr_bits(zedc_streamp strm)
{
	const char *btype_str[] = {
		"NO_COMPRESSION",
		"FIXED_HUFFMAN",
		"DYNAMIC_HUFFMAN",
		"RESERVED"
	};
	/* header type field of the inflate status, used for tracing only */
	uint8_t btype = (strm->infl_stat & INFL_STAT_HDR_TYPE) >> 5;
	/* header + tree bits rounded up to 64 bit units, in bytes */
	unsigned int headerarea_size =
		((strm->tree_bits + strm->hdr_ib + 63)/64) * 8;

	hw_trace("SCRATCH BITS: headerarea_size=%d hdr_ib=%d tree_bits=%d "
		 "pad_bits=%d scratch_ib=%d scratch_bits=%d "
		 "infl_stat.hdr_type=%s\n",
		 headerarea_size,
		 strm->hdr_ib, strm->tree_bits,
		 strm->pad_bits,
		 strm->scratch_ib, strm->scratch_bits,
		 btype_str[btype]);

	return strm->tree_bits;
}

/**
 * Forget all leftover header/tree/scratch bits of @strm by setting
 * the five bit counters to 0.
 */
static inline void __reset_hdr_scratch_len(zedc_streamp strm)
{
	strm->scratch_bits = 0;
	strm->scratch_ib = 0;
	strm->pad_bits = 0;
	strm->tree_bits = 0;
	strm->hdr_ib = 0;
}

/**
 * NOTES: Missing are reading more data if we run out of space in our
 * temporary buffer, more testing for corner cases, figuring out if we
 * are really at a header-start position (talk to hardware team).
 *
 * Consider moving this code at the end of DDCB processing. This is
 * where it really belongs, to mimic the exact zlib software
 * behavior. It could easily be, that this simplifies testing a lot,
 * since one could use the exact amount of output bytes and insist on
 * seeing Z_STREAM_END as return code. Now we need to call inflate() a
 * 2nd time (even with avail_out == 0), to get the Z_STREAM_END return
 * code.
*/
/**
 * Software look-ahead for the end of a raw DEFLATE stream.
 *
 * The leftover bytes of the scratch area followed by the not yet
 * consumed input bytes are copied into a small local buffer. That
 * buffer is parsed as a sequence of block headers, accepting only
 * empty fixed huffman blocks (7 zero bits) and empty copy blocks
 * (0000ffff). Parsing stops at anything else, in particular at
 * dynamic huffman blocks and when the buffered bits run out.
 *
 * @param strm	zlib stream; next_in/avail_in/total_in are advanced
 *		past the analyzed input bytes on success only
 * @return	Z_STREAM_END if a header with the BFINAL bit was seen and
 *		the empty block after it was parsed completely,
 *		Z_OK in all other cases (more data required).
 */
static inline int __check_stream_end(z_streamp strm)
{
	int rc, ret = Z_OK;
	uint64_t d;
	struct stream_ending e;
	struct hw_state *s = (struct hw_state *)strm->state;
	zedc_stream *h = &s->h;
	unsigned int len;
	unsigned int offs;	/* not uint8_t: must not narrow the advance */

	/* Copy input data in one contiguous buffer before analyzing it */
	memset(&e, 0, sizeof(e));
	e.state = READ_HDR;
	e.proc_bits = h->proc_bits;
	e.remaining_bytes = sizeof(e.d);
	e.avail_in = 0;
	e.idx = 0;
	e.in_hdr_scratch_len = __in_hdr_scratch_len(h);

	/* 1st: bytes still sitting in the scratch area */
	len = MIN(e.in_hdr_scratch_len, e.remaining_bytes);
	memcpy(&e.d[e.avail_in], h->wsp->tree, len);
	e.remaining_bytes -= len;
	e.avail_in += len;

	/* 2nd: as many fresh input bytes as still fit */
	len = MIN(strm->avail_in, e.remaining_bytes);
	if (len > 0)	/* next_in may be invalid when avail_in is 0 */
		memcpy(&e.d[e.avail_in], strm->next_in, len);
	e.remaining_bytes -= len;
	e.avail_in += len;

	hw_trace("Accumulated input data (__in_hdr_scratch_len=%d "
		 "strm->avail_in=%d):\n", e.in_hdr_scratch_len,
		 strm->avail_in);
	if (zlib_hw_trace_enabled())
		ddcb_hexdump(zlib_log, e.d, e.avail_in);

	/* Now let us have a look what we have here */
	while (1) {
		/* fprintf(zlib_log, "STATE: %s\n", state_str[e.state]); */
		switch (e.state) {
		case READ_HDR:
			hw_trace("READ_HDR\n");
			rc = get_bits(&e, 3, &d);
			hw_trace(" d=%08llx rc=%d\n", (long long)d, rc);
			if (rc)
				goto go_home;
			drop_bits(&e, 3);

			/* low two bits select the block type */
			switch (d & 0x3) {
			case 0x0:
				e.state = COPY_BLOCK;
				break;
			case 0x1:
				e.state = DYN_HUFFMAN;
				/* we need to stop, since the end
				   symbol is unknown to us */
				goto go_home;
			case 0x2:
				e.state = FIXED_HUFFMAN;
				break;
			case 0x3: /* error */
			default:
				goto go_home;
			}
			/* third bit is the BFINAL flag */
			if (d & 0x4) {
				hw_trace(" Z_STREAM_END/BFINAL potentially "
					 "detected!\n");
				ret = Z_STREAM_END;
			}
			break;

		case FIXED_HUFFMAN:
			hw_trace("FIXED_HUFFMAN\n");
			rc = get_bits(&e, 7, &d);
			hw_trace(" d=%08llx, 00000000 indicates empty "
				 "FIXED_HUFFMAN\n", (long long)d);
			if (rc)
				goto go_home;
			drop_bits(&e, 7);

			if (d != 0x0) /* end of stream required here */
				goto go_home;

			e.state = READ_HDR;
			/* If we saw the BFINAL bit, we can safely exit */
			if (ret == Z_STREAM_END)
				goto sync_avail_in;
			break;

		case COPY_BLOCK:
			hw_trace("COPY_BLOCK\n");
			sync_to_byte(&e);
			rc = get_bits(&e, 32, &d);
			hw_trace(" d=%08llx, 0000ffff indicates empty "
				 "COPY_BLOCK\n", (long long)d);
			if (rc)
				goto go_home;
			drop_bits(&e, 32);

			if (d != 0x0000ffff) /* 0000ffff required here */
				goto go_home;

			e.state = READ_HDR;
			/* If we saw the BFINAL bit, we can safely exit */
			if (ret == Z_STREAM_END)
				goto sync_avail_in;
			break;

		default:
			hw_trace("Brrr STATE: %s\n", state_str[e.state]);
			goto go_home;
		}
	}

 sync_avail_in:
	/*
	 * Only if we saw Z_STREAM_END and no problems understanding
	 * the empty HUFFMAN or COPY_BLOCKs arose, we sync up the
	 * stream.
	 *
	 * For DEFLATE and ZLIB we need to read the adler32 or
	 * the crc32 and the uncompressed data size to finally say
	 * that everything is right. So let us not use the circumvention
	 * in this case.
	 */

	/*
	 * e.idx:                number of bytes which were analyzed
	 * e.in_hdr_scratch_len: bytes taken from scratch buffer
	 */
	if (e.idx <= e.in_hdr_scratch_len)
		offs = 0;	/* no avail_in adjustment needed */
	else {
		/* do not consider bytes from scratch area */
		/* add 1 idx starts at 0 */
		offs = e.idx - e.in_hdr_scratch_len + 1;
		__reset_hdr_scratch_len(h);
	}
	strm->avail_in -= offs;
	strm->next_in += offs;
	strm->total_in += offs;

	/*
	 * next_in may now point behind the last input byte (or be
	 * invalid if there was no input at all), so only peek at it
	 * when there is still input left.
	 */
	hw_trace(" e.idx=%d e.in_hdr_scratch_len=%d offs=%d "
		 "next_in=%02x\n", e.idx, e.in_hdr_scratch_len, offs,
		 (strm->avail_in != 0) ? *strm->next_in : 0);

	return ret;	/* more data or even Z_STREAM_END found */

 go_home:
	hw_trace(" Aborting search for Z_STREAM_END for now!\n");
	return Z_OK;	/* more data required */
}

/**
 * FIXME We use always the internal buffer. Using the external one
 * results in minimal performance gain when using sgl-described
 * buffers, but flat buffers are better anyways.
*/
/**
 * Hardware backed inflate().
 *
 * Without an internal output buffer (s->obuf_total == 0) the request is
 * passed straight to __inflate(). Otherwise the function loops: it first
 * hands out bytes which are still buffered (h_flush_obuf()), then sends
 * the available input to __inflate() and copies the results back into
 * the zlib stream, until the input is used up or one of the early exits
 * below is taken.
 *
 * @param strm	zlib stream
 * @param flush	zlib flush mode
 * @return	Z_STREAM_ERROR if strm or strm->state is NULL,
 *		Z_STREAM_END once the stored result (s->rc) is Z_STREAM_END
 *		and no buffered output is left,
 *		Z_DATA_ERROR if the internal output buffer is unexpectedly
 *		not empty before a hardware call,
 *		otherwise Z_OK or the translated return code of __inflate().
 */
int h_inflate(z_streamp strm, int flush)
{
	int rc = Z_OK, use_internal_buffer = 1;
	zedc_stream *h;
	struct hw_state *s;
	unsigned int loops = 0;
	unsigned int obuf_bytes;

	if (strm == NULL)
		return Z_STREAM_ERROR;

	s = (struct hw_state *)strm->state;
	if (s == NULL)
		return Z_STREAM_ERROR;

	h = &s->h;

	if (s->obuf_total == 0) { /* Special case: buffering fully disabled */
		stream_zlib_to_zedc(h, strm);
		s->rc = rc_zedc_to_libz(__inflate(strm, s, flush));
		stream_zedc_to_zlib(strm, h);
		return s->rc;
	}

	/* Use internal buffer if the given output buffer is smaller */
	if ((s->h.dma_type[ZEDC_OUT] & DDCB_DMA_TYPE_MASK) ==
	    DDCB_DMA_TYPE_SGLIST)
		use_internal_buffer = (s->obuf_total > strm->avail_out);

	hw_trace("[%p] h_inflate: flush=%s avail_in=%d avail_out=%d "
		 "ibuf_avail=%d obuf_avail=%d use_int_buf=%d\n",
		 strm, flush_to_str(flush), strm->avail_in, strm->avail_out,
		 (int)s->ibuf_avail, (int)s->obuf_avail,
		 use_internal_buffer);

	/* No progress possible (no more input and no buffered output):
	   Z_BUF_ERROR */
	obuf_bytes = s->obuf - s->obuf_next; /* bytes in obuf */
	if (obuf_bytes == 0) {
		hw_trace("[%p] OBYTES_IN_DICT %d bytes\n", strm,
			 h->obytes_in_dict);

		if (s->rc == Z_STREAM_END) /* hardware saw FEOB */
			return Z_STREAM_END; /* nothing to do anymore */

		/*
		 * NOTE: strm->avail_in can be 0 but some bytes might
		 * still be in the scratch buffer. This causes
		 * one of our test-cases to fail. So the criteria
		 * when to return Z_BUF_ERROR is currently wrong.
		 * Therefore disabling Z_BUF_ERROR return here.
		 * This causes a small deviation from what software zlib
		 * does in situations when there is no input data
		 * available.
		 */
		/* if (strm->avail_in == 0)
		 *	return Z_BUF_ERROR;
		 */
	}

	do {
		hw_trace("[%p] loops=%d flush=%s\n", strm, loops,
			 flush_to_str(flush));

		/* Give out what is already there */
		obuf_bytes = h_flush_obuf(strm);
		if ((s->rc == Z_STREAM_END) && /* hardware saw FEOB */
		    (obuf_bytes == 0)) /* no more output in buf */
			return Z_STREAM_END; /* nothing to do anymore */

		if (((obuf_bytes != 0) || zedc_inflate_pending_output(h)) &&
		    (strm->avail_out == 0))
			return Z_OK; /* need new output buffer */

		/*
		 * Need more output space, just useful if Z_STREAM_END
		 * not seen before.
		 */
		if ((s->rc != Z_STREAM_END) && (strm->avail_out == 0)) {
			rc = Z_OK;
#ifdef CONFIG_CIRCUMVENTION_FOR_Z_STREAM_END /* For MongoDB PoC */
			/* The circumvention can be switched off at runtime */
			if (zlib_inflate_flags &
			    ZLIB_FLAG_DISABLE_CV_FOR_Z_STREAM_END) {
				hw_trace("[%p] ZLIB_FLAG_DISABLE_"
					 "CV_FOR_Z_STREAM_END\n", strm);
				goto skip_circumvention;
			}

			hw_trace("[%p] CONFIG_CIRCUMVENTION_FOR_Z_STREAM_END\n",
				 strm);
			/*
			 * Do not try this with ZLIB or GZIP, where we
			 * expect adler32 or crc32/data_size in the
			 * stream trailer. We want the lowlevel lib to
			 * do the checksum processing in this case.
			 */
			if (h->format != ZEDC_FORMAT_DEFL)
				return rc;

			/*
			 * fprintf(zlib_log, "SCRATCH\n");
			 * ddcb_hexdump(zlib_log, h->wsp->tree,
			 *		__in_hdr_scratch_len(h));
			 * fprintf(zlib_log, "NEXT_IN\n");
			 * ddcb_hexdump(zlib_log, strm->next_in,
			 *	MIN(strm->avail_in, (unsigned int)0x20));
			 * fprintf(zlib_log,
			 *	" in_hdr_scratch_len = %d\n"
			 *	" proc_bits = %d\n",
			 *	__in_hdr_scratch_len(h), h->proc_bits);
			 */

			/* Stored tree bits: software cannot parse ahead */
			rc = __in_hdr_bits(h);
			if (rc != 0) {
				hw_trace(" __in_hdr_bits %d: cannot parse "
					 "dynamic huffman block, returning\n",
					 rc);
				return Z_OK;
			}

			/*
			 * Remember a detected stream end in s->rc, but
			 * still return Z_OK from this call.
			 */
			rc = __check_stream_end(strm);
			if (rc == Z_STREAM_END) {
				hw_trace(" Suppress Z_STREAM_END %ld %ld\n",
					 s->obuf_avail, s->obuf_total);
				s->rc = Z_STREAM_END;
				rc = Z_OK;
			}
			hw_trace("[%p] .......... flush=%s avail_in=%d "
				 "avail_out=%d __check_stream=%s\n", strm,
				 flush_to_str(flush), strm->avail_in,
				 strm->avail_out, ret_to_str(rc));
		skip_circumvention:
#endif
			return rc;
		}

		/*
		 * Original idea: Do not send 0 data to HW
		 *
		 * Why it is needed regardless:
		 *   If the underlying code buffers output data, we
		 *   need to call it to get this data. We need to trust
		 *   the lowlevel code not to call hardware if not needed,
		 *   since that would impact performance.
		 */
		if ((0 == strm->avail_in) &&
		    ((Z_NO_FLUSH == flush) ||
		     (Z_PARTIAL_FLUSH == flush) ||
		     (Z_FULL_FLUSH == flush)))
			return Z_OK;

		if (!output_buffer_empty(s)) {
			pr_err("[%p] obuf should be empty here!\n", strm);
			return Z_DATA_ERROR;
		}

		/*
		 * Here we start the hardware to do the decompression
		 * job. We need to use hardware in any case to
		 * determine if we have seen a final end of block
		 * condition.
		 */
		hw_trace("[%p] Sending avail_in=%d bytes to hardware "
			 "(obuf_total=%d)\n", strm, strm->avail_in,
			 (int)s->obuf_total);

		h->next_in = strm->next_in; /* use stream input buffer */
		h->avail_in = strm->avail_in;
		h->total_in = strm->total_in;

		if (use_internal_buffer) { /* entire buffer */
			h->next_out = s->obuf_next = s->obuf_base;
			h->avail_out = s->obuf_total;
		} else {
			h->next_out = strm->next_out;
			h->avail_out = strm->avail_out;
		}
		h->total_out = strm->total_out;

		/* Call hardware to perform the decompression task. */
		s->rc = rc_zedc_to_libz(__inflate(strm, s, flush));

		strm->next_in = (uint8_t *)h->next_in; /* new pos ... */
		strm->avail_in = h->avail_in; /* new pos in input data */
		strm->total_in = h->total_in; /* new pos in input data */
		strm->data_type = h->data_type;

		if (use_internal_buffer) { /* entire buffer */
			s->obuf = h->next_out; /* end of out data */
			s->obuf_avail = h->avail_out; /* available bytes */
		} else {
			strm->next_out = h->next_out;
			strm->avail_out = h->avail_out;
			strm->total_out = h->total_out;
		}

		/* Give out what is already there */
		h_flush_obuf(strm);
		if (s->rc == Z_NEED_DICT)
			return s->rc;

		if ((s->rc == Z_STREAM_ERROR) ||
		    (s->rc == Z_DATA_ERROR) ||
		    (s->rc == Z_BUF_ERROR))
			return s->rc;

		/* Hardware saw FEOB and output buffer is empty */
		if ((s->rc == Z_STREAM_END) && output_buffer_empty(s))
			return Z_STREAM_END; /* nothing to do anymore */

		if (strm->avail_out == 0) /* need more output space */
			return Z_OK;

		/* Bit 0x80 of data_type requests leaving the loop now */
		hw_trace("[%p] data_type 0x%x\n", strm, strm->data_type);
		if (strm->data_type & 0x80) {
			hw_trace("[%p] Z_DO_BLOCK_EXIT\n", strm);
			return s->rc;
		}

		loops++;
	} while (strm->avail_in != 0); /* strm->avail_out == 0 handled above */

	hw_trace("[%p] __________ flush=%s avail_in=%d avail_out=%d\n",
		 strm, flush_to_str(flush), strm->avail_in, strm->avail_out);

	/* rc is still the Z_OK it was initialized with when we get here */
	return rc_zedc_to_libz(rc);
}

/**
 * Hardware backed inflateEnd(): complains if buffered input or output
 * is left, ends the libzedc inflate stream, releases the internal
 * output buffer, closes the device handle and frees the state.
 *
 * @param strm	zlib stream
 * @return	Z_STREAM_ERROR if strm or strm->state is NULL, otherwise
 *		the translated return code of zedc_inflateEnd().
 */
int h_inflateEnd(z_streamp strm)
{
	int rc;
	zedc_stream *h;
	struct hw_state *s;
	zedc_handle_t zedc;
	int ibuf_bytes, obuf_bytes;

	if (strm == NULL)
		return Z_STREAM_ERROR;

	s = (struct hw_state *)strm->state;
	if (s == NULL)
		return Z_STREAM_ERROR;
	h = &s->h;
	zedc = (zedc_handle_t)h->device;

	ibuf_bytes = s->ibuf - s->ibuf_base; /* accumulated input */
	obuf_bytes = s->obuf - s->obuf_next; /* bytes in obuf */
	if (ibuf_bytes || obuf_bytes)
		pr_err("[%p] In/Out buffer not empty! ibuf_bytes=%d "
		       "obuf_bytes=%d\n", strm, ibuf_bytes, obuf_bytes);

	rc = zedc_inflateEnd(h);
	zedc_free(zedc, s->obuf_base, s->obuf_total, s->h.dma_type[ZEDC_OUT]);

	__zedc_close((zedc_handle_t)h->device);
	__free(s);
	return rc_zedc_to_libz(rc);
}

/**
 * Read the configuration from the environment (ZLIB_VERBOSE,
 * ZLIB_ACCELERATOR, ZLIB_IBUF_TOTAL, ZLIB_OBUF_TOTAL, ZLIB_CARD,
 * ZLIB_CROSS_CHECK) and set up logging.
 *
 * ZLIB_VERBOSE:
 *   0x0000cczz
 *         ||||
 *         ||``== libzedc debug flags
 *         ``==== libcard debug flags
 */
void zedc_hw_init(void)
{
	char *verb = getenv("ZLIB_VERBOSE");
	char *accel = getenv("ZLIB_ACCELERATOR");
	char *ibuf_s = getenv("ZLIB_IBUF_TOTAL");
	char *obuf_s = getenv("ZLIB_OBUF_TOTAL");
	char *card = getenv("ZLIB_CARD");
	char *xcheck_str = getenv("ZLIB_CROSS_CHECK");

	ddcb_set_logfile(zlib_log);
	zedc_set_logfile(zlib_log);

	if (verb != NULL) {
		int z, c;

		/* split the value into the two per-library debug levels */
		zedc_verbose = str_to_num(verb);
		c = (zedc_verbose & ZEDC_VERBOSE_LIBCARD_MASK) >> 8;
		z = (zedc_verbose & ZEDC_VERBOSE_LIBZEDC_MASK) >> 0;

		ddcb_debug(c);
		zedc_lib_debug(z);
	}

	/* Anything not starting with "CAPI" selects GenWQE */
	if (accel != NULL) {
		if (strncmp(accel, "CAPI", 4) == 0)
			zlib_accelerator = DDCB_TYPE_CAPI;
		else
			zlib_accelerator = DDCB_TYPE_GENWQE;
	}

	/* "RED..." selects the redundant card, else a card number */
	if (card != NULL) {
		if (strncmp(card, "RED", 3) == 0)
			zlib_card = ACCEL_REDUNDANT;
		else
			zlib_card = atoi(card);
	}

	if (xcheck_str != NULL)
		zlib_xcheck = str_to_num(xcheck_str);

	if (ibuf_s != NULL)
		zlib_ibuf_total = str_to_num(ibuf_s);

	if (obuf_s != NULL)
		zlib_obuf_total = str_to_num(obuf_s);

	/*
	 * USE_FLAT_BUFFERS and CACHE_HANDLES only work for GenWQE.
*/
	if (zlib_accelerator != DDCB_TYPE_GENWQE) {
		zlib_deflate_flags &= ~(ZLIB_FLAG_USE_FLAT_BUFFERS |
					ZLIB_FLAG_CACHE_HANDLES);
		zlib_inflate_flags &= ~(ZLIB_FLAG_USE_FLAT_BUFFERS |
					ZLIB_FLAG_CACHE_HANDLES);
	}
}

/**
 * Counterpart of zedc_hw_init(): detaches the log file from libzedc and
 * libddcb unless logging goes to stderr and, if handle caching is
 * enabled for inflate or deflate, closes all cached card handles.
 */
void zedc_hw_done(void)
{
	unsigned int card_no;
	int flags = (zlib_inflate_flags | zlib_deflate_flags);

	if (zlib_log != stderr) {
		zedc_set_logfile(NULL);
		ddcb_set_logfile(NULL);
	}

	if ((flags & ZLIB_FLAG_CACHE_HANDLES) == 0x0)
		return;

	/*
	 * NOTE(review): index 128 is included, so zedc_cards[] is
	 * assumed to have at least 129 entries - confirm at its
	 * definition.
	 */
	for (card_no = 0; card_no <= 128; card_no++) {
		if (zedc_cards[card_no] == NULL)
			continue;
		zedc_close(zedc_cards[card_no]);
	}
}

/*
 * NOTE(review): the next line is a tar archive member header
 * (lib/hw_defs.h), not C source. It is kept untouched.
 */
genwqe-user-4.0.18/lib/hw_defs.h000066400000000000000000000111321303345043000163750ustar00rootroot00000000000000
/*
 * Copyright 2015, International Business Machines
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __ZEDC_DEFS_H__
#define __ZEDC_DEFS_H__

/*
 * NOTE(review): the header names of the following include directives
 * are missing in this copy of the file - restore them from the
 * original source.
 */
#include
#include
#include
#include
#include /* For SYS_xxx definitions */
#include

#ifndef ARRAY_SIZE
#  define ARRAY_SIZE(a)  (sizeof((a)) / sizeof((a)[0]))
#endif

#ifndef ABS
#  define ABS(a)	 (((a) < 0) ? -(a) : (a))
#endif

/* Thread id of the caller, obtained via the gettid system call. */
static inline pid_t gettid(void)
{
	return (pid_t)syscall(SYS_gettid);
}

extern int zedc_dbg;
extern FILE *zedc_log;

/*
 * Logging helpers. All of them are silent if zedc_log is NULL.
 *   pr_err/pr_warn: always printed, with pid.tid, file and line
 *   pr_dbg:         printed if zedc_dbg is set, no prefix
 *   pr_info:        printed if zedc_dbg is set, with prefix
 *   pr_log:         printed if the passed dbg expression is true
 */
#define pr_err(fmt, ...)	do {					\
		if (zedc_log)						\
			fprintf(zedc_log, "%08x.%08x %s:%u: Error: " fmt, \
				getpid(), gettid(), __FILE__, __LINE__,	\
				## __VA_ARGS__);			\
	} while (0)

#define pr_warn(fmt, ...)	do {					\
		if (zedc_log)						\
			fprintf(zedc_log, "%08x.%08x %s:%u: Warn: " fmt, \
				getpid(), gettid(), __FILE__, __LINE__,	\
				## __VA_ARGS__);			\
	} while (0)

#define	pr_dbg(fmt, ...)	do {					\
		if (zedc_log && zedc_dbg)				\
			fprintf(zedc_log, fmt, ## __VA_ARGS__);		\
	} while (0)

#define	pr_info(fmt, ...)	do {					\
		if (zedc_log && zedc_dbg)				\
			fprintf(zedc_log, "%08x.%08x %s:%u: Info: " fmt, \
				getpid(), gettid(), __FILE__, __LINE__,	\
				## __VA_ARGS__);			\
	} while (0)

#define	pr_log(dbg, fmt, ...)	do {					\
		if (zedc_log && (dbg))					\
			fprintf(zedc_log, "%08x.%08x %s:%u: Info: " fmt, \
				getpid(), gettid(), __FILE__, __LINE__,	\
				## __VA_ARGS__);			\
	} while (0)

/******************************************************************************
 * zEDC Support
 *****************************************************************************/

/* zedc device descriptor */
struct zedc_dev_t {
	int mode;
	int zedc_rc;		/* libzedc return codes; detailed info
				 * in cases where we needed to return. */
	accel_t card;		/* Ptr. to card */
	int card_rc;		/* libcard return codes */
	int card_errno;
	int collect_debug_data;
};

/**
 * APP_ID:
 *   0x00000000475a4950 old
 *   0x00000002475a4950 new
 *           VV
 *             G Z I P
 *
 * is_zedc() checks the lower 32 bits of the application id for the
 * "GZIP" signature.
 */
static inline int is_zedc(zedc_handle_t zedc)
{
	uint64_t app_id = accel_get_app_id(zedc->card);

	return (app_id & 0xFFFFFFFF) == 0x475a4950;
}

/*
 * Dynamic huffman is reported as supported if the byte above the
 * signature (marked VV above) is at least 0x02.
 */
static inline int dyn_huffman_supported(zedc_handle_t zedc)
{
	uint64_t app_id = accel_get_app_id(zedc->card);

	return (app_id & 0xFF00000000ull) >= 0x0200000000ull;
}

/*
 * RFC1951
 *
 * BTYPE specifies how the data are compressed, as follows:
 *   00 - no compression
 *   01 - compressed with fixed Huffman codes
 *   10 - compressed with dynamic Huffman codes
 *   11 - reserved (error)
 *
 * E.g. fixed Header 01, read from left ...
*
 * RFC1951 End-Of-Block Marker = %000_0000
 */
/* Values of the 3 block header bits: BFINAL in bit 0, BTYPE above it */
#define HDR_BTYPE_NO	 0x00
#define HDR_BTYPE_FIXED	 0x02
#define HDR_BTYPE_DYN	 0x04
#define HDR_BTYPE_RES	 0x06
#define HDR_BFINAL	 0x01
#define FIXED_EOB	 0x00		/* 7 bits 0s */

/* RFC1952 GZIP */
#define FTEXT		 0x01
#define FHCRC		 0x02
#define FEXTRA		 0x04
#define FNAME		 0x08
#define FCOMMENT	 0x10

#define FNAME_MAXLEN	 64   /* ensure that we do not overflow our FIFO */
#define FCOMMENT_MAXLEN	 64   /* ensure that we do not overflow our FIFO */

/**
 * @brief	manage execution of an inflate or a deflate job
 * @param zedc	ZEDC device handle
 * @param cmd	pointer to command descriptor
 */
int zedc_execute_request(zedc_handle_t zedc, struct ddcb_cmd *cmd);

int zedc_alloc_workspace(zedc_streamp strm);
int zedc_free_workspace(zedc_streamp strm);

void zedc_asv_infl_print(zedc_streamp strm);
void zedc_asiv_infl_print(zedc_streamp strm);
void zedc_asv_defl_print(zedc_streamp strm, int dbg);
void zedc_asiv_defl_print(zedc_streamp strm, int dbg);

/**
 * @brief	Prepare format specific deflate header when the user
 *		initializes decompression.
 *		provided window_bits:
 *		  8 ... 15:   ZLIB    / RFC1950 (window size 2^8 ... 2^15)
 *		 -8 ... -15:  DEFLATE / RFC1951 (window size 2^8 ... 2^15)
 *		 >= 16:       GZIP    / RFC1952
 */
int zedc_format_init(struct zedc_stream_s *strm);

unsigned long __adler32(unsigned long adl, const unsigned char *buf, int len);

#endif	/* __ZEDC_DEFS_H__ */

/*
 * NOTE(review): the next line is a tar archive member header
 * (lib/inflate.c), not C source. It is kept untouched.
 */
genwqe-user-4.0.18/lib/inflate.c000066400000000000000000001225131303345043000164010ustar00rootroot00000000000000
/*
 * Copyright 2015, International Business Machines
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * @brief This part of the libzedc library is responsible to perform
 * decompression (inflate) of the compressed data. The library
 * supports the data formats described in RFC1950, RFC1951, and
 * RFC1952.
 *
 * IBM Accelerator Family 'GenWQE'/zEDC
 */

/****************************************************************************
 * DeCompression (Inflate)
 ***************************************************************************/

/*
 * NOTE(review): the header names of the following include directives
 * are missing in this copy of the file - restore them from the
 * original source.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "hw_defs.h"

/* Results of the inflate_rem_*_header() helpers */
#define INFLATE_HDR_OK			0
#define INFLATE_HDR_NEED_MORE_DATA	1
#define INFLATE_HDR_ZLIB_NEED_DICT	2
#define INFLATE_HDR_ERROR		3

/**
 * @brief	estimate the amount of bytes consumed
 *		solely from the input stream
 *
 * Computes inp_processed * 8 + proc_bits - pre_scratch_bits and
 * rounds the resulting bit count up to full bytes.
 *
 * @return	number of bytes, truncated to 32 bits
 */
static uint32_t inp_proc_update(uint32_t inp_processed, uint32_t proc_bits,
				uint32_t pre_scratch_bits)
{
	uint64_t in_total;	/* Bits total */

	/* total amount of bits consumed by decompressor */
	in_total  = (uint64_t)inp_processed * 8;
	in_total += (uint64_t)proc_bits;
	in_total -= (uint64_t)pre_scratch_bits;
	in_total  = (in_total + 7ULL) / 8ULL;

	return (uint32_t)in_total;	/* Return number of bytes consumed */
}

/*
 * Copy the bytes of a newly reported header/tree to the start of the
 * tree area in the workspace and derive hdr_ib, tree_bits and
 * pad_bits from it. Source bytes are taken from the old scratch
 * contents and/or from next_in, depending on where the header starts.
 */
static void extract_new_tree(zedc_streamp strm)
{
	uint8_t *target;
	const uint8_t *src;
	uint64_t in_scratch_bytes;
	uint64_t hdr_offs;
	uint64_t hdr_start_total_bits;
	unsigned cnt;
	int64_t src_offs;

	in_scratch_bytes = (strm->scratch_bits + strm->scratch_ib) / 8;

	/* new tree detected (hdr_start > 0) */
	/* offs relative to first bit in scratch/data area */
	hdr_start_total_bits = (uint64_t)strm->hdr_start * 8 +
		strm->out_hdr_start_bits;
	hdr_offs = hdr_start_total_bits - strm->in_hdr_bits +
		strm->scratch_ib;

	strm->hdr_ib = hdr_offs % 8;	/* ignore bits, rel to byte offs */
	cnt = ((uint64_t)strm->out_hdr_bits + strm->hdr_ib + 7)/8;
	target = strm->wsp->tree;

	/* offset relative to beginning of input data area */
	/* (negative if the header starts within the scratch bytes) */
	src_offs = hdr_offs/8 - in_scratch_bytes;

	if ((hdr_start_total_bits == 0) && (strm->in_hdr_bits == 0)) {
		/*
		 * We didn't have a header before, tree starts in
		 * scratch/data avoid copying bytes that exist in
		 * scratch already, because scratch becomes the new
		 * tree.
		 */
		if (cnt > in_scratch_bytes) {
			cnt -= in_scratch_bytes;
			target += in_scratch_bytes;
			src_offs += in_scratch_bytes;
		} else
			cnt = 0;
	}

	/* NOTE: This is the same as in scratch_update() */
	/* copy abs(src_offs) bytes behind the tree ... */
	/* NOTE(review): abs() takes an int, src_offs is an int64_t */
	if (cnt > 0 && src_offs < 0) {
		src = strm->wsp->tree + strm->in_hdr_scratch_len + src_offs;
		memmove(target, src, abs(src_offs));
		target += abs(src_offs);
		cnt -= abs(src_offs);
		src_offs = 0;
	}
	/* copy remaining cnt bytes ... */
	if (cnt) {
		src = strm->next_in + src_offs;
		memmove(target, src, cnt);
	}

	strm->tree_bits = strm->out_hdr_bits;

	/* padding bits derived from actual tree */
	/* (fills hdr_ib + tree_bits up to a multiple of 64 bits) */
	if (strm->tree_bits > 0) {
		strm->pad_bits = 64ULL - ((strm->hdr_ib + strm->tree_bits) %
					  64ULL);
		strm->pad_bits &= 63ULL;
	} else
		strm->pad_bits = 0;
}

/*
 * Call this after tree update.
 *
 * Recomputes scratch_bits, scratch_ib and inp_data_offs from the
 * current processing position and copies the bytes which have to be
 * kept for the next request behind the 64 bit aligned tree in the
 * workspace.
 */
static void scratch_update(zedc_streamp strm)
{
	uint8_t *target;
	const uint8_t *src;
	uint64_t in_scratch_bytes;
	uint64_t scratch_offs;
	unsigned cnt;
	int64_t src_offs;

	in_scratch_bytes = (strm->scratch_bits + strm->scratch_ib) / 8;

	/* new tree detected (hdr_start > 0) */
	/* offs relative to first bit in scratch/data area */
	scratch_offs = (uint64_t)strm->inp_processed * 8ULL +
		(uint64_t)strm->proc_bits -
		strm->in_hdr_bits + strm->scratch_ib;

	/* target = start of scratch with new tree. */
	target = strm->wsp->tree +
		((strm->tree_bits + strm->hdr_ib + 63) & 0xFFFFFFC0) / 8;

	/* current processing offset relative to begin of input data area */
	src_offs = scratch_offs / 8 - in_scratch_bytes;
	if (src_offs >= 0) {
		/* keep the partially processed byte, if there is one */
		cnt = (scratch_offs % 8) ? 1 : 0;
		strm->inp_data_offs = src_offs + cnt;
	} else {
		/* scratch bytes must at least persist */
		strm->inp_data_offs = 0;
		cnt = in_scratch_bytes - scratch_offs/8;
	}

	/* if output buffer is NOT full, copy all remaining input bytes
	   if output buffer is full, copy only a partial byte to scratch. */
	if (strm->avail_out > strm->outp_returned) { /* not full */
		/* take into account if format != DEFLATE */
		cnt += strm->avail_in - strm->inp_data_offs;
		strm->inp_data_offs = strm->avail_in;
	}

	strm->scratch_bits = cnt * 8 - (scratch_offs % 8);
	strm->scratch_ib = scratch_offs % 8;

	/* NOTE: This is the same as in extract_new_tree() */
	/* copy abs(src_offs) bytes behind the tree ... */
	/* NOTE(review): abs() takes an int, src_offs is an int64_t */
	if (cnt > 0 && src_offs < 0) {
		src = strm->wsp->tree + strm->in_hdr_scratch_len + src_offs;
		memmove(target, src, abs(src_offs));
		target += abs(src_offs);
		cnt -= abs(src_offs);
		src_offs = 0;
	}
	/* copy remaining cnt bytes ... */
	if (cnt) {
		src = strm->next_in + src_offs;
		memmove(target, src, cnt);
	}
}

/**
 * @brief Processes header and tree area in workspace
 *
 * HW reported that a complete tree was found. Called when hdr-bits
 * are provided and none written so far.
 *
 * HW reported that a complete tree was found. HDR_START then
 * represents offset in decomp's input-data which is composed of HDR +
 * TREE + SCRATCH + INPUT_STREAM. Copy header to start of tree area in
 * workspace.
 */
static void setup_tree(zedc_streamp strm)
{
	uint64_t hdr_start_total_bits;

	/*
	 * If End-Of-Block has been passed or reached, all tree
	 * parameters are obsolete, a new tree is expected.
*/
	if (strm->infl_stat & INFL_STAT_PASSED_EOB) {
		strm->tree_bits = 0;
		strm->pad_bits = 0;
		strm->hdr_ib = 0;

		if (strm->infl_stat & INFL_STAT_REACHED_EOB) { /* on eob */
			strm->out_hdr_bits = 0;
			strm->out_hdr_start_bits = 0; /* got from DDCB */
		}
		/* final block done: nothing to keep for a next request */
		if (strm->infl_stat & INFL_STAT_FINAL_EOB) {
			strm->inp_data_offs = strm->in_data_used;
			strm->scratch_bits = 0;
			strm->eob_seen = 1;	/* final EOB seen */
			return;
		}
	}

	hdr_start_total_bits = (uint64_t)strm->hdr_start * 8 +
		strm->out_hdr_start_bits;

	/* Have we found a NEW header? */
	/*
	 * out_hdr_bits must indicate a header but it's not a new
	 * header if header start/_bits == 0, and in_hdr_bit != 0
	 * (tree exists at offset 0 of given tree)
	 */
	if ((strm->out_hdr_bits > 0) &&
	    ((hdr_start_total_bits > 0) || (strm->in_hdr_bits == 0))) {
		strm->tree_bits = strm->out_hdr_bits;
		extract_new_tree(strm);
	}
	scratch_update(strm);
}

/**
 * @brief	if an EOB marker was passed all tree and scratch data become
 *		obsolete. If HDR_START > 0 then copy tree data from
 *		input buffer to scratch area.
 *		As long as input_processed = 0 input data must be
 *		collected in scratch.
 *		If a valid tree is detected (out_hdr_bits > 0) the tree must
 *		be conserved in scratch and padding bytes must be appended.
 *
 * @param strm	inflate job context
 * @return	ZEDC_OK, or ZEDC_ERR_TREE_OVERRUN if the unprocessed input
 *		does not fit into the tree/scratch area
 */
static int post_scratch_upd(zedc_streamp strm)
{
	const uint8_t *src;
	uint8_t *target;
	uint16_t len;
	uint64_t count;
	zedc_handle_t zedc = (zedc_handle_t)strm->device;

	/* anything processed ? */
	if (strm->inp_processed || strm->proc_bits) {
		count = inp_proc_update(strm->inp_processed, strm->proc_bits,
					strm->pre_scratch_bits);
		strm->in_data_used = count;
		setup_tree(strm);
	}

	/* if no input data processed copy input-data to tree area */
	if ((strm->inp_processed == 0) && (strm->proc_bits == 0)) {
		/* Special CASE: empty Input */
		if (strm->avail_in >
		    (ZEDC_TREE_LEN - strm->in_hdr_scratch_len)) {
			pr_err("scratch buffer too small\n");
			zedc->zedc_rc = ZEDC_ERR_TREE_OVERRUN;
			return zedc->zedc_rc;
		}

		/* append all of the input behind the existing scratch */
		if (strm->avail_in) {
			target = strm->wsp->tree + strm->in_hdr_scratch_len;
			src = (uint8_t *)strm->next_in;
			memcpy(target, src, strm->avail_in);
			strm->inp_data_offs += strm->avail_in;
			strm->scratch_bits += strm->avail_in * 8;
		}
	}

	/*
	 * If we cut within a copyblock a new header must be provided
	 * representing the remaining bytes in the block. Overwriting
	 * tree is valid because copy blocks always end on a byte
	 * boundary. OUT_HDR_BITS will always be 40 header type must
	 * be checked (HW 243728).
	 */
	if ((strm->copyblock_len) &&
	    ((strm->infl_stat & INFL_STAT_HDR_TYPE) == 0) &&
	    (strm->out_hdr_bits != 0)) {
		target = strm->wsp->tree;
		len = strm->copyblock_len;

		if (strm->infl_stat & INFL_STAT_HDR_BFINAL) /* final block? */
			target[0] = 0x01; /* restore final block */
		else
			target[0] = 0x00;

		/* LEN and its complement, both little endian */
		*(uint16_t *)(target + 1) = __cpu_to_le16(len);
		*(uint16_t *)(target + 3) = __cpu_to_le16(~len);
		*(uint16_t *)(target + 5) = 0xaaaa; /* dummy */
		strm->hdr_ib = 0;
		strm->tree_bits = 40;	/* 5 bytes */
		strm->pad_bits = 24;	/* total 64bit */
	}
	return ZEDC_OK;
}

/**
 * @brief	Remove ZLIB header from inflate stream
 *		ZLIB has two fixed header bytes and optionally
 *		a four byte Dictionary ID
 * @param strm	inflate stream context
 * @return	INFLATE_HDR_OK			header complete
 *		INFLATE_HDR_NEED_MORE_DATA	more header bytes required
 *		INFLATE_HDR_ZLIB_NEED_DICT	header complete, FDICT was set
 *		INFLATE_HDR_ERROR		compliance check failed
 *
 * A zlib stream has the following structure:
 *
 *   0   1
 * +---+---+
 * |CMF|FLG|   (more-->)
 * +---+---+
 *
 * (if FLG.FDICT set)
 *
 *   0   1   2   3
 * +---+---+---+---+
 * |     DICTID    |   (more-->)
 * +---+---+---+---+
 *
 * +=====================+---+---+---+---+
 * |...compressed data...|    ADLER32    |
 * +=====================+---+---+---+---+
 */
static int inflate_rem_zlib_header(struct zedc_stream_s *strm)
{
	uint16_t val16;
	head_state next_state = strm->header_state;	/* Current State */
	bool more_data = false;
	int rc = INFLATE_HDR_OK;

	if (strm->prefx_idx < 1) {	/* min header bytes collected ? */
		strm->header_state = HEADER_START;
		return INFLATE_HDR_NEED_MORE_DATA;
	}

	while ((next_state != HEADER_DONE) && (false == more_data)) {
		switch (next_state) {
		case HEADER_START:
			if (strm->prefx_idx == 1) {
				/* CMF in the upper, FLG in the lower byte */
				val16 = ((uint16_t)strm->prefx[0] << 8) +
					strm->prefx[1];

				if ((val16 % 31) != 0) {
					pr_err("ZLIB header invalid (FCHECK)\n");
					return INFLATE_HDR_ERROR;
				}
				/* check CMF */
				if (((val16 & 0x0f00) != 0x0800) ||
				    ((val16 & 0xf000) > 0x7000)) {
					pr_err("ZLIB header invalid (CMF)\n");
					return INFLATE_HDR_ERROR;
				}
				if (val16 & 0x0020) {
					/* bit 5 of FLG = FDICT */
					next_state = ZLIB_ADLER;
					more_data = true;
				} else
					next_state = HEADER_DONE;
			} else
				more_data = true;
			break;
		case ZLIB_ADLER:
			if (strm->prefx_idx == 5) {
				/* zlib header with adler32 data ...
*/
				/* ... stored big endian in bytes 2..5 */
				strm->dict_adler32 =
					((uint32_t)strm->prefx[2] << 24 |
					 (uint32_t)strm->prefx[3] << 16 |
					 (uint32_t)strm->prefx[4] << 8  |
					 (uint32_t)strm->prefx[5]);
				strm->adler32 = strm->dict_adler32;
				strm->havedict = 0;
				next_state = HEADER_DONE;
				rc = INFLATE_HDR_ZLIB_NEED_DICT;
			} else
				more_data = true;
			break;
		case HEADER_DONE:
		default:
			break;
		}
	}
	strm->header_state = next_state;
	if (more_data)
		rc = INFLATE_HDR_NEED_MORE_DATA;
	return rc; /* can be INFLATE_HDR_OK or INFLATE_HDR_ZLIB_NEED_DICT */
}

/**
 * @brief	Remove GZIP header from inflate stream
 *		GZIP can have a variable amount of header data
 *		depending on the FLAGs set. Re-enter until all
 *		flags are processed
 *
 * The function is entered once per header byte; the byte is read via
 * strm->next_in and the fixed part via strm->prefx[]. If a gzip header
 * structure is attached (strm->gzip_head), the decoded fields are
 * stored there.
 *
 * @param strm	inflate stream context
 *
 * @return	0 success
 *		1 need more data
 *		3 compliance check fails
 */
static int inflate_rem_gzip_header(struct zedc_stream_s *strm)
{
	uint8_t flg;		/* GZIP FLG Byte */
	struct gzedc_header_s *gz_h;
	int my_idx = 0;
	head_state next_state = strm->header_state;	/* Current State */
	bool more_data = false;

	if (strm->prefx_idx < 9)	/* min header bytes collected ? */
		return INFLATE_HDR_NEED_MORE_DATA;	/* Get more data */

	gz_h = strm->gzip_head;
	/*
	 * NOTE(review): next_state was read from header_state above,
	 * before this reset - presumably header_state already is
	 * HEADER_START at this point; confirm.
	 */
	if (strm->prefx_idx == 9)
		strm->header_state = HEADER_START; /* Current State = Start */
	flg = strm->prefx[3];	/* Get FLG Byte */

	while ((next_state != HEADER_DONE) && (false == more_data)) {
		switch (next_state) {
		case HEADER_START:
			if ((strm->prefx[0] != 0x1f) ||		/* ID1 */
			    (strm->prefx[1] != 0x8b) ||		/* ID2 */
			    (strm->prefx[2] != 0x08)) {		/* CM */
				return INFLATE_HDR_ERROR;	/* Fault */
			}
			if (gz_h) {
				/* Get time, xflags and os */
				unsigned int tmp;

				memcpy(&tmp, &strm->prefx[4], 4);
				gz_h->time = __le32_to_cpu(tmp);
				gz_h->xflags = strm->prefx[8];
				gz_h->os = strm->prefx[9];
			}
			/* next is check flag */
			next_state = FLAGS_CHECK_EMPTY;
			break;
		case FLAGS_CHECK_EMPTY:
			if (flg == 0)
				next_state = HEADER_DONE;
			else
				next_state = FLAGS_CHECK_EXTRA;
			break;
		case FLAGS_CHECK_EXTRA:
			if (flg & 0x04) {	/* FEXTRA bit set ? */
				more_data = true;
				next_state = FLAGS_GET_EXTRA_LEN1;
				/* FNAME is next */
			} else
				next_state = FLAGS_CHECK_FNAME;
			break;
		case FLAGS_GET_EXTRA_LEN1:
			/* low byte of XLEN */
			strm->xlen = (uint16_t)*strm->next_in;
			/* Reset Index to get extra data */
			strm->gzip_header_idx = 0;
			more_data = true;
			next_state = FLAGS_GET_EXTRA_LEN2; /* Next State */
			break;
		case FLAGS_GET_EXTRA_LEN2:
			/* high byte of XLEN */
			strm->xlen |= (uint16_t)*strm->next_in << 8;
			if (gz_h)	/* Save for get Header */
				gz_h->extra_len = strm->xlen;
			next_state = FLAGS_GET_EXTRA;	/* Next State */
			more_data = true;
			break;
		case FLAGS_GET_EXTRA:	/* get Extra binary data */
			/* xlen counts the extra bytes still to come */
			if (1 == strm->xlen) {
				/* FNAME is Next State */
				next_state = FLAGS_CHECK_FNAME;
				more_data = false;
			} else {
				strm->xlen--;
				more_data = true;
			}
			if (gz_h) {
				my_idx = strm->gzip_header_idx; /* Get index */
				if (my_idx < (int)gz_h->extra_max) {
					gz_h->extra[my_idx++] = *strm->next_in;
					strm->gzip_header_idx = my_idx;
					/* and save back */
				} else
					return INFLATE_HDR_ERROR;/* Fault */
			}
			break;
		case FLAGS_CHECK_FNAME:
			if (flg & 0x08) {	/* FNAME bit set ? */
				next_state = FLAGS_GET_FNAME;
				more_data = true;
				strm->gzip_header_idx = 0; /* Reset index */
			} else
				next_state = FLAGS_CHECK_FCOMMENT;
			break;
		case FLAGS_GET_FNAME:
			if (gz_h) {
				my_idx = strm->gzip_header_idx; /* Get index */
				if (my_idx < (int)gz_h->name_max) {
					gz_h->name[my_idx++] = *strm->next_in;
					strm->gzip_header_idx = my_idx;
					/* and save back */
				} else
					return INFLATE_HDR_ERROR; /* Fault */
			}
			/* the name ends with a 0 byte */
			if (*strm->next_in == 0)
				next_state = FLAGS_CHECK_FCOMMENT;
				/* check FCOMMENT */
			else
				more_data = true;
			break;
		case FLAGS_CHECK_FCOMMENT:
			if (flg & 0x10) {	/* FCOMMENT bit set ? */
				more_data = true;	/* get FCOMMENT */
				next_state = FLAGS_GET_FCOMMENT;
				/* Reset index */
				strm->gzip_header_idx = 0;
			} else
				next_state = FLAGS_CHECK_FHCRC;
			break;
		case FLAGS_GET_FCOMMENT:
			if (gz_h) {
				my_idx = strm->gzip_header_idx; /* Get index */
				if (my_idx < (int)gz_h->comm_max) {
					gz_h->comment[my_idx++] =
						*strm->next_in;
					strm->gzip_header_idx = my_idx;
					/* and save back */
				} else
					return INFLATE_HDR_ERROR; /* Fault */
			}
			/* the comment ends with a 0 byte */
			if (*strm->next_in == 0)
				next_state = FLAGS_CHECK_FHCRC;
				/* FHCRC is Next State */
			else
				more_data = true;	/* Get more data */
			break;
		case FLAGS_CHECK_FHCRC:
			if (flg & 0x02) {	/* FHCRC bit set ? */
				more_data = true;
				next_state = FLAGS_GET_FHCRC1;
			} else
				next_state = FLAGS_CHECK_FTEXT;
			break;
		case FLAGS_GET_FHCRC1:
			strm->gzip_hcrc = (uint16_t)*strm->next_in;
			/* Get 1st Byte */
			next_state = FLAGS_GET_FHCRC2;	/* Next is 2nd byte */
			more_data = true;
			break;
		case FLAGS_GET_FHCRC2:	/* 2nd byte of FHCRC */
			strm->gzip_hcrc |= (uint16_t)*strm->next_in << 8;
			/* Need more work here to compare deflate and inflate */
			next_state = FLAGS_CHECK_FTEXT;	/* Check FTEXT */
			break;
		case FLAGS_CHECK_FTEXT:
			if (flg & 0x01) {	/* FTEXT bit set ? */
				if (gz_h)
					gz_h->text = 1;	/* Set Text flag */
			}
			next_state = HEADER_DONE;	/* Exit while */
			break;
		case HEADER_DONE:	/* never reach this */
		default:		/* only to make gcc happy */
			break;
		}
	}
	if (HEADER_DONE == next_state) {
		if (gz_h)
			gz_h->done = 1;
	}
	strm->header_state = next_state;
	if (more_data)
		return INFLATE_HDR_NEED_MORE_DATA;
	return INFLATE_HDR_OK;
}

/**
 * @brief	Remove header from GZIP or ZLIB files to get a plain
 *		inflate coded stream
 * @param strm	inflate stream context
 * @param flush	flush mode; ZEDC_BLOCK requests the 0x80 signal in
 *		data_type once the header has been removed
 * @return	ZEDC_OK if done or more input is needed,
 *		ZEDC_NEED_DICT if the ZLIB header asks for a dictionary,
 *		ZEDC_ERR_GZIP_HDR if the GZIP/ZLIB header is invalid
 */
static int inflate_format_rem_header(struct zedc_stream_s *strm, int flush)
{
	int rc, rc1;
	int block_req = 0;
	zedc_handle_t zedc = NULL;

	if (strm->format == ZEDC_FORMAT_DEFL)
		return ZEDC_OK;	/* no header for DEFLATE/INFLATE */

	strm->data_type &= ~0x80;
	if (strm->prefx_len == 0) {	/* removing not yet prepared ? */
		strm->prefx_idx = 0;
		if (strm->format == ZEDC_FORMAT_GZIP)
			strm->prefx_len = 10;	/* min bytes to remove */
		else
			strm->prefx_len = 2;	/* format = ZLIB */
		if (ZEDC_BLOCK == flush)
			block_req = 1;
	}

	/*
	 * Copy header bytes to local buffer
	 * GZIP can have 'Extra Bytes' and 'Filename' in header.
	 *
	 * Restructuring this loop might help to avoid calling the
	 * rem_*_header() functions too often.
*/ rc = ZEDC_OK; rc1 = 0; while (strm->avail_in) { if (strm->prefx_idx < ZEDC_FORMAT_STORAGE) strm->prefx[strm->prefx_idx] = *strm->next_in; if (strm->format == ZEDC_FORMAT_GZIP) rc1 = inflate_rem_gzip_header(strm); else rc1= inflate_rem_zlib_header(strm); strm->next_in++; strm->avail_in--; strm->total_in++; strm->prefx_idx++; if (INFLATE_HDR_OK == rc1) { rc = ZEDC_OK; break; } else if (INFLATE_HDR_ERROR == rc1) { zedc = (zedc_handle_t)strm->device; zedc->zedc_rc = ZEDC_ERR_GZIP_HDR; rc = ZEDC_ERR_GZIP_HDR; break; /* Error */ } else if (INFLATE_HDR_ZLIB_NEED_DICT == rc1) { rc = ZEDC_NEED_DICT; break; } /* Continue with INFLATE_HDR_MORE_DATA */ } if ((1 == block_req) && (ZEDC_OK == rc)) strm->data_type |= 0x80; /* Set Signal in data_type */ return rc; } /** * @brief Remove trailer from gzip (RFC1952) or ZLIB (RFC1950) * encoded files * A signal is needed to indicate End-Of-Final-Block has * been detected * @param strm decompression job context */ static int inflate_format_rem_trailer(struct zedc_stream_s *strm) { uint32_t val32[2]; zedc_handle_t zedc = (zedc_handle_t)strm->device; if (strm->format == ZEDC_FORMAT_DEFL) return ZEDC_OK; /* no trailer for DEFLATE/INFLATE */ if (strm->postfx_len == 0) { /* removing not yet prepared ? */ strm->postfx_idx = 0; if (strm->format == ZEDC_FORMAT_GZIP) strm->postfx_len = 8; /* GZIP: LEN/CRC32 */ else strm->postfx_len = 4; /* ZLIB: ADLER32 */ } /* save trailer to local buffer */ while ((strm->postfx_idx < strm->postfx_len) && strm->avail_in) { /* Can postfx_idx exceed the size of the postfx buffer if the input data is too large? */ strm->postfx[strm->postfx_idx++] = *strm->next_in++; strm->avail_in--; strm->total_in++; /* * After 4 trailing bytes the checksum in both formats * is present and can be verified. 
*/ if ((strm->postfx_idx == 4) && (strm->format == ZEDC_FORMAT_GZIP)) { memcpy(&val32[0], &strm->postfx[0], 4); strm->file_crc32 = __le32_to_cpu(val32[0]); if (strm->file_crc32 != strm->crc32) { zedc->zedc_rc = ZEDC_ERR_CRC32; return zedc->zedc_rc; } } if (strm->postfx_idx >= strm->postfx_len) { if (strm->format == ZEDC_FORMAT_GZIP) { /* remaining eight bytes */ memcpy(&val32[0], &strm->postfx[0], 8); /* val32[0] = CRC32 from GZIP stream */ /* val32[1] = ISIZE from GZIP stream */ strm->file_crc32 = __le32_to_cpu(val32[0]); strm->file_size = __le32_to_cpu(val32[1]); /* compare trailer info and HW result */ if (strm->file_crc32 != strm->crc32) { zedc->zedc_rc = ZEDC_ERR_CRC32; return zedc->zedc_rc; } } else { /* remaining 4 bytes (BE adler32) */ memcpy(&val32[0], &strm->postfx[0], 4); /* val32[0] = ADLER32 from ZLIB stream */ strm->file_adler32 = __be32_to_cpu(val32[0]); /* same value as HW returned ? */ if (strm->file_adler32 != strm->adler32) { pr_err("ADLER32 mismatch: " "%08llx/%08llx\n", (long long)strm->file_adler32, (long long)strm->adler32); zedc->zedc_rc = ZEDC_ERR_ADLER32; return zedc->zedc_rc; } } } } if (strm->postfx_idx == strm->postfx_len) return ZEDC_OK; /* removing done */ return 1; /* must re-enter */ } /** * @brief Figure out if data is left from previous task due to * insufficent output buffer space. * @param strm decompression job context */ int zedc_inflate_pending_output(struct zedc_stream_s *strm) { return strm->obytes_in_dict; } /** * @brief If data is left from previous task due to insufficent * output buffer space, this data must first be stored * to the new output buffer. * @param strm decompression job context */ static int inflate_flush_output_buffer(struct zedc_stream_s *strm) { uint8_t *pdict; zedc_handle_t zedc = (zedc_handle_t)strm->device; if (strm->obytes_in_dict == 0) return ZEDC_OK; /* * Unstored data was temporarily stored by HW at the end of * dictionary. First restore these bytes if new output buffer * is available. 
*/ /* FIXME rename 'dict_len' to 'out_dict_used' to match spec */ if (strm->dict_len < strm->obytes_in_dict) { pr_err("invalid 'obytes_in_dict' ZEDC_ERR_DICT_OVERRUN\n"); zedc->zedc_rc = ZEDC_ERR_DICT_OVERRUN; return zedc->zedc_rc; } /* obytes at end of dict */ pdict = strm->wsp->dict[strm->wsp_page] + strm->out_dict_offs + strm->dict_len - strm->obytes_in_dict; while (strm->avail_out && strm->obytes_in_dict) { *strm->next_out++ = *pdict++; strm->avail_out--; strm->total_out++; strm->obytes_in_dict--; } return ZEDC_OK; } /** * @brief Post-process for inflate (RFC 1951) * - save necessary states for 'save & restore' * - store remaining data if output buffer is full * @param strm decompression job context * @param asv pointer to ASV area of processed DDCB * @return 0 if successful */ static void get_inflate_asv(struct zedc_stream_s *strm, struct zedc_asv_infl *asv) { /* * If HW was not able to decompress data due to insufficient * data INP_PROCESSED=0 is returned. Then additional input * data is needed. Some output fields in DDCB don't represent * its real values and must be left in its previous state. * * Invert condition. Condition means hw processed some data. * If hardware was unable to process data, we need more input! 
*/ if ((asv->inp_processed != 0) || (asv->proc_bits != 0)) { strm->out_hdr_bits = __be16_to_cpu(asv->out_hdr_bits); strm->hdr_start = __be32_to_cpu(asv->hdr_start); strm->out_hdr_start_bits = asv->hdr_start_bits; } strm->copyblock_len = __be16_to_cpu(asv->copyblock_len); strm->crc32 = __be32_to_cpu(asv->out_crc32); strm->adler32 = __be32_to_cpu(asv->out_adler32); /* prepare dictionary for next call */ strm->dict_len = __be16_to_cpu(asv->out_dict_used); strm->out_dict_offs = asv->out_dict_offs; strm->outp_returned = __be32_to_cpu(asv->outp_returned); strm->inp_processed = __be32_to_cpu(asv->inp_processed); strm->proc_bits = asv->proc_bits; /* store values needed for next call */ strm->obytes_in_dict = __be16_to_cpu(asv->obytes_in_dict); strm->infl_stat = asv->infl_stat; } /** * @brief Set ASIV part in Inflate DDCB * @param cmd command params from user * @param strm decompression job context * @param asiv pointer to ASIV part of corresponding DDCB * @return always 0 */ static void set_inflate_asiv(struct zedc_stream_s *strm, struct zedc_asiv_infl *asiv) { int p; uint64_t len; /* genwqe_hexdump(stderr, strm->next_in, MIN(strm->avail_in, (unsigned int)0x20)); */ asiv->in_buff = __cpu_to_be64((unsigned long)strm->next_in); asiv->in_buff_len = __cpu_to_be32(strm->avail_in); asiv->out_buff = __cpu_to_be64((unsigned long)strm->next_out); asiv->out_buff_len = __cpu_to_be32(strm->avail_out); /* setup header tree and scratch area */ asiv->inp_scratch = __cpu_to_be64((unsigned long)strm->wsp->tree); len = strm->hdr_ib + strm->tree_bits + strm->pad_bits + strm->scratch_ib + strm->scratch_bits; if (len % 8ULL) pr_warn("[%s] in_hdr_scratch_len: 0x%llx not consistent " "(0x%x 0x%x 0x%x 0x%x)\n", __func__, (long long)len, (unsigned int)strm->tree_bits, (unsigned int)strm->pad_bits, (unsigned int)strm->scratch_ib, (unsigned int)strm->scratch_bits); strm->in_hdr_scratch_len = (uint32_t)(len / 8ULL); strm->pre_scratch_bits = strm->tree_bits + strm->scratch_bits; /* This must not 
exceed ZEDC_TREE_LEN */ if (strm->in_hdr_scratch_len > ZEDC_TREE_LEN) pr_warn("[%s] in_scratch_len=%d exceeds ZEDC_TREE_LEN=%d\n", __func__, strm->in_hdr_scratch_len, ZEDC_TREE_LEN); asiv->in_scratch_len = __cpu_to_be32(strm->in_hdr_scratch_len); asiv->scratch_ib = strm->scratch_ib; asiv->hdr_ib = strm->hdr_ib; strm->in_hdr_bits = strm->tree_bits; asiv->in_hdr_bits = __cpu_to_be16(strm->in_hdr_bits); /* toggle dictionary page */ p = strm->wsp_page; asiv->in_dict = __cpu_to_be64((unsigned long)strm->wsp->dict[p] + strm->out_dict_offs); asiv->out_dict = __cpu_to_be64((unsigned long)strm->wsp->dict[p ^ 1]); strm->wsp_page ^= 1; asiv->in_dict_len = __cpu_to_be32(strm->dict_len); asiv->out_dict_len = __cpu_to_be32(ZEDC_DICT_LEN); asiv->in_crc32 = __cpu_to_be32(strm->crc32); asiv->in_adler32 = __cpu_to_be32(strm->adler32); } static int __save_buf_to_file(const char *fname, const uint8_t *buff, int len) { int rc; FILE *fp; if (buff == NULL) return ZEDC_ERR_INVAL; if (len == 0) return ZEDC_ERR_INVAL; pr_err("preserving %s %d bytes ...\n", fname, len); fp = fopen(fname, "w+"); if (!fp) { pr_err("Cannot open file %s: %s\n", fname, strerror(errno)); return ZEDC_ERRNO; } rc = fwrite(buff, len, 1, fp); if (rc != 1) { pr_err("Cannot write all data: %d\n", rc); fclose(fp); return ZEDC_ERRNO; } fclose(fp); return ZEDC_OK; } int zedc_inflateSaveBuffers(zedc_streamp strm, const char *prefix) { int rc; struct zedc_asiv_infl *asiv; struct ddcb_cmd *cmd; char fname[_POSIX_PATH_MAX]; if (!strm) return ZEDC_STREAM_ERROR; cmd = &strm->cmd; asiv = (struct zedc_asiv_infl *)&cmd->asiv; /* Buffers to dump */ snprintf(fname, sizeof(fname) - 1, "%s_in_buff.bin", prefix); rc = __save_buf_to_file(fname, (void *)(unsigned long) __be64_to_cpu(asiv->in_buff), __be32_to_cpu(asiv->in_buff_len)); if (rc != ZEDC_OK) return rc; snprintf(fname, sizeof(fname) - 1, "%s_out_buf.bin", prefix); __save_buf_to_file(fname, (void *)(unsigned long) __be64_to_cpu(asiv->out_buff), __be32_to_cpu(asiv->out_buff_len)); 
if (rc != ZEDC_OK) return rc; snprintf(fname, sizeof(fname) - 1, "%s_in_dict.bin", prefix); __save_buf_to_file(fname, (void *)(unsigned long) __be64_to_cpu(asiv->in_dict), __be32_to_cpu(asiv->in_dict_len)); if (rc != ZEDC_OK) return rc; snprintf(fname, sizeof(fname) - 1, "%s_out_dict.bin", prefix); __save_buf_to_file(fname, (void *)(unsigned long) __be64_to_cpu(asiv->out_dict), __be32_to_cpu(asiv->out_dict_len)); if (rc != ZEDC_OK) return rc; snprintf(fname, sizeof(fname) - 1, "%s_inp_scratch.bin", prefix); __save_buf_to_file(fname, (void *)(unsigned long) __be64_to_cpu(asiv->inp_scratch), __be32_to_cpu(asiv->in_scratch_len)); if (rc != ZEDC_OK) return rc; return ZEDC_OK; } /** * @brief main function for decompression * @param strm Common zedc parameter set. * @param flush Flush mode. * @return ZEDC_OK, ZEDC_STREAM_END, ZEDC_STREAM_ERROR, * ZEDC_MEM_ERROR. * * Review error conditions. E.g. some functions do not have a return * code. Is that ok or do we need to add it? */ int zedc_inflate(zedc_streamp strm, int flush) { int rc, zrc; uint32_t len; struct zedc_asiv_infl *asiv; struct zedc_asv_infl *asv; zedc_handle_t zedc; struct ddcb_cmd *cmd; unsigned int i, tries = 1; uint64_t out_dict = 0x0; uint32_t out_dict_len = 0x0; if (!strm) return ZEDC_STREAM_ERROR; zedc = (zedc_handle_t)strm->device; if (!zedc) return ZEDC_STREAM_ERROR; cmd = &strm->cmd; ddcb_cmd_init(cmd); /* clear completely */ /* * A limitation is needed to prevent internal overflow input * buffer must be smaller than 4GiB - 1KiB since additional * data can be added for S&R purposes. */ if (strm->avail_in > ZEDC_INFL_AVAIL_IN_MAX) { pr_err("input buffer too large\n"); return ZEDC_MEM_ERROR; } strm->flush = flush; strm->inp_data_offs = 0; /* * Pre-processing, restore data from previous task and copy * obytes to output buffer. */ rc = inflate_flush_output_buffer(strm); if (rc) { pr_err("inflate failed rc=%d\n", rc); return ZEDC_STREAM_ERROR; } /* Did we reach End-Of-Final-Block (or seen it before) ? 
*/ if (strm->infl_stat & INFL_STAT_FINAL_EOB) strm->eob_seen = 1; /* final EOB seen */ if (strm->eob_seen) { /* remove ZLIB/GZIP trailer */ rc = inflate_format_rem_trailer(strm); if (rc < 0) /* CRC or ADLER check failed */ return ZEDC_DATA_ERROR; if (rc == 1) return ZEDC_OK; /* need more trailer data */ if (strm->obytes_in_dict == 0) return ZEDC_STREAM_END; return ZEDC_OK; /* must re-enter */ } /* Output buffer now full ? */ if (strm->avail_out == 0) return ZEDC_OK; /* must re-enter */ /* Remove potential ZLIB/GZIP prefix */ if (HEADER_DONE != strm->header_state) { rc = inflate_format_rem_header(strm, flush); if (ZEDC_OK != rc) return rc; } if (strm->data_type & 0x80) return ZEDC_OK; /* must re-enter */ /* Exit if no input data present */ if ((strm->avail_in == 0) && (strm->scratch_bits == 0)) goto chk_ret; /* check stat and return END or OK */ /* Prepare Inflate DDCB */ cmd->cmd = ZEDC_CMD_INFLATE; cmd->acfunc = DDCB_ACFUNC_APP; cmd->asiv_length = 0x70 - 0x18; /* parts to be crc protected */ cmd->asv_length = 0xc0 - 0x80; cmd->ats = 0; cmd->cmdopts = 0x0; asiv = (struct zedc_asiv_infl *)&cmd->asiv; asv = (struct zedc_asv_infl *)&cmd->asv; /* input buffer: Use always SGL here */ if ((strm->dma_type[ZEDC_IN] & DDCB_DMA_TYPE_MASK) == DDCB_DMA_TYPE_FLAT) cmd->ats |= ATS_SET_FLAGS(struct zedc_asiv_infl, in_buff, ATS_TYPE_FLAT_RD); else cmd->ats |= ATS_SET_FLAGS(struct zedc_asiv_infl, in_buff, ATS_TYPE_SGL_RD); /* output buffer */ if ((strm->dma_type[ZEDC_OUT] & DDCB_DMA_TYPE_MASK) == DDCB_DMA_TYPE_FLAT) cmd->ats |= ATS_SET_FLAGS(struct zedc_asiv_infl, out_buff, ATS_TYPE_FLAT_RDWR); else cmd->ats |= ATS_SET_FLAGS(struct zedc_asiv_infl, out_buff, ATS_TYPE_SGL_RDWR); /* workspace */ if ((strm->dma_type[ZEDC_WS] & DDCB_DMA_TYPE_MASK) == DDCB_DMA_TYPE_FLAT) { cmd->ats |= (ATS_SET_FLAGS(struct zedc_asiv_infl, in_dict, ATS_TYPE_FLAT_RD) | ATS_SET_FLAGS(struct zedc_asiv_infl, out_dict, ATS_TYPE_FLAT_RDWR) | ATS_SET_FLAGS(struct zedc_asiv_infl, inp_scratch, 
ATS_TYPE_FLAT_RDWR)); } else { cmd->ats |= (ATS_SET_FLAGS(struct zedc_asiv_infl, in_dict, ATS_TYPE_SGL_RD) | ATS_SET_FLAGS(struct zedc_asiv_infl, out_dict, ATS_TYPE_SGL_RDWR) | ATS_SET_FLAGS(struct zedc_asiv_infl, inp_scratch, ATS_TYPE_SGL_RDWR)); } if (strm->flags & ZEDC_FLG_CROSS_CHECK) cmd->cmdopts |= DDCB_OPT_INFL_RAS_CHECK; /* + RAS */ /* Setup ASIV part (in big endian byteorder) */ set_inflate_asiv(strm, (struct zedc_asiv_infl *)&cmd->asiv); /* * Optimization attempt: If we are called with Z_FINISH, and we * assume that the data will fit into the provided output * buffer, we try to run the hardware without dictionary save * function. If we do not see INFL_STAT_FINAL_EOB, we need to * restart with dictionary save option. * * The desire is to keep small transfers efficient. It will * not have significant effect if we deal with huge data * streams. */ cmd->cmdopts |= DDCB_OPT_INFL_SAVE_DICT; /* SAVE_DICT */ tries = 1; if ((strm->flags & ZEDC_FLG_SKIP_LAST_DICT) && (flush == ZEDC_FINISH) && (strm->avail_out > strm->avail_in * 2)) { //static int count = 0; out_dict = asiv->out_dict; out_dict_len = asiv->out_dict_len; cmd->cmdopts &= ~DDCB_OPT_INFL_SAVE_DICT; asiv->out_dict = 0x0; asiv->out_dict_len = 0x0; tries = 2; //if (count++ < 2) // fprintf(stderr, "[%s] Try to optimize dict transfer: " // "avail_in=%d avail_out=%d\n", // __func__, strm->avail_in, strm->avail_out); } for (i = 0; i < tries; i++) { /* Execute inflate in HW */ zedc_asiv_infl_print(strm); rc = zedc_execute_request(zedc, cmd); zedc_asv_infl_print(strm); strm->retc = cmd->retc; strm->attn = cmd->attn; strm->progress = cmd->progress; /* * Dynamic/Fixed block decode: Distance is too far * back in the dictionary: (RETC=104 ATTN=801a * PROGR=0). 
*/ if ((rc == DDCB_ERR_EXEC_DDCB) && (cmd->retc == DDCB_RETC_FAULT) && (cmd->attn == 0x801A)) { strm->adler32 = strm->dict_adler32; pr_err("inflate ZEDC_NEED_DICT\n"); return ZEDC_NEED_DICT; } /* * GenWQE treats success or failure a little * differently than the CAPI implementation. CAPI * flags success, if the DDCB was treated by hardware * at all. This includes cases where RETC is not * 0x102. For GenWQE we flag success only if there is * a RETC of 0x102, this is done in the Linux driver. * * Doing this wrong, can lead to problems in the code * below, which processes DDCB result data, which * might not be valid, e.g. memmove() with wrong size. */ if ((rc < 0) || (cmd->retc != DDCB_RETC_COMPLETE)) { struct ddcb_cmd *cmd = &strm->cmd; pr_err("inflate failed rc=%d\n" "DDCB returned (RETC=%03x ATTN=%04x PROGR=%x) " "%s\n", rc, cmd->retc, cmd->attn, cmd->progress, cmd->retc == 0x102 ? "" : "ERR"); return ZEDC_STREAM_ERROR; } /* Wonderful, we have all the data we need, stop processing */ if (asv->infl_stat & INFL_STAT_FINAL_EOB) break; /* What a pity, we guessed wrong and need to repeat. We did not see the last byte in the last block yet! */ if ((strm->flags & ZEDC_FLG_SKIP_LAST_DICT) && (flush == ZEDC_FINISH)) { cmd->cmdopts |= DDCB_OPT_INFL_SAVE_DICT; asiv->out_dict = out_dict; asiv->out_dict_len = out_dict_len; pr_warn("[%s] What a pity, we guessed wrong " "and need to repeat\n", __func__); } } get_inflate_asv(strm, asv); rc = post_scratch_upd(strm); if (rc < 0) { pr_err("inflate scratch update failed rc=%d\n", rc); return ZEDC_STREAM_ERROR; } /* Sanity check: Hardware bug Get length of output data. Can also be 0! */ if (strm->outp_returned > strm->avail_out) { pr_err("OUTP_RETURNED too large (0x%x)\n", strm->outp_returned); return ZEDC_STREAM_ERROR; } strm->next_out += strm->outp_returned; strm->avail_out -= strm->outp_returned; strm->total_out += strm->outp_returned; /* Sanity check: Hardware claims to have processed more input data than offered. 
*/ len = strm->inp_data_offs; /* Just input bytes from next_in, not repeated tree, hdr, scratch bits */ /* fprintf(stderr, "LEN(%s): len=%d\n", __func__, len); */ if (len > strm->avail_in) { pr_err("consumed=%u/avail_in=%u\n", len, strm->avail_in); goto abort; } strm->next_in += len; strm->avail_in -= len; strm->total_in += len; zrc = ZEDC_OK; /* preset 0 */ /* Did we reach End-Of-Final-Block (or seen it before) ? */ if (strm->infl_stat & INFL_STAT_FINAL_EOB) strm->eob_seen = 1; /* final EOB seen */ if (strm->eob_seen) { /* remove ZLIB/GZIP trailer */ rc = inflate_format_rem_trailer(strm); if (rc < 0) /* CRC or ADLER check failed */ return ZEDC_DATA_ERROR; if (rc == 1) return ZEDC_OK; /* need more trailer data */ if (strm->obytes_in_dict == 0) return ZEDC_STREAM_END; return ZEDC_OK; /* must re-enter */ } /* If FEOB is in the middle of input and output is not excausted yet, it might be just ok. */ if (strm->avail_in && strm->avail_out) { pr_warn("[%s] input not completely processed " "(avail_in=%d avail_out=%d zrc=%d)\n", __func__, strm->avail_in, strm->avail_out, zrc); } return zrc; chk_ret: /* End of final block and no dict data to copy */ if ((strm->infl_stat & INFL_STAT_FINAL_EOB) && (strm->obytes_in_dict == 0)) return ZEDC_STREAM_END; /* done */ return ZEDC_OK; /* must re-enter */ abort: return ZEDC_STREAM_ERROR; } /** * @brief Initialize inflate state * @param strm Common zedc parameter set. 
/**
 * @brief	Bring the inflate related part of the stream context back
 *		to its start values. Used when a stream is initialized or
 *		reset; no memory is allocated or released here.
 * @param strm	Common zedc parameter set.
 */
static void __inflateInit_state(zedc_streamp strm)
{
	/* byte counters */
	strm->total_in = 0;
	strm->total_out = 0;

	/* workspace: start on page 0 with an empty dictionary */
	strm->wsp_page = 0;
	strm->dict_len = 0;
	strm->out_dict_offs = 0;
	strm->obytes_in_dict = 0;
	strm->havedict = 0;

	/* checksums: running values and the ones read from the file */
	strm->crc32 = 0;
	strm->adler32 = 1;
	strm->file_crc32 = 0;
	strm->file_adler32 = 0;
	strm->dict_adler32 = 0;

	strm->eob_seen = 0;

	/* header/tree/scratch bookkeeping handed to the hardware */
	strm->in_hdr_scratch_len = 0;
	strm->in_hdr_bits = 0;
	strm->hdr_ib = 0;
	strm->scratch_ib = 0;
	strm->scratch_bits = 0;
	strm->tree_bits = 0;
	strm->pad_bits = 0;
	strm->pre_scratch_bits = 0;
	strm->inp_data_offs = 0;
	strm->in_data_used = 0;

	/* values returned by the hardware */
	strm->inp_processed = 0;
	strm->outp_returned = 0;
	strm->proc_bits = 0;
	strm->infl_stat = 0;
	strm->hdr_start = 0;
	strm->out_hdr_bits = 0;
	strm->out_hdr_start_bits = 0;
	strm->copyblock_len = 0;

	/* GZIP/ZLIB header (prefix) parsing */
	strm->prefx_len = 0;
	strm->prefx_idx = 0;
	memset(strm->prefx, 0, sizeof(strm->prefx));
	strm->xlen = 0;
	strm->header_state = HEADER_START;

	/* GZIP/ZLIB trailer (postfix) parsing */
	strm->postfx_len = 0;
	strm->postfx_idx = 0;
	memset(strm->postfx, 0, sizeof(strm->postfx));

	ddcb_cmd_init(&strm->cmd);	/* clear completely */
}
inflate stream. Do not deallocate memory. * @param strm Common zedc parameter set * @param dictionary Alternate dictionary data to be used. * @return ZEDC_OK on success, else failure. */ int zedc_inflateSetDictionary(zedc_streamp strm, const uint8_t *dictionary, unsigned int dictLength) { uint32_t a32; if (strm == NULL) return ZEDC_STREAM_ERROR; if (dictLength > ZEDC_DICT_LEN) return ZEDC_STREAM_ERROR; if (strm->format == ZEDC_FORMAT_ZLIB) { a32 = __adler32(1, dictionary, dictLength); if (a32 != strm->dict_adler32) return ZEDC_DATA_ERROR; } memcpy(&strm->wsp->dict[0], dictionary, dictLength); strm->dict_len = dictLength; strm->havedict = 1; /* just need this once */ strm->adler32 = 1; /* back to default again */ return ZEDC_OK; } /** * @brief Get current input dictionary * @param strm Stream * @param dictionary dictionary buffer, 32KiB, used if not NULL * @param dictLength length of dictionary, returned if not NULL * @return ZEDC_OK on success, else failure. */ int zedc_inflateGetDictionary(zedc_streamp strm, uint8_t *dictionary, unsigned int *dictLength) { unsigned int p; uint8_t *in_dict; if (!strm) return ZEDC_STREAM_ERROR; if (dictLength) *dictLength = strm->dict_len; if (dictionary == NULL) return ZEDC_OK; p = strm->wsp_page; in_dict = strm->wsp->dict[p] + strm->out_dict_offs; memcpy(dictionary, in_dict, strm->dict_len); return ZEDC_OK; } /** * @brief Reset inflate stream. Do not deallocate memory. * @param strm Common zedc parameter set. * @return ZEDC_OK on success, else failure. */ int zedc_inflateReset(zedc_streamp strm) { int rc; if (!strm) return ZEDC_STREAM_ERROR; __inflateInit_state(strm); rc = zedc_format_init(strm); if (rc != ZEDC_OK) return rc; return ZEDC_OK; } int zedc_inflateReset2(zedc_streamp strm, int windowBits) { int rc; if (!strm) return ZEDC_STREAM_ERROR; __inflateInit_state(strm); strm->windowBits = windowBits; rc = zedc_format_init(strm); if (rc != ZEDC_OK) return rc; return ZEDC_OK; } /** * @brief End inflate (de-compress). 
* @param strm Common zedc parameter set. * @return ZEDC_OK on success, else failure. */ int zedc_inflateEnd(zedc_streamp strm) { zedc_handle_t zedc; if (!strm) return ZEDC_STREAM_ERROR; zedc = (zedc_handle_t)strm->device; if (!zedc) return ZEDC_STREAM_ERROR; zedc_free_workspace(strm); return ZEDC_OK; } int zedc_inflateGetHeader(zedc_streamp strm, gzedc_headerp head) { strm->gzip_head = head; head->done = 0; return ZEDC_OK; } genwqe-user-4.0.18/lib/libcard.c000066400000000000000000001550201303345043000163560ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Generic support to enqueue DDCB commands. Card maintenace functions * for bitstream and VPD support as well as register access for * debugging purposes. * * The DDCB commands can contain references to user memory. There are * two different ways to describe memory within DDCBs. The first one * is raw contignous memory allocated with help of the driver. Since * some OSes do cannot guaranteee more than one memory page * (e.g. 4KiB) contigously, there is a 2nd way to describe a memory * reference. This is done by passing the virtual user-space address * to the device driver and instructing it to build up a scatter * gather list, which describes the data. This can be done dynamically * or optimized by previously pinning the memory area used for data * processing. 
The unpinning is done when the file-descriptor is * closed or when the unpin function is called. * * When the card handle is opened with the GENWQE_MODE_ASYNC flag set, * the library will enable SIGIO generation by device driver. This is * needed to recover gracefully when a card died. * * In addition the library will start a health checking pthread, which * checks periodically every 1 sec if the file descriptors to use the * card are still usable. If not the broken file descriptors are * closed. When the last descriptor is closed the library will stop * the health thread. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "card_defs.h" #include "libcard.h" //#define CONFIG_USE_SIGNAL #undef CONFIG_USE_SIGNAL #define CRC32_POLYNOMIAL 0x20044009 #define MAX_GENWQ_CARDS 16 #define MAX_VFUNCTIONS 16 #define MAX_FUNC_NUM (MAX_GENWQ_CARDS * MAX_VFUNCTIONS) #define INVALID_FD -1 #define CONFIG_RETRY_TIMEOUT 30 /* timeout in sec in Multi Card Mode */ enum dev_state {DEV_ALLOC, DEV_HAVE_FD, DEV_REQ_CLOSE, DEV_FREE}; /* This is the internal structure for each Stream */ struct card_dev_t { int card_no; /* The Card# or GENWQE_CARD_REDUNDANT */ enum dev_state dev_state; /* The state of this Stream */ int mode; /* GENWQE_MODE_ASYNC | GENWQE_MODE_RDWR | * GENWQE_MODE_NONBLOCK */ int omode; int fd_s; /* fd for Single mode */ int drv_rc; /* driver return codes */ int drv_errno; /* driver errno */ uint64_t slu_id; /* service layer version */ uint64_t app_id; /* app version */ struct fd_node *m_fd_ptr; /* Ptr. to current fd in Multi fd list */ struct card_dev_t *next; /* Next Card (Single Mode only) */ struct card_dev_t *prev; /* Prev. 
/*
 * Library wide state, one instance (lib_data) per process: CRC table,
 * health thread handling, bookkeeping of open file descriptors and the
 * data used by the inotify thread.
 */
struct lib_data_t {
	uint32_t	crc32_tab[256];	/* CRC32 calculation table */
	pthread_t	thread_id;	/* Thread id of health thread or -1 */
	sem_t		health_sem;	/* Semaphore to post the health thread */
	int		thread_rc;
	pthread_mutex_t	fds_mutex;	/* Lock for the fd bookkeeping */
	int		fd_s_count;	/* # of open fd's in card_dev_t */
	int		fd_m_count;	/* # of fd's in fd_m_list */
	int		m_mode_save;	/* Mode used when the devices are
					 * opened in Multi Mode */
	/* State of each card, indexed by card number */
	enum genwqe_fd_state genwqe_state[MAX_FUNC_NUM];
#if defined(CONFIG_USE_SIGNAL)
	struct sigaction oldact;	/* Used for SIGIO */
	struct sigaction newact;	/* Used for SIGIO */
#endif
	/* more data for inotify */
	int	inotify_rc;		/* rc from inotify thread */
	int	inotify_fd;		/* fd from inotify create */
	int	inotify_wd;		/* from inotify_add_watch and delete */
	int	inotify_card;		/* the decimal card num (0..256) */
	pthread_t inotify_tid;		/* tid from the inotify thread */
	/* NOTE(review): enum inotify_ev only offers INOTIFY_IDLE and
	 * INOTIFY_ATTRIB; the former "IDLE or CREATE or DELETE" looks
	 * stale - confirm */
	enum inotify_ev inotify_event;
};
please see errno", [ABS(GENWQE_ERR_CARD)] = "problem detected with card, please see errno and returned data", [ABS(GENWQE_ERR_OPEN)] = "could not get card handle", [ABS(GENWQE_ERR_VERS_MISMATCH)] = "libzcard version mismatch", [ABS(GENWQE_ERR_INVAL)] = "invalid parameter", [ABS(GENWQE_ERR_FLASH_VERIFY)] = "verification of flash failed", [ABS(GENWQE_ERR_FLASH_READ)] = "reading flash failed", [ABS(GENWQE_ERR_FLASH_UPDATE)] = "updating card failed", [ABS(GENWQE_ERR_GET_STATE)] = "cannot get state of card", [ABS(GENWQE_ERR_SIM)] = "simulation of card had a problem", [ABS(GENWQE_ERR_EXEC_DDCB)] = "error on ddcb execution occurred, please see errno and returned data", [ABS(GENWQE_ERR_PINNING)] = "memory buffer pinning error, see errno", [ABS(GENWQE_ERR_TESTMODE)] = "problem in testmode", [ABS(GENWQE_ERR_APPID)] = "not supported application id", }; static const int card_nerr __attribute__((unused)) = ARRAY_SIZE(card_errlist); const char *card_strerror(int errnum) { if (ABS(errnum) >= card_nerr) return NULL; return card_errlist[ABS(errnum)]; } static const char * const retc_errlist[] = { [ABS(DDCB_RETC_IDLE)] = "unexecuted/untouched DDCB", [ABS(DDCB_RETC_PENDING)] = "pending execution", [ABS(DDCB_RETC_COMPLETE)] = "command complete. 
no error", [ABS(DDCB_RETC_FAULT)] = "application error, recoverable, please see ATTN and PROGR", [ABS(DDCB_RETC_ERROR)] = "application error, non-recoverable, please see ATTN and PROGR", [ABS(DDCB_RETC_FORCED_ERROR)] = "overwritten by driver", [ABS(DDCB_RETC_UNEXEC)] = "unexecuted/removed from queue", [ABS(DDCB_RETC_TERM)] = "terminated", }; static const int retc_nerr __attribute__((unused)) = ARRAY_SIZE(retc_errlist); const char *retc_strerror(int errnum) { if (ABS(errnum) >= retc_nerr) return NULL; return retc_errlist[ABS(errnum)]; } static int __genwqe_card_get_state(int fd, enum genwqe_card_state *state) { return ioctl(fd, GENWQE_GET_CARD_STATE, state); } static int __mode_2_omode(int mode) { int omode = 0; // Create Open mode from mode if (mode & GENWQE_MODE_RDONLY) omode |= O_RDONLY; if (mode & GENWQE_MODE_WRONLY) omode |= O_WRONLY; if (mode & GENWQE_MODE_RDWR) omode |= O_RDWR; if (mode & GENWQE_MODE_NONBLOCK) omode |= O_NONBLOCK; /* Remove this checking, FASYNC will be set later on with fcntl */ //if (mode & GENWQE_MODE_ASYNC) //omode |= FASYNC; return omode; } static int __genwqe_dev_open(int card_no, int mode) { int fd; int omode; char card_dev[256]; // temp dev name omode = __mode_2_omode(mode); snprintf(card_dev, sizeof(card_dev) - 1, CARD_DEVICE, card_no); fd = open(card_dev, omode); if (fd < 0) return INVALID_FD; #if defined(CONFIG_USE_SIGNAL) if (GENWQE_MODE_ASYNC & mode) { int oflags; /* * Set FASYNC flag to catch the SIGIO when a card gets * removed. 
*/ fcntl(fd, F_SETOWN, getpid()); oflags = fcntl(fd, F_GETFL); fcntl(fd, F_SETFL, oflags | FASYNC); } #endif pr_info("__genwqe_dev_open: %s OK fd: %d (omode: 0x%x mode: 0x%x)\n", card_dev, fd, omode, mode); return fd; } static struct fd_node *__fd_m_new(struct fd_node *parent, int fd, int card) { struct fd_node *node; node = malloc(sizeof(struct fd_node)); if (node) { node->m_fd = fd; node->card_num = card; node->next = NULL; node->prev = parent; if (parent) parent->next = node; } else pr_err("malloc failed\n"); return node; } static void __fd_m_head_all(void); static void __fd_m_add(int fd, int card) { struct fd_node *head; if (NULL == __fd_m_list) { __fd_m_list = __fd_m_new(NULL, fd, card); /* i am adding the first fd to my list, set pointer to head */ __fd_m_head_all(); } else { head = __fd_m_list; while (NULL != head->next) head = head->next; __fd_m_new(head, fd, card); } return; } static void __fd_m_del(int fd) { struct fd_node *this = __fd_m_list; while (this) { if (this->m_fd == fd) { if ((NULL == this->next) && (NULL == this->prev)) { /* Erase last */ __fd_m_list = NULL; } else if (NULL == this->next) { /* Erase tail */ this->prev->next = NULL; } else if (NULL == this->prev) { /* Erase 1st */ this->next->prev = NULL; __fd_m_list = this->next; } else { /* Erase middle */ this->prev->next = this->next; this->next->prev = this->prev; } free(this); return; } this = this->next; } pr_err("fd: %d not found in fd_m_list: %p\n", fd, __fd_m_list); return; } /* * Function: __fd_m_head_all() * * @brief: This function is called in the case whe a fd fails in Multi * fd mode. The card will then be removed from the list and it can be * tat any device using the fd_list is trying to get the next fd and * this fd is removed in the meantime. I go over all devices and reset * then m_fd_ptr to the head of the List. 
*/ static void __fd_m_head_all(void) { struct card_dev_t *dev = (struct card_dev_t *)m_dev_head; while (dev) { dev->m_fd_ptr = __fd_m_list; // Set Head dev = dev->next; } } static int __fd_m_head(struct card_dev_t *dev) { struct fd_node *now; int fd = INVALID_FD; now = __fd_m_list; dev->m_fd_ptr = now; // Set Head if (now) fd = now->m_fd; pr_info("__fd_m_head at: %p fd: %d\n", now, fd); return fd; } /* * Note: fds_mutex must be held. Get a fd in multi fd (Redundant) * Mode and increment to next fd. */ static int __fd_m_get_and_inc(struct card_dev_t *dev, int *card_num) { struct fd_node *now, *next; int fd = INVALID_FD; // Set to INVALID now = dev->m_fd_ptr; // Get current Position of fd list if (now) { fd = now->m_fd; // Take this fd if (card_num) *card_num = now->card_num; next = now->next; // Next if (NULL == next) // Check for end next = __fd_m_list; // Reset to Head dev->m_fd_ptr = next; // and save } return fd; } static int __fd_get(struct card_dev_t *dev, int *card_num) { struct lib_data_t *ld = &lib_data; int fd; pthread_mutex_lock(&ld->fds_mutex); if (GENWQE_CARD_REDUNDANT == dev->card_no) fd = __fd_m_get_and_inc(dev, card_num); else { fd = dev->fd_s; // Normal Mode, return fd_s if (card_num) *card_num = dev->card_no; } pthread_mutex_unlock(&ld->fds_mutex); return fd; } static int __m_open_add(int card_no, int mode) { int fd; fd = __genwqe_dev_open(card_no, mode); if (INVALID_FD != fd) { __fd_m_add(fd, card_no); return 1; // Good } return 0; // Can not Open } /* * Function: __genwqe_filter() * * @brief Filter for scandir as helper function for __m_open_all() * @parm Ptr. to name in dev * @return 1 if name matches any of my genwqe devices */ static int __genwqe_filter(const struct dirent *name) { if (0 == strncmp(name->d_name, GENWQE_DEVNAME, 6)) return 1; return 0; } /* * Function: __m_open_all() * * @brief opens all genwqe cards * @param Pointer to dev * @return Number of opend fd's or 0 for bad. 
*/ static int __m_open_all(struct lib_data_t *ld) { int found_cards = 0; int card_no; int n, rc; struct dirent **namelist; n = scandir("/dev", &namelist, __genwqe_filter, NULL); if (n < 0) return 0; while (n--) { rc = sscanf(namelist[n]->d_name, GENWQE_DEVNAME"%u_card", &card_no); if ((1 == rc) && (card_no >= 0) && (card_no < 256)) { switch (ld->genwqe_state[card_no]) { case CARD_CLOSED: // Try to Open if (__m_open_add(card_no, ld->m_mode_save)) { ld->fd_m_count++; ld->genwqe_state[card_no] = CARD_OPEN; found_cards++; } break; case CARD_OPEN: // card is already open found_cards++; break; default: break; } } free(namelist[n]); } free(namelist); return found_cards; } /* * Function: __node_create() * * Creates (allocates) memory for a new node */ static struct card_dev_t *__node_create(int card_no, int mode) { struct card_dev_t *new_dev; new_dev = malloc(sizeof(struct card_dev_t)); if (new_dev) { new_dev->card_no = card_no; new_dev->dev_state = DEV_ALLOC; /* Add Data to Node */ new_dev->mode = mode; new_dev->omode = __mode_2_omode(mode); new_dev->slu_id = 0; new_dev->app_id = 0; new_dev->fd_s = INVALID_FD; /* Set Single fd to Invalid */ new_dev->m_fd_ptr = NULL; new_dev->next = NULL; new_dev->prev = NULL; new_dev->verify = new_dev; } else pr_err("Malloc failed for card %d\n", card_no); return new_dev; } /* * Function __node_add() * * allocates a new card object. Its only a control * Block with a fd for one card. The new Object will ba added * to the end of the list. */ static struct card_dev_t *__node_add(int card_no, void **head, int mode) { struct card_dev_t *parent, *new_dev; new_dev = __node_create(card_no, mode); if (NULL == new_dev) return NULL; parent = (struct card_dev_t*)*head; if (NULL == *head) *head = (void*)new_dev; else { while (NULL != parent->next) parent = parent->next; parent->next = new_dev; } new_dev->prev = parent; return new_dev; } /* * Function: __node_delete() * * called form __s_node_delete and __m__node_delete() * Deletes a node form List. 
*/ static void __node_delete(struct card_dev_t *node, void **head) { if (node->verify != node) { pr_err("Invalid Dev: %p to delete.\n", node); return; } node->dev_state = DEV_FREE; if ((NULL == node->next) && (NULL == node->prev)) { /* Delete Last Element clears also root_node */ *head = NULL; } else if (NULL == node->next) { /* Delete Tail Element, no change on root_node */ node->prev->next = NULL; } else if (NULL == node->prev) { /* Delete Head Element, need to change root_node */ node->next->prev = NULL; *head = (void*)node->next; } else { /* something in the middle, root_node stays */ node->prev->next = node->next; node->next->prev = node->prev; } free(node); } /* ------------------------- START of Health Function's -------------------- */ #if defined(CONFIG_USE_SIGNAL) /** * FIXME The next task we need to solve is to figure out which * file-descriptor is actually broken, when we are receiving SIGIO. * This descriptor must than be closed and not used again e.g. set to * -1. For recovery it might be reopened using an alternate card, or * when the currently unusable card should reappear after successful * recovery. */ static void __health_sa_sigaction(int sig, siginfo_t *si, void *data) { struct lib_data_t *ld = &lib_data; /* global variable */ pr_warn("[%s] sig=%d si=%p data=%p si_fd=%d si_code=%d\n" " FIXME The next task we need to solve is to figure\n" " out which file-descriptor is actually broken, when\n" " we are receiving SIGIO.\n\n", __func__, sig, si, data, si->si_fd, si->si_code); card_health_signal++; sem_post(&ld->health_sem); } #endif /* * Function: __inotify_handle() * runs in health thread and adds a new card * after the ATTRIB notification was send to me. 
*/ static void __inotify_handle(struct lib_data_t *ld) { int card; if (INOTIFY_ATTRIB == ld->inotify_event) { card = ld->inotify_card; pr_info("%s Open Card: %d\n", __func__, card); if (__m_open_add(card, ld->m_mode_save)) { pr_info("%s Open Card: %d OK\n", __func__, card); ld->fd_m_count++; ld->genwqe_state[card] = CARD_OPEN; ld->inotify_event = INOTIFY_IDLE; // go back to IDLE } } } /* Helper function to check the multi fd list */ static int __mhealth_check(struct lib_data_t *ld) { int fd, card_no; struct card_dev_t *dev, *dev_next; enum genwqe_card_state card_state; enum genwqe_fd_state state; struct fd_node *fd_list, *fd_list_next; pr_info("%s Enter %d open Fd's.\n", __func__, ld->fd_m_count); __inotify_handle(ld); // handle events from inotify /* Delete pending Close dev's */ dev = (struct card_dev_t *)m_dev_head; while (dev) { dev_next = dev->next; pr_info("%s Dev: %p State: %d\n", __func__, dev, dev->dev_state); if (DEV_REQ_CLOSE == dev->dev_state) __node_delete(dev, (void*)&m_dev_head); dev = dev_next; } /* Check if all entries in the dev list are gone */ if (NULL == m_dev_head) { /* If so, i go over my list of the fds and close this as well */ fd_list = __fd_m_list; while (fd_list) { fd_list_next = fd_list->next; card_no = fd_list->card_num; fd = fd_list->m_fd; pr_info("Close: %p Card: %d fd: %d\n", fd_list, card_no, fd); close(fd); ld->genwqe_state[card_no] = CARD_CLOSED; __fd_m_del(fd); /* Remove from List */ ld->fd_m_count--; fd_list = fd_list_next; } pr_info("%s Close Exit Count: %d (Must be 0 !)\n", __func__, ld->fd_m_count); return 0; } /* take all Open fd's in list and check if they are alive */ fd_list = __fd_m_list; while (fd_list) { fd_list_next = fd_list->next; fd = fd_list->m_fd; card_no = fd_list->card_num; state = ld->genwqe_state[card_no]; if (CARD_OPEN == state) { __genwqe_card_get_state(fd, &card_state); if (GENWQE_CARD_USED != card_state) { pr_info("%s delete from List: %p Card: %d " "fd: %d\n", __func__, fd_list, card_no, fd); 
__fd_m_del(fd); /* Remove from List */ __fd_m_head_all(); /* Reset all Devs to head * fd list */ close(fd); /* Close */ ld->genwqe_state[card_no] = CARD_CLOSED; ld->fd_m_count--; } } fd_list = fd_list_next; } pr_info("%s EXIT: %p fd_list: %p with %d Entry's.\n", __func__, m_dev_head, __fd_m_list, ld->fd_m_count); if (m_dev_head) return 1; // Keep Going return 0; // Exit } /* * Function: __shealth_check() * * Helper function to check the Single fd list */ static int __shealth_check(struct lib_data_t *ld) { struct card_dev_t *dev, *dev_next; enum genwqe_card_state card_state; int fd; /* Process Single Mode Chain */ dev = (struct card_dev_t*)s_dev_head; while (dev) { dev_next = dev->next; fd = dev->fd_s; pr_info("%s: Node: %p fd: %d State: %d\n", __func__, dev, fd, dev->dev_state); if(DEV_REQ_CLOSE == dev->dev_state) { __node_delete(dev, (void*)&s_dev_head); if (INVALID_FD != fd) { close(fd); ld->fd_s_count--; } } else { if (INVALID_FD != fd) { __genwqe_card_get_state(fd, &card_state); if (GENWQE_CARD_USED != card_state) { __node_delete(dev, (void*)&s_dev_head); close(fd); ld->fd_s_count--; } } } dev = dev_next; } if (s_dev_head) return 1; return 0; } #if defined(CONFIG_USE_SIGNAL) static void __inotify_termination_handler(int signum) { /* struct lib_data_t *ld = &lib_data; */ /* global variable */ pr_info("%s Signum: %d \n", __func__, signum); } #endif /* * Function: __inotify_handle_event() * Called from: __inotify_thread() * Handels data from inotify read. 
*/ static void __inotify_handle_event(int len, char *buf, struct lib_data_t *ld) { struct inotify_event *ie; int i, card, n; i = 0; pr_info("__inotify_handle_event %d\n", len); while (i < len) { ie = (struct inotify_event*) &buf[i]; if ((ie->mask & IN_ATTRIB) && (ie->len > 0)) { n = sscanf(ie->name, GENWQE_DEVNAME"%d", &card); if (1 == n) { /* Make sure that the new card */ /* was gone before adding back in */ if (CARD_CLOSED == ld->genwqe_state[card]) { /* Create was done, ATTRIB, */ /* was set, Post Health Sem to Open again */ ld->inotify_card = card; ld->inotify_event = INOTIFY_ATTRIB; /* post __inotify_handle */ usleep(50000); /* !!! need some delay */ pr_info("%s Start Health " "Thread for new " "Card: %s\n", __func__, ie->name); sem_post(&ld->health_sem); } } } i += sizeof(struct inotify_event) + ie->len; } } /* * Function: __inotify_thread() * This thread waits for Delete and Create events for * genwqe*_card in /dev */ static void *__inotify_thread(void *data) { int len, rc; struct lib_data_t *ld = (struct lib_data_t *)data; char buf[sizeof(struct inotify_event) + PATH_MAX]; fd_set rfds; sigset_t sig_empty_mask; #if defined(CONFIG_USE_SIGNAL) struct sigaction action; sigset_t sigmask; sigemptyset(&sigmask); sigaddset(&sigmask, SIGUSR1); sigprocmask(SIG_BLOCK, &sigmask, NULL); action.sa_handler = __inotify_termination_handler; sigemptyset( &action.sa_mask ); action.sa_flags = 0; sigaction(SIGUSR1, &action, NULL); /* set SIGUSR1 to kill me */ sigemptyset(&sig_empty_mask); #endif while (1) { /* Exit because of sig handler */ FD_ZERO(&rfds); FD_SET(ld->inotify_fd, &rfds); /* Set fd */ rc = pselect(FD_SETSIZE, &rfds, NULL, NULL, NULL, &sig_empty_mask); if (rc > 0) { len = read(ld->inotify_fd, buf, sizeof(buf)); if (-1 == len) { /* Read fails, set rc and Exit */ ld->inotify_rc = 100; break; } __inotify_handle_event(len, buf, ld); } else { if (rc < 0) { /* EINTR: Select was killed by SIGUSR1 */ ld->inotify_rc = 200; break; } } } pr_info("%s exit fd: %d wd: %d\n", 
__func__, ld->inotify_fd, ld->inotify_wd); pthread_exit(&ld->inotify_rc); } /* * Function: __inotify_create() * This functions creates the inotify event handler thread * only for multiple mode */ static void __inotify_create(struct lib_data_t *ld) { int fd, wd; if ((pthread_t)-1 != ld->inotify_tid) return; // already Running fd = inotify_init(); if (fd < 0) { pr_err("Failed to initialize inotify instance %d\n", errno); return; } /* i use ATTRIB to watch only*/ wd = inotify_add_watch(fd, "/dev", IN_ATTRIB); if (wd < 0) { pr_err("Failed to add inotify watch. %d\n", errno); return; } ld->inotify_event = INOTIFY_IDLE; ld->inotify_fd = fd; ld->inotify_wd = wd; /* Create thread */ if (0 == pthread_create(&ld->inotify_tid, NULL, &__inotify_thread, ld)) return; pr_err("%s failed!\n", __func__); return; } static void __fixup_fd_lists(struct lib_data_t *ld) { pr_info("%s fd_s_count: %d fd_m_count: %d\n", __func__, ld->fd_s_count, ld->fd_m_count); /* Check m_list if there, only check s_list if no m_list */ if (m_dev_head) __mhealth_check(ld); else if (s_dev_head) __shealth_check(ld); } /* * Function: __health_thread() * runs every 10 second or when it gets a post from * sighandler or inotify. The thread ends if there is no * fd left open. */ static void *__health_thread(void *data) { struct lib_data_t *ld = (struct lib_data_t *)data; while (1) { /* int rc; */ struct timespec ts; /* INOTIFY: Block, inotify and signal handler will post me */ if (clock_gettime(CLOCK_REALTIME, &ts) == -1) perror("clock_gettime"); ts.tv_sec += 4; sem_timedwait(&ld->health_sem, &ts); /* * fprintf(stderr, "sem_timedwait ... returned %d: %s\n", * rc, rc == -1 ? 
strerror(errno) : "OK"); */ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); pthread_mutex_lock(&ld->fds_mutex); __fixup_fd_lists(ld); pthread_mutex_unlock(&ld->fds_mutex); pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); } pr_info("%s exit S: %p:%d M: %p:%d\n", __func__, s_dev_head, ld->fd_s_count, m_dev_head, ld->fd_m_count); pthread_exit(&ld->thread_rc); } /** * @brief Install signal handler for SIGIO to enable us to react on * problems with one of the card. In case of problems we need to close * any open filedescriptor for those cards in trouble, to enable them * to go through our recovery processes which involves the associated * devices to go away and come back when the problem got resolved. * * If the file descriptors of a broken card are not closed 4 sec after * receiving the SIGIO, the process will be killed with a SIGKILL. * * Start healthchecking if usage is more than 0. We do not want to * waste resources if there are no cards in use. */ static int __health_thread_start(struct lib_data_t *ld) { int rc; if ((pthread_t)-1 != ld->thread_id) return 0; // Thread already running rc = sem_init(&ld->health_sem, 0, 0); if (0 != rc) goto err_out; rc = pthread_create(&ld->thread_id, NULL, &__health_thread, ld); if (0 != rc) goto err_out; #if defined(CONFIG_USE_SIGNAL) sigemptyset(&ld->newact.sa_mask); ld->newact.sa_sigaction = __health_sa_sigaction; ld->newact.sa_flags = SA_SIGINFO; if (0 == sigaction(SIGIO, &ld->newact, &ld->oldact)) return 0; #endif return 0; err_out: ld->thread_id = -1; pr_err("%s failed rc=%d\n", __func__, rc); return -1; } /* ---------------------------- END of Health Function's ------------------- */ static void __card_get_app(struct card_dev_t *dev) { /* Read and save SLU_ID and APP_ID */ dev->slu_id = genwqe_card_read_reg64(dev, IO_SLU_UNITCFG, NULL); dev->app_id = genwqe_card_read_reg64(dev, IO_APP_UNITCFG, NULL); } /* * Function: __genwqe_open_one() * * open one genwqe card */ static int __genwqe_open_one(struct card_dev_t *dev) 
{ int card_no_masked = dev->card_no & GENWQE_TESTMODE_MASK; int fd; struct lib_data_t *ld = &lib_data; fd = __genwqe_dev_open(card_no_masked, dev->mode); dev->drv_errno = errno; if (INVALID_FD != fd) { dev->drv_rc = fd; dev->fd_s = fd; __card_get_app(dev); ld->fd_s_count++; return fd; } return INVALID_FD; } /* * Function: __genwqe_open_all() * * @brief open all genwqe cards * @param Pointer to dev * @return Number of opend fd's or INVALID for bad. */ static int __genwqe_open_all(struct card_dev_t *dev) { int cards; struct lib_data_t *ld = &lib_data; int fd = INVALID_FD; /* I make sure that all opens do have the same mode for open */ if (-1 == ld->m_mode_save) ld->m_mode_save = dev->mode; /* Save mode in case i need to reopen */ else if (ld->m_mode_save != dev->mode) dev->mode = ld->m_mode_save; /* Keep Old mode and overwrite */ cards = __m_open_all(ld); if (cards) { fd = __fd_m_head(dev); // Set to Head pr_info("%s %d Cards with %d fd's, use fd: %d first.\n", __func__, cards, ld->fd_m_count, fd); dev->fd_s = fd; /* Take fd and save to dev */ __card_get_app(dev); /* Get SLU and APP form my 1st Card */ __inotify_create(ld); } return fd; } /** * Check correctness of the application id. This function must not be * verbose. It already returns a meaningful return code to indicate * that the id was not right. */ static int __card_check_app(struct card_dev_t *dev, uint64_t app_id, uint64_t mask) { if ((dev->app_id & mask) != (app_id & mask)) { pr_info("Wrong AppID: %016llx Expect: %016llx Mask: %016llx " "on fd %d\n", (unsigned long long)dev->app_id, (unsigned long long)app_id, (unsigned long long)mask, dev->fd_s); return GENWQE_ERR_APPID; } return GENWQE_OK; } /** * @brief enable or disable debug outputs from library. 
* pr_info() is activated or disabled * * @param onoff if 0 -> no outputs * * @return - */ void genwqe_card_lib_debug(int onoff) { _dbg_flag = onoff; } /** * @brief setup CRC32 table (crc32_tab) for fast calculation */ static void ddcb_setup_crc32(struct lib_data_t *d) { int i, j; uint32_t crc; for (i = 0; i < 256; i++) { crc = i << 24; for (j = 0; j < 8; j++) { if (crc & 0x80000000) crc = (crc << 1) ^ CRC32_POLYNOMIAL; else crc = (crc << 1); } d->crc32_tab[i] = crc; } } /** * @brief generate 32-bit crc as required for DDCBs * polynomial = x^32 + x^29 + x^18 + x^14 + x^3 + 1 * - example: * 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffffffff * should result in a crc32 of 0xf33cb7d3 * * @param buff pointer to data buffer * @param len leongth of data for calculation * @param init initial crc (0xffffffff at start) * * @return crc32 checksum in big endian format ! */ uint32_t genwqe_ddcb_crc32(uint8_t *buff, size_t len, uint32_t init) { int i; uint32_t crc; crc = init; while (len--) { i = ((crc >> 24) ^ *buff++) & 0xFF; crc = (crc << 8) ^ lib_data.crc32_tab[i]; } return crc; } int genwqe_get_drv_rc(card_handle_t dev) { return dev->drv_rc; } int genwqe_get_drv_errno(card_handle_t dev) { return dev->drv_errno; } int genwqe_card_get_state(card_handle_t dev, enum genwqe_card_state *state) { int fd; if (NULL == dev) return GENWQE_ERR_INVAL; fd = __fd_get(dev, NULL); dev->drv_rc = __genwqe_card_get_state(fd, state); if (0 == dev->drv_rc) return GENWQE_OK; return GENWQE_ERR_GET_STATE; } /** * @brief reads 64-bit register number 'offs' from GENWQE card * @param offs mmio offset as defined in genwqe_io.h * @return register content */ uint64_t genwqe_card_read_reg64(card_handle_t dev, uint32_t offs, int *rc) { struct genwqe_reg_io io; if (rc) *rc = GENWQE_ERR_CARD; if (NULL == dev) return GENWQE_ERR_INVAL; io.num = offs; io.val64 = 0; dev->drv_rc = ioctl(dev->fd_s, GENWQE_READ_REG64, &io); dev->drv_errno = errno; if (dev->drv_rc < 0) io.val64 = 0; else { if (rc) /* FIXME Strange? 
... */ *rc = GENWQE_OK; } return io.val64; } /** * @brief reads 32-bit register number 'offs' from GENWQE card * @param offs mmio offset as defined in genwqe_io.h * @return register content */ uint32_t genwqe_card_read_reg32(card_handle_t dev, uint32_t offs, int *rc) { struct genwqe_reg_io io; if (rc) *rc = GENWQE_ERR_CARD; if (NULL == dev) return GENWQE_ERR_INVAL; /* i am only allowing this in not redundant mode */ io.num = offs; /* register offset for 32-bit pointer */ io.val64 = 0; dev->drv_rc = ioctl(dev->fd_s, GENWQE_READ_REG32, &io); dev->drv_errno = errno; if (dev->drv_rc < 0) io.val64 = 0; else { if (rc) /* FIXME Strange? ... */ *rc = GENWQE_OK; } return (uint32_t)io.val64; } /** * @brief writes 64-bit register number 'offs' with value 'val' * @param offs 32-bit mmio offset as defined in genwqe_io.h * @return register content */ int genwqe_card_write_reg64(card_handle_t dev, uint32_t offs, uint64_t val) { struct genwqe_reg_io io; if (NULL == dev) return GENWQE_ERR_INVAL; /* i am only allowing this in not redundant mode */ io.num = offs; io.val64 = val; dev->drv_rc = ioctl(dev->fd_s, GENWQE_WRITE_REG64, &io); dev->drv_errno = errno; if (0 == dev->drv_rc) return GENWQE_OK; return GENWQE_ERR_CARD; } #define MAX_GENWQ_CARDS 16 #define MAX_VFUNCTIONS 16 #define MAX_FUNC_NUM (MAX_GENWQ_CARDS * MAX_VFUNCTIONS) /** * @brief writes 32-bit register number 'offs' with value 'val' * @param card [in] card handle * @param offs [in] 32-bit mmio offset as defined in genwqe_io.h * @return register content */ int genwqe_card_write_reg32(card_handle_t dev, uint32_t offs, uint32_t val) { struct genwqe_reg_io io; if (NULL == dev) return GENWQE_ERR_INVAL; /* i am only allowing this in not redundant mode */ io.num = (__u64)offs; io.val64 = (__u64)val; dev->drv_rc = ioctl(dev->fd_s, GENWQE_WRITE_REG32, &io); dev->drv_errno = errno; if (0 == dev->drv_rc) return GENWQE_OK; return GENWQE_ERR_CARD; } /** * @brief initialization of the Genwqe card and the GENWQE library * * allocates 
and presets required memory, sets version numbers * and opens a card device. * * @param card_no card number to use if > 0, * GENWQE_CARD_REDUNDANT pick free card and recover if * card is unavailable. * @param mode select different characteristics e.g. use SIGIO, ... * @return 0 if success */ card_handle_t genwqe_card_open(int card_no, int mode, int *err_code, uint64_t card_app_id, uint64_t card_app_id_mask) { card_handle_t dev; struct lib_data_t *ld = &lib_data; int fd; int rc; pthread_mutex_lock(&ld->fds_mutex); pr_info("%s Enter Card: %d\n", __func__, card_no); if (GENWQE_CARD_REDUNDANT == card_no) dev = __node_add(card_no, (void*)&m_dev_head, mode); else dev = __node_add(card_no, (void*)&s_dev_head, mode); __health_thread_start(ld); /* Needs Mutex protection */ if (NULL == dev) { pthread_mutex_unlock(&ld->fds_mutex); if (err_code) *err_code = GENWQE_ERRNO; return NULL; } if (GENWQE_CARD_REDUNDANT == card_no) fd = __genwqe_open_all(dev); else fd = __genwqe_open_one(dev); /* Check if i do have an fd */ if (INVALID_FD != fd) { rc = __card_check_app(dev, card_app_id, card_app_id_mask); if (err_code) *err_code = rc; if (GENWQE_OK == rc) { dev->dev_state = DEV_HAVE_FD; pr_info("%s Exit Card: %d Dev: %p\n", __func__, card_no, dev); pthread_mutex_unlock(&ld->fds_mutex); return dev; } } if (err_code) *err_code = GENWQE_ERR_OPEN; pr_info("%s Err Dev: %p Card: %d fd: %d\n", __func__, dev, card_no, fd); genwqe_card_close(dev); pthread_mutex_unlock(&ld->fds_mutex); return NULL; } /** * @brief end GENWQE library accesses * close all open files, free memory * * @param card pointer to the opened device descriptor * * @return GENWQE_OK if everything is ok. 
*/
int genwqe_card_close(card_handle_t dev)
{
	/* Reject NULL and handles whose self pointer does not match. */
	if ((NULL == dev) || (dev->verify != dev))
		return GENWQE_ERR_INVAL;

	/* Only flag the request and wake the health thread. */
	dev->dev_state = DEV_REQ_CLOSE;
	pr_info("%s Request %p fd: %d\n", __func__, dev, dev->fd_s);
	sem_post(&lib_data.health_sem);
	return GENWQE_OK;
}

/**
 * @brief retrieve operating systems file handle
 *
 * @param card  pointer to the opened device descriptor
 *
 * @return      file handle, or GENWQE_ERR_INVAL for a NULL handle
 */
int genwqe_card_fileno(card_handle_t dev)
{
	return dev ? __fd_get(dev, NULL) : GENWQE_ERR_INVAL;
}

/**
 * @brief Prepare buffer to do DMA transactions. The driver will
 * create DMA mappings for this buffer and will allocate memory to
 * hold and sglist which describes the buffer. When executing DDCBs
 * the driver will use the cached entry before it tries to dynamically
 * allocate a new one. The intend is to speed up performance. The
 * resources are freed on device close or when calling the unpin
 * function.
 *
 * @param [in] dev        .dev handle
 * @param [in] addr       user space address of memory buffer
 * @param [in] size       size of user space memory buffer
 * @param [in] direction  0: read/1: read and write
 * @return GENWQE_LIB_OK on success or negative error code.
*/ int genwqe_pin_memory(card_handle_t dev, const void *addr, size_t size, int direction) { int rc, fd; struct genwqe_mem m; m.addr = (unsigned long)addr; m.size = size; m.direction = direction; pr_info("pin: addr=%016lx size=%08lx dir=%d Card=%p ", (unsigned long)addr, (unsigned long)size, direction, dev); if (dev) { if (dev == dev->verify) { fd = __fd_get(dev, NULL); pr_info("Card %d\n", dev->card_no); dev->drv_rc = rc = ioctl(fd, GENWQE_PIN_MEM, &m); dev->drv_errno = errno; if (0 == rc) return GENWQE_OK; } } pr_err("Dev: %p Fault: %d addr=%p size=%lld dir=%d\n", dev, dev->drv_errno, addr, (long long)size, direction); return GENWQE_ERR_PINNING; } int genwqe_unpin_memory(card_handle_t dev, const void *addr, size_t size) { int rc, fd; struct genwqe_mem m; m.addr = (unsigned long)addr; m.size = size; m.direction = 0; pr_info("unpin: addr=%016lx size=%08lx card=%p", (unsigned long)addr, (unsigned long)size, dev); if (dev) { if (dev == dev->verify) { fd = __fd_get(dev, NULL); pr_info("Card %d fd %d\n", dev->card_no, fd); dev->drv_rc = rc = ioctl(fd, GENWQE_UNPIN_MEM, &m); dev->drv_errno = errno; if (0 == rc) return GENWQE_OK; } } pr_err("Dev: %p Fault: %d addr=%p size=%lld\n", dev, dev->drv_errno, addr, (long long)size); return GENWQE_ERR_PINNING; } static int __genwqe_card_execute(card_handle_t dev, struct genwqe_ddcb_cmd *req, int func) { int rc, fd, fd2, card_num; struct genwqe_ddcb_cmd *cmd; struct timeval ts, te; /* Start and End time */ struct lib_data_t *ld = &lib_data; if (NULL == dev) return GENWQE_ERR_EXEC_DDCB; if (dev != dev->verify) return GENWQE_ERR_EXEC_DDCB; gettimeofday(&ts, NULL); fd = __fd_get(dev, &card_num); cmd = req; while (cmd != NULL) { retry: /* wait until DDCB is processed */ rc = ioctl(fd, func, cmd); dev->drv_errno = errno; dev->drv_rc = rc; if (rc < 0) { /* * Check all filedescriptors and close the * non-working ones. Retrying makes only sense * with a valid list of working cards. 
If this * is not done, it happened that we retried * with an card in trouble ... */ sem_post(&ld->health_sem); if (GENWQE_CARD_REDUNDANT == dev->card_no) { /* * We can try to use next card in case * of Busy or error if Multi mode was * enabled. */ gettimeofday(&te, NULL); if ((te.tv_sec - ts.tv_sec) > CONFIG_RETRY_TIMEOUT) { /* Timeout */ pr_warn("%s exit Timeout fault: %d " "fd: %d\n", __func__, errno, fd); return GENWQE_ERR_EXEC_DDCB; } /* next fd from queue */ fd2 = __fd_get(dev, &card_num); if (fd2 != fd) /* if there is a new fd */ fd = fd2; /* swap to new fd */ else usleep(1000000);/* no fd in queue */ card_retried_ddcbs[card_num]++; goto retry; /* and retry again */ } if (errno == EBUSY) { card_retried_ddcbs[card_num]++; goto retry; } pr_err("%s exit fault: %d fd: %d rc: %d card_no: %d\n", __func__, errno, fd, rc, dev->card_no); return GENWQE_ERR_EXEC_DDCB; } card_completed_ddcbs[card_num]++; cmd = (struct genwqe_ddcb_cmd *)(unsigned long)cmd->next_addr; } return GENWQE_OK; } /** * @brief Execute a DDCB request with no DMA buffer translations. * @param card handle returned from 'card_open()' * @param req req describes the DDCB which should be executed. */ int genwqe_card_execute_raw_ddcb(card_handle_t dev, struct genwqe_ddcb_cmd *req) { return __genwqe_card_execute(dev, req, GENWQE_EXECUTE_RAW_DDCB); } /** * @brief Execute a DDCB request with automatic DMA buffer translations. * @param card handle returned from 'card_open()' * @param req req describes the DDCB which should be executed. */ int genwqe_card_execute_ddcb(card_handle_t dev, struct genwqe_ddcb_cmd *req) { return __genwqe_card_execute(dev, req, GENWQE_EXECUTE_DDCB); } /** * @brief Get contiguous DMAable memory (usage instead of SG list) * * Allocating memory via the driver will always result in page alinged * memory. Since this is a feature, we use memalign to mimic the same * for simulation mode. 
*/
void *genwqe_card_malloc(card_handle_t dev, size_t size)
{
	void *mem;

	/* Needs a valid single-card handle. */
	if ((NULL == dev) || (dev != dev->verify) ||
	    (GENWQE_CARD_REDUNDANT == dev->card_no))
		return NULL;

	if (INVALID_FD == dev->fd_s) {
		/* simulation mode */
		unsigned int page_size = sysconf(_SC_PAGESIZE);

		mem = memalign(page_size, size);
	} else {
		/* normal operation */
		mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			   dev->fd_s, 0);
	}

	if (MAP_FAILED == mem) {
		pr_err("%s size %d errno: %d/%s\n", __func__,
		       (int)size, errno, strerror(errno));
		return NULL;
	}
	return mem;
}

/**
 * @brief Free driver/device allocated DMAable memory.
 *
 * @param dev   single-card handle the memory was allocated with
 * @param ptr   start of the memory area
 * @param size  size of the memory area
 * @return      0 on success, GENWQE_ERR_INVAL for a NULL or redundant
 *              handle, GENWQE_ERRNO when munmap() failed
 */
int genwqe_card_free(card_handle_t dev __attribute__((unused)),
		     void *ptr, size_t size)
{
	if (NULL == dev)
		return GENWQE_ERR_INVAL;
	if (GENWQE_CARD_REDUNDANT == dev->card_no)
		return GENWQE_ERR_INVAL;

	if (INVALID_FD == dev->fd_s) {
		/* simulation mode */
		free(ptr);
		return 0;
	}

	/* normal operation */
	if (-1 == munmap(ptr, size)) {
		pr_err("%s: %p Size: %d Errno: %d\n", __func__,
		       ptr, (int)size, errno);
		return GENWQE_ERRNO;
	}
	return 0;
}

/* Allocate a zero-filled SCB through genwqe_card_malloc(). */
void *genwqe_card_alloc_scb(card_handle_t card, size_t size)
{
	void *scb = genwqe_card_malloc(card, size);

	if (scb != NULL)
		memset(scb, 0, size);
	return scb;
}

/**
 * card_set_ats_flags() - Set ATS flags correctly for data/pointers
 * at offset offs.
 *
 * Each 4-bit in the ATS array corresponds to 8 bytes in the scb. The
 * 1st ATS bits describe the ATS array itself and must therefore be
 * plain data read-only: ATS_TYPE_DATA. The remaining bits can
 * identify plan data read-only or rw, sgl version 1 or sgl version 2,
 * or even a scb read-only or rw itself. Recursion avoidance when
 * parsing this is likely a good idea to avoid loops.
*/ int genwqe_card_set_ats_flags(void *scb, size_t size, size_t offs, int flags) { uint8_t *ats_array = scb; /* ATS fields start at the beginning */ unsigned int ats, idx; const uint8_t mask[2] = { 0xf0, 0x0f }; if (((unsigned long)scb % 8) || /* 8 byte start addr alignment */ (size % 8) || /* 8 byte size alignment required */ (offs % 8) || /* 8 byte offset alignment required */ (offs > size - 8)) return GENWQE_ERR_INVAL; /* offset must not exceed size */ /* * Let's try to represent ATS[n] a byte array. Each * byte/8-bits contain 2 4-bit entries in this case. * IBM bit notation requires starting at the MSB first. * This should result in the following example: * * offs mask ATS IDX * ----------------------------------------------------------------- * 0 0xf000_0000_0000_0000 0x0000_0000_0000_0000 ... 0 0 * 8 0x0f00_0000_0000_0000 0x0000_0000_0000_0000 ... 0 1 * 16 0x00f0_0000_0000_0000 0x0000_0000_0000_0000 ... 1 0 * 24 0x000f_0000_0000_0000 0x0000_0000_0000_0000 ... 1 1 * 32 0x0000_f000_0000_0000 0x0000_0000_0000_0000 ... 2 0 * 40 0x0000_0f00_0000_0000 0x0000_0000_0000_0000 ... 2 1 * ... 
*/ ats = offs / 16; idx = (offs / 8) & 0x1; ats_array[ats] &= ~mask[idx]; /* while out previous setting */ if (idx == 0) /* shift flags to correct position */ ats_array[ats] |= (0x0f & flags) << 4; else ats_array[ats] |= (0x0f & flags); return GENWQE_OK; } int genwqe_card_free_scb(card_handle_t card, void *scb, size_t size) { return genwqe_card_free(card, scb, size); } static void __hexdump(FILE *fp, const void *buff, unsigned int size) { unsigned int i; const uint8_t *b = (uint8_t *)buff; for (i = 0; i < size; i++) { if ((i & 0x0f) == 0x00) fprintf(fp, " %p: ", &b[i]); fprintf(fp, " %02x", b[i]); if ((i & 0x0f) == 0x0f) fprintf(fp, "\n"); } fprintf(fp, "\n"); } /** * @brief DDCB dump function * * @param fp error device * @param buff DDCB buffer * @param size size of bytes to dump */ void genwqe_hexdump(FILE *fp, const void *buff, unsigned int size) { unsigned int i; const uint8_t *b = (uint8_t *)buff; char ascii[17]; char str[2] = { 0x0, }; for (i = 0; i < size; i++) { if ((i & 0x0f) == 0x00) { fprintf(fp, " %08x:", i); memset(ascii, 0, sizeof(ascii)); } fprintf(fp, " %02x", b[i]); str[0] = isalnum(b[i]) ? b[i] : '.'; str[1] = '\0'; strncat(ascii, str, sizeof(ascii) - 1); if ((i & 0x0f) == 0x0f) fprintf(fp, " | %s\n", ascii); } /* print trailing up to a 16 byte boundary. 
*/ for (; i < ((size + 0xf) & ~0xf); i++) { fprintf(fp, " "); str[0] = ' '; str[1] = '\0'; strncat(ascii, str, sizeof(ascii) - 1); if ((i & 0x0f) == 0x0f) fprintf(fp, " | %s\n", ascii); } fprintf(fp, "\n"); } static int __genwqe_flash_read(card_handle_t dev, char partition, uint8_t *buf, int buflen, uint16_t *retc, uint16_t *attn, uint32_t *progr) { struct genwqe_bitstream load; if (NULL == dev) return GENWQE_ERR_CARD; if (GENWQE_CARD_REDUNDANT == dev->card_no) return GENWQE_ERR_CARD; memset(&load, 0, sizeof(load)); load.target_addr = 0x0; /* addr to start flashing */ load.uid = 0x01; /* get data from host */ load.partition = (uint8_t)partition; /* '0', '1', 'v' */ load.data_addr = (unsigned long)buf; /* vaddr of data to flash */ load.size = buflen; memset(buf, 0, buflen); /* ensure buffer is filled up with 0s */ dev->drv_rc = ioctl(dev->fd_s, GENWQE_SLU_READ, &load); dev->drv_errno = errno; /* copy potential results even before we check the return code */ if (retc) *retc = load.retc; if (attn) *attn = load.attn; if (progr) *progr = load.progress; if (dev->drv_rc != 0) return GENWQE_ERR_CARD; return GENWQE_OK; } int genwqe_flash_read(card_handle_t dev, struct card_upd_params *upd) { int rc, fd, buflen; uint8_t *buf; unsigned int page_size = sysconf(_SC_PAGESIZE); /* we need page aligned start and length */ buflen = (upd->flength + page_size) & ~(page_size - 1); buf = memalign(page_size, buflen); if (!buf) return GENWQE_ERRNO; /* open image file */ fd = open(upd->fname, O_EXCL|O_CREAT|O_WRONLY|O_TRUNC, 0644); if (fd < 0) { rc = GENWQE_ERR_FLASH_READ; goto err_exit; } rc = __genwqe_flash_read(dev, upd->partition, buf, buflen, &upd->retc, &upd->attn, &upd->progress); if (rc < 0) goto err_exit; rc = (int)write(fd, buf, (size_t)upd->flength); close(fd); if (rc != (int)upd->flength) { rc = GENWQE_ERR_FLASH_READ; goto err_exit; } rc = GENWQE_OK; err_exit: free(buf); /* buffer is needed until DDCB is processed */ return rc; } static int __genwqe_flash_update(card_handle_t 
card, char partition, const uint8_t *buf, int buflen, uint16_t *retc, uint16_t *attn, uint32_t *progr) { struct genwqe_bitstream load; memset(&load, 0, sizeof(load)); load.target_addr = 0x0; /* addr to start flashing */ load.uid = 0x01; /* get data from host */ load.partition = (uint8_t)partition; /* '0', '1', 'v' */ load.data_addr = (unsigned long)buf; /* vaddr of data to flash */ load.size = buflen; card->drv_rc = ioctl(card->fd_s, GENWQE_SLU_UPDATE, &load); card->drv_errno = errno; /* copy potential results even before we check the return code */ if (retc) *retc = load.retc; if (attn) *attn = load.attn; if (progr) *progr = load.progress; if (card->drv_rc < 0) return GENWQE_ERR_CARD; return GENWQE_OK; } int genwqe_flash_update(card_handle_t dev, struct card_upd_params *upd, int verify) { int rc = GENWQE_OK; struct stat filestat; struct genwqe_bitstream load; uint8_t *buf; int fd, buflen; unsigned int page_size = sysconf(_SC_PAGESIZE); if (NULL == dev) return GENWQE_ERR_INVAL; if (GENWQE_CARD_REDUNDANT == dev->card_no) return GENWQE_ERR_INVAL; /* Cannot do this in Redundant mode */ memset(&load, 0, sizeof(load)); upd->flength = 0; fd = open(upd->fname, O_RDONLY); if (fd < 0) return GENWQE_ERRNO; rc = fstat(fd, &filestat); if (rc < 0) { close(fd); return GENWQE_ERRNO; } upd->flength = filestat.st_size; /* setup page aligned buffer for image data */ /* we need page aligned start and length */ buflen = (filestat.st_size + page_size) & ~(page_size - 1); buf = memalign(page_size, 2 * buflen); if (!buf) { close(fd); return GENWQE_ERRNO; } memset(buf, 0, 2 * buflen); /* ensure buffer is filled up with 0s */ load.slu_id = upd->slu_id; load.app_id = upd->app_id; load.target_addr = 0x0; /* addr to start flashing */ load.uid = 0x01; /* get data from host */ load.partition = (uint8_t)upd->partition; /* '0', '1', 'v' */ load.data_addr = (unsigned long)buf; /* vaddr of data to flash */ load.size = filestat.st_size; /* size of data to flash */ /* read image file */ rc = 
(int)read(fd, (void *)(unsigned long)load.data_addr, (size_t)load.size); close(fd); if (rc != (int)load.size) { free(buf); return GENWQE_ERRNO; } /* checksum across complete file */ load.crc = genwqe_ddcb_crc32((void *)(unsigned long)load.data_addr, load.size, (uint32_t)-1); dev->drv_rc = rc = ioctl(dev->fd_s, GENWQE_SLU_UPDATE, &load); dev->drv_errno = errno; /* copy potential results even before we check the return code */ upd->retc = load.retc; upd->attn = load.attn; upd->progress = load.progress; if (rc < 0) { free(buf); return GENWQE_ERRNO; } if (verify) { unsigned int i; uint8_t *vbuf = buf + buflen; /* we allocated more space */ rc = __genwqe_flash_read(dev, upd->partition, vbuf, buflen, &upd->retc, &upd->attn, &upd->progress); if (rc < 0) { free(buf); return rc; } for (i = 0; i < upd->flength; i++) { if (buf[i] != vbuf[i]) { pr_err("compare mismatch offs %d:\n", i); __hexdump(stderr, &buf[i], 32); pr_err("read:\n"); __hexdump(stderr, &vbuf[i], 32); free(buf); return GENWQE_ERR_FLASH_VERIFY; } } } pr_info("%s update done\n", __func__); free(buf); /* buffer is needed until DDCB is processed */ return rc; } int genwqe_read_vpd(card_handle_t card, genwqe_vpd *vpd) { int rc; uint16_t retc, attn; uint32_t progr; size_t buflen; uint8_t *buf; unsigned int page_size = sysconf(_SC_PAGESIZE); if (NULL == card) return GENWQE_ERR_INVAL; if (GENWQE_CARD_REDUNDANT == card->card_no) return GENWQE_ERR_INVAL; /* Cannot do this in Redundant mode */ /* buffer for __genwqe_flash_read() must be page aligned */ buflen = (sizeof(*vpd) + page_size) & ~(page_size - 1); buf = memalign(page_size, buflen); if (!buf) return GENWQE_ERRNO; rc = __genwqe_flash_read(card, 'v', buf, buflen, &retc, &attn, &progr); if (rc < 0) { pr_err("reading VPD failed retc=%03x attn=%x progr=%x " "rc=%d drv_rc=%d drv_errno=%d\n", retc, attn, progr, rc, card->drv_rc, card->drv_errno); goto err_exit; } memcpy(vpd, buf, sizeof(*vpd)); err_exit: free(buf); return rc; } int genwqe_write_vpd(card_handle_t card, 
const genwqe_vpd *vpd) { int rc; uint16_t retc, attn; uint32_t progr; size_t buflen; uint8_t *buf; unsigned int page_size = sysconf(_SC_PAGESIZE); if (NULL == card) return GENWQE_ERR_INVAL; if (GENWQE_CARD_REDUNDANT == card->card_no) return GENWQE_ERR_INVAL; /* Cannot do this in Redundant mode */ /* buffer for __genwqe_flash_read() must be page aligned */ buflen = (sizeof(*vpd) + page_size) & ~(page_size - 1); buf = memalign(page_size, buflen); if (!buf) return GENWQE_ERRNO; memcpy(buf, vpd, sizeof(*vpd)); rc = __genwqe_flash_update(card, 'v', buf, buflen, &retc, &attn, &progr); if (rc < 0) { pr_err("writing VPD failed retc=%03x attn=%x progr=%x " "rc=%d drv_rc=%d drv_errno=%d\n", retc, attn, progr, rc, card->drv_rc, card->drv_errno); } free(buf); return rc; } /** * @brief extended error handling * print versions and dump DDCB data */ void genwqe_print_debug_data(FILE *fp, struct genwqe_debug_data *debug_data, int flags) { if (debug_data == NULL) return; if (flags & GENWQE_DD_IDS) fprintf(fp, "driver:%s SLU/APP: %016llx.%016llx\n\n", debug_data->driver_version, (long long)debug_data->slu_unitcfg, (long long)debug_data->app_unitcfg); if (flags & GENWQE_DD_DDCB_BEFORE) { fprintf(fp, "ddcb before processing:\n"); genwqe_hexdump(fp, &debug_data->ddcb_before, sizeof(debug_data->ddcb_before)); } if (flags & GENWQE_DD_DDCB_PREVIOUS) { fprintf(fp, "ddcb previous:\n"); genwqe_hexdump(fp, &debug_data->ddcb_prev, sizeof(debug_data->ddcb_prev)); } if (flags & GENWQE_DD_DDCB_PROCESSED) { fprintf(fp, "ddcb processed:\n"); genwqe_hexdump(fp, &debug_data->ddcb_finished, sizeof(debug_data->ddcb_finished)); } } static void libcard_init(void) __attribute__((constructor)); static void libcard_exit(void) __attribute__((destructor)); /* constructor */ static void libcard_init(void) { int rc, i; struct lib_data_t *ld = &lib_data; ddcb_setup_crc32(ld); rc = pthread_mutex_init(&ld->fds_mutex, NULL); if (rc != 0) pr_err("initializing mutex failed!\n"); /* Init the rest so i do not need to 
call memset */ ld->thread_rc = 0; ld->thread_id = -1; // No tid ld->fd_m_count = 0; // No Multi fd's in list ld->fd_s_count = 0; // No Single fd's in List ld->m_mode_save = -1; // Not Set yet for (i = 0; i < MAX_FUNC_NUM; i++) // Clear out Multi List ld->genwqe_state[i] = CARD_CLOSED; m_dev_head = NULL; s_dev_head = NULL; /* some more for inotify */ ld->inotify_rc = 1; /* Set to some other value than 0 */ ld->inotify_tid = -1; /* No thread id */ ld->inotify_event = INOTIFY_IDLE; } /* destructor */ static void libcard_exit(void) { struct lib_data_t *ld = &lib_data; card_handle_t dev; pr_info("%s Enter (s:%p m:%p fd:%p)\n", __func__, s_dev_head, m_dev_head, __fd_m_list); pthread_mutex_lock(&ld->fds_mutex); dev = (struct card_dev_t *)s_dev_head; while (dev) { pr_info("Request Single List: %p to close.\n", dev); dev->dev_state = DEV_REQ_CLOSE; dev = dev->next; } dev = (struct card_dev_t *)m_dev_head; while (dev) { pr_info("Request Multi List: %p to close.\n", dev); dev->dev_state = DEV_REQ_CLOSE; dev = dev->next; } pthread_mutex_unlock(&ld->fds_mutex); if (ld->inotify_tid != (pthread_t)-1) { /* Send kill Signal to inotify thread */ pthread_cancel(ld->inotify_tid); /* and wait to Join */ pthread_join(ld->inotify_tid, NULL); ld->inotify_tid = -1; inotify_rm_watch(ld->inotify_fd, ld->inotify_wd); } if (ld->thread_id != (pthread_t)-1) { /* Send kill Signal to inotify thread */ pthread_cancel(ld->thread_id); /* and wait to Join */ pthread_join(ld->thread_id, NULL); ld->thread_id = -1; } pthread_mutex_lock(&ld->fds_mutex); __fixup_fd_lists(ld); pthread_mutex_unlock(&ld->fds_mutex); pthread_mutex_destroy(&ld->fds_mutex); pr_info("%s EXIT (s:%p m:%p fd:%p)\n", __func__, s_dev_head, m_dev_head, __fd_m_list); } /* ** @brief Overwrite slu id in Card control block * * @param card the Pointer to the card device * @param slu_id Slu id value for overwrite */ void card_overwrite_slu_id(card_handle_t dev, uint64_t slu_id) { if (dev) dev->slu_id = slu_id; } /* ** @brief Overwrite appl 
id in card control block * * @param card the pointer to card device * @param app_id Value of app id for Overwrite */ void card_overwrite_app_id(card_handle_t dev, uint64_t app_id) { if (dev) dev->app_id = app_id; } /* ** @brief Get Card appl id from card control block * * @param card the pointer to card device * @return App ID from this dev (e.g. 0x00000002475a4950) */ uint64_t card_get_app_id(card_handle_t dev) { if (dev) return dev->app_id; return 0; } int genwqe_dump_statistics(FILE *fp) { int card_num; fprintf(fp, "GenWQE card statistics\n"); for (card_num = 0; card_num < NUM_CARDS; card_num++) { if ((card_completed_ddcbs[card_num] == 0) && (card_retried_ddcbs[card_num] == 0)) continue; fprintf(fp, " genwqe%u_card completed DDCBs: %5d retried: %5d\n", card_num, card_completed_ddcbs[card_num], card_retried_ddcbs[card_num]); } #if defined(CONFIG_USE_SIGNAL) fprintf(fp," Health SIGIO: %d\n", card_health_signal); #endif return 0; } genwqe-user-4.0.18/lib/libddcb.c000066400000000000000000000316501303345043000163430ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Generic support to enqueue DDCB commands. * * Might be a cool thing if we could use libcard.h directly as * absorber. Need to do more experiments to figure out if that is * possible to limit changes we need to perform to get in a CAPI * capable version. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef CONFIG_DONT_USE_INOTIFY #include #endif #include #ifndef ARRAY_SIZE # define ARRAY_SIZE(a) (sizeof((a)) / sizeof((a)[0])) #endif #ifndef ABS # define ABS(a) (((a) < 0) ? -(a) : (a)) #endif /* This is the internal structure for each Stream */ struct card_dev_t { int card_no; /* card id: FIXEM do we need card_dev? */ int card_type; /* type of card: GenWQE, CAPI, CAPIsim, ... */ int mode; void *card_data; /* private data from underlying layer */ int card_rc; /* return code from lower level */ int card_errno; /* errno from lower level */ struct ddcb_accel_funcs *accel; /* supported set of functions */ }; static unsigned int ddcb_trace = 0x0; #define ddcb_gather_statistics(accel) \ (ddcb_trace & DDCB_FLAG_STATISTICS) static struct ddcb_accel_funcs *accel_list = NULL; int libddcb_verbose = 0; FILE *libddcb_fd_out; static inline uint64_t get_usec(void) { struct timeval t; gettimeofday(&t, NULL); return t.tv_sec * 1000000 + t.tv_usec; } static struct ddcb_accel_funcs *find_accelerator(int card_type) { struct ddcb_accel_funcs *accel; for (accel = accel_list; accel != NULL; accel = accel->priv_data) { if (accel->card_type == card_type) return accel; } return NULL; } static const char * const retc_errlist[] = { [ABS(DDCB_RETC_IDLE)] = "unexecuted/untouched DDCB", [ABS(DDCB_RETC_PENDING)] = "pending execution", [ABS(DDCB_RETC_COMPLETE)] = "command complete. 
no error", [ABS(DDCB_RETC_FAULT)] = "application error, recoverable, please see ATTN and PROGR", [ABS(DDCB_RETC_ERROR)] = "application error, non-recoverable, please see ATTN and PROGR", [ABS(DDCB_RETC_FORCED_ERROR)] = "overwritten by driver", [ABS(DDCB_RETC_UNEXEC)] = "unexecuted/removed from queue", [ABS(DDCB_RETC_TERM)] = "terminated", }; static const int retc_nerr __attribute__((unused)) = ARRAY_SIZE(retc_errlist); const char *ddcb_retc_strerror(int errnum) { if (ABS(errnum) >= retc_nerr) return "unknown error code"; return retc_errlist[ABS(errnum)]; } static const char * const ddcb_errlist[] = { [ABS(DDCB_ERRNO)] = "libc call went wrong", [ABS(DDCB_ERR_CARD)] = "problems accessing accelerator", [ABS(DDCB_ERR_OPEN)] = "cannot open accelerator", [ABS(DDCB_ERR_VERS_MISMATCH)] = "library version mismatch", [ABS(DDCB_ERR_INVAL)] = "illegal parameters", [ABS(DDCB_ERR_EXEC_DDCB)] = "ddcb execution failed", [ABS(DDCB_ERR_APPID)] ="application id wrong", [ABS(DDCB_ERR_NOTIMPL)] = "function not implemented", [ABS(DDCB_ERR_ENOMEM)] = "out of memory", [ABS(DDCB_ERR_ENOENT)] = "entry not found", [ABS(DDCB_ERR_IRQTIMEOUT)] = "timeout waiting on irq event", [ABS(DDCB_ERR_EVENTFAIL)] = "failed waiting on expected event", }; static const int ddcb_nerr __attribute__((unused)) = ARRAY_SIZE(ddcb_errlist); const char *ddcb_strerror(int errnum) { if (ABS(errnum) >= ddcb_nerr) return "unknown error code"; return ddcb_errlist[ABS(errnum)]; } void ddcb_hexdump(FILE *fp, const void *buff, unsigned int size) { unsigned int i; const uint8_t *b = (uint8_t *)buff; char ascii[17]; char str[2] = { 0x0, }; if (fp == NULL) return; for (i = 0; i < size; i++) { if ((i & 0x0f) == 0x00) { fprintf(fp, " %08x:", i); memset(ascii, 0, sizeof(ascii)); } fprintf(fp, " %02x", b[i]); str[0] = isalnum(b[i]) ? b[i] : '.'; str[1] = '\0'; strncat(ascii, str, sizeof(ascii) - 1); if ((i & 0x0f) == 0x0f) fprintf(fp, " | %s\n", ascii); } /* print trailing up to a 16 byte boundary. 
*/ for (; i < ((size + 0xf) & ~0xf); i++) { fprintf(fp, " "); str[0] = ' '; str[1] = '\0'; strncat(ascii, str, sizeof(ascii) - 1); if ((i & 0x0f) == 0x0f) fprintf(fp, " | %s\n", ascii); } fprintf(fp, "\n"); } void ddcb_debug(int verbosity) { libddcb_verbose = verbosity; } void ddcb_set_logfile(FILE *fd_out) { libddcb_fd_out = fd_out; } accel_t accel_open(int card_no, unsigned int card_type, unsigned int mode, int *err_code, uint64_t appl_id, uint64_t appl_id_mask) { int rc = DDCB_OK; struct card_dev_t *card; struct ddcb_accel_funcs *accel; uint64_t s = 0, e = 0; if (ddcb_gather_statistics()) s = get_usec(); card = calloc(1, sizeof(*card)); if (card == NULL) { rc = DDCB_ERR_ENOMEM; goto err_out; } accel = find_accelerator(card_type); if (accel == NULL) { rc = DDCB_ERR_ENOENT; goto err_free; } card->card_no = card_no; card->card_type = card_type; card->mode = mode; card->accel = accel; if (card->accel->card_open == NULL) { rc = DDCB_ERR_NOTIMPL; goto err_free; } card->card_data = card->accel->card_open(card_no, mode, &card->card_rc, appl_id, appl_id_mask); if (card->card_data == NULL) { rc = DDCB_ERR_CARD; goto err_free; } if (err_code) *err_code = DDCB_OK; if (ddcb_gather_statistics()) { e = get_usec(); pthread_mutex_lock(&accel->slock); accel->num_open++; accel->time_open += (e - s); pthread_mutex_unlock(&accel->slock); } return card; err_free: free(card); err_out: if (err_code) *err_code = rc; return NULL; } int accel_close(accel_t card) { int rc; struct ddcb_accel_funcs *accel; uint64_t s = 0, e = 0; if (card == NULL) return DDCB_ERR_INVAL; accel = card->accel; if (ddcb_gather_statistics()) s = get_usec(); if (accel == NULL) return DDCB_ERR_INVAL; if (accel->card_close == NULL) return DDCB_ERR_NOTIMPL; rc = accel->card_close(card->card_data); free(card); if (ddcb_gather_statistics()) { e = get_usec(); pthread_mutex_lock(&accel->slock); accel->num_close++; accel->time_close += (e - s); pthread_mutex_unlock(&accel->slock); } return rc; } const char 
*accel_strerror(accel_t card, int card_rc) { struct ddcb_accel_funcs *accel; if (card == NULL) return "invalid accelerator"; accel = card->accel; if (accel == NULL) return "invalid accelerator"; if (accel->card_strerror == NULL) return NULL; return accel->card_strerror(card->card_data, card_rc); } int accel_ddcb_execute(accel_t card, struct ddcb_cmd *req, int *card_rc, int *card_errno) { struct ddcb_accel_funcs *accel = card->accel; uint64_t s = 0, e = 0; if (ddcb_gather_statistics()) s = get_usec(); if (accel == NULL) return DDCB_ERR_INVAL; if (accel->ddcb_execute == NULL) return DDCB_ERR_NOTIMPL; card->card_rc = accel->ddcb_execute(card->card_data, req); card->card_errno = errno; if (card_rc != NULL) *card_rc = card->card_rc; if (card_errno != NULL) *card_errno = card->card_errno; if (card->card_rc < 0) return DDCB_ERR_CARD; if (ddcb_gather_statistics()) { e = get_usec(); pthread_mutex_lock(&accel->slock); accel->num_execute++; accel->time_execute += (e - s); pthread_mutex_unlock(&accel->slock); } return DDCB_OK; } uint64_t accel_read_reg64(accel_t card, uint32_t offs, int *card_rc) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) { if (card_rc != NULL) *card_rc = DDCB_ERR_INVAL; return 0; } if (accel->card_read_reg64 == NULL) { if (card_rc != NULL) *card_rc = DDCB_ERR_NOTIMPL; return 0; } return accel->card_read_reg64(card->card_data, offs, card_rc); } uint32_t accel_read_reg32(accel_t card, uint32_t offs, int *card_rc) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) { if (card_rc != NULL) *card_rc = DDCB_ERR_INVAL; return 0; } if (accel->card_read_reg32 == NULL) { if (card_rc != NULL) *card_rc = DDCB_ERR_NOTIMPL; return 0; } return accel->card_read_reg32(card->card_data, offs, card_rc); } int accel_write_reg64(accel_t card, uint32_t offs, uint64_t val) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) return DDCB_ERR_INVAL; if (accel->card_write_reg64 == NULL) return DDCB_ERR_NOTIMPL; return 
accel->card_write_reg64(card->card_data, offs, val); } int accel_write_reg32(accel_t card, uint32_t offs, uint32_t val) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) return DDCB_ERR_INVAL; if (accel->card_write_reg32 == NULL) return DDCB_ERR_NOTIMPL; return accel->card_write_reg32(card->card_data, offs, val); } uint64_t accel_get_app_id(accel_t card) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) return 0; if (accel->card_get_app_id == NULL) return 0; return accel->card_get_app_id(card->card_data); } uint64_t accel_get_queue_work_time(accel_t card) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) return 0; if (accel->card_get_queue_work_time == NULL) return 0; return accel->card_get_queue_work_time(card->card_data); } uint64_t accel_get_frequency(accel_t card) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) return 0; if (accel->card_get_frequency == NULL) return 0; return accel->card_get_frequency(card->card_data); } void accel_dump_hardware_version(accel_t card, FILE *fp) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) return; if (accel->card_dump_hardware_version == NULL) return; return accel->card_dump_hardware_version(card->card_data, fp); } int accel_pin_memory(accel_t card, const void *addr, size_t size, int dir) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) return DDCB_ERR_INVAL; if (accel->card_write_reg32 == NULL) return DDCB_ERR_NOTIMPL; return accel->card_pin_memory(card->card_data, addr, size, dir); } int accel_unpin_memory(accel_t card __attribute__((unused)), const void *addr __attribute__((unused)), size_t size __attribute__((unused))) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) return DDCB_ERR_INVAL; if (accel->card_unpin_memory == NULL) return DDCB_ERR_NOTIMPL; return accel->card_unpin_memory(card->card_data, addr, size); } void *accel_malloc(accel_t card, size_t size) { struct ddcb_accel_funcs *accel = 
card->accel; if (accel == NULL) return NULL; if (accel->card_malloc == NULL) return NULL; return accel->card_malloc(card->card_data, size); } int accel_free(accel_t card, void *ptr, size_t size) { struct ddcb_accel_funcs *accel = card->accel; if (accel == NULL) return DDCB_ERR_INVAL; if (accel->card_free == NULL) return DDCB_ERR_NOTIMPL; return accel->card_free(card->card_data, ptr, size); } int accel_dump_statistics(struct ddcb_accel_funcs *accel, FILE *fp) { if (accel == NULL) return DDCB_ERR_INVAL; if (accel->dump_statistics == NULL) return DDCB_ERR_NOTIMPL; return accel->dump_statistics(fp); } int ddcb_register_accelerator(struct ddcb_accel_funcs *accel) { int rc; if (accel == NULL) return DDCB_ERR_INVAL; if ddcb_gather_statistics() { rc = pthread_mutex_init(&accel->slock, NULL); if (rc != 0) return DDCB_ERRNO; } accel->priv_data = accel_list; accel_list = accel; return DDCB_OK; } static void _init(void) __attribute__((constructor)); static void _init(void) { const char *ddcb_trace_env = getenv("DDCB_TRACE"); libddcb_fd_out = stderr; /* Default fd out for messages */ if (ddcb_trace_env != NULL) ddcb_trace = strtol(ddcb_trace_env, (char **)NULL, 0); } static void _done(void) __attribute__((destructor)); static void _done(void) { struct ddcb_accel_funcs *accel; for (accel = accel_list; accel != NULL; accel = accel->priv_data) { if (accel->num_open == 0) continue; if (ddcb_gather_statistics()) { fprintf(libddcb_fd_out, "libddcb statistics for %s\n" " open ; %5lld ; %8lld usec\n" " execute ; %5lld ; %8lld usec\n" " close ; %5lld ; %8lld usec\n", accel->card_name, (long long)accel->num_open, (long long)accel->time_open, (long long)accel->num_execute, (long long)accel->time_execute, (long long)accel->num_close, (long long)accel->time_close); pthread_mutex_destroy(&accel->slock); } accel_dump_statistics(accel, libddcb_fd_out); } return; } genwqe-user-4.0.18/lib/libzADC.map000066400000000000000000000022501303345043000165550ustar00rootroot00000000000000ZLIB_1.2.0 { 
global: compressBound; deflateBound; inflateBack; inflateBackEnd; inflateBackInit_; inflateCopy; local: deflate_copyright; inflate_copyright; inflate_fast; inflate_table; zcalloc; zcfree; z_errmsg; gz_error; gz_intmax; _*; }; ZLIB_1.2.0.2 { gzclearerr; gzungetc; zlibCompileFlags; } ZLIB_1.2.0; ZLIB_1.2.0.8 { deflatePrime; } ZLIB_1.2.0.2; ZLIB_1.2.2 { adler32_combine; crc32_combine; deflateSetHeader; inflateGetHeader; } ZLIB_1.2.0.8; ZLIB_1.2.2.3 { deflateTune; gzdirect; } ZLIB_1.2.2; ZLIB_1.2.2.4 { inflatePrime; } ZLIB_1.2.2.3; ZLIB_1.2.3.3 { adler32_combine64; crc32_combine64; gzopen64; gzseek64; gztell64; inflateUndermine; } ZLIB_1.2.2.4; ZLIB_1.2.3.4 { inflateReset2; inflateMark; } ZLIB_1.2.3.3; ZLIB_1.2.3.5 { gzbuffer; gzoffset; gzoffset64; gzclose_r; gzclose_w; } ZLIB_1.2.3.4; ZLIB_1.2.5.1 { deflatePending; } ZLIB_1.2.3.5; ZLIB_1.2.5.2 { deflateResetKeep; gzgetc_; inflateResetKeep; } ZLIB_1.2.5.1; ZLIB_1.2.7.1 { inflateGetDictionary; gzvprintf; } ZLIB_1.2.5.2; genwqe-user-4.0.18/lib/libzHW.c000066400000000000000000000441111303345043000161530ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @brief De/Compression supporting RFC1950, RFC1951 and RFC1952. 
* * IBM Accelerator Family 'GenWQE' */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hw_defs.h" FILE *zedc_log = NULL; int zedc_dbg = 0; /* lookup table for error text messages */ struct err_lookup { int num; /* error number */ const char *str; /* corresponding error string */ }; static struct err_lookup zedc_errlist[] = { { ZEDC_OK, "success" }, { ZEDC_ERRNO, "system error, please see errno" }, { ZEDC_STREAM_ERROR, "stream state was inconsistent (for example " "if next_in or next_out was NULL)" }, { ZEDC_DATA_ERROR, "invalid or incomplete inflate/deflate data" }, { ZEDC_MEM_ERROR, "out of memory" }, { ZEDC_BUF_ERROR, "no progress is possible (for example avail_in or " "avail_out was zero)" }, { ZEDC_ERR_CARD, "problem with the accelerator card detected, please " "see errno, carderr and returned data" }, { ZEDC_ERR_INVAL, "invalid parameter" }, { ZEDC_ERR_RETLEN, "returned invalid length" }, { ZEDC_ERR_RETOBITS, "hardware returned invalid output bytes" }, { ZEDC_ERR_TREE_OVERRUN, "hardware too many tree bits" }, { ZEDC_ERR_ZLIB_HDR, "illegal zlib header found" }, { ZEDC_ERR_ADLER32, "adler32 mismatch" }, { ZEDC_ERR_GZIP_HDR, "illegal gzip header found" }, { ZEDC_ERR_CRC32, "crc32 mismatch" }, { ZEDC_ERR_UNSUPPORTED, "currently unsupported function" }, { ZEDC_ERR_DICT_OVERRUN, "dictionary overrun" }, { ZEDC_ERR_INP_MISSING, "further input data missing" }, { ZEDC_ERR_ILLEGAL_APPID, "illegal application id" }, }; static int zedc_nerr = ARRAY_SIZE(zedc_errlist); const char *zedc_Version(void) { return GIT_VERSION; } void zedc_set_logfile(FILE *logfile) { zedc_log = logfile; } int zedc_clearerr(zedc_handle_t zedc) { if (!zedc) return ZEDC_ERR_INVAL; zedc->zedc_rc = 0; zedc->card_rc = 0; return ZEDC_OK; } /** * @brief provide error message for a corresponding error number * @param errnum error number * * @return pointer to error text message */ const char *zedc_strerror(int 
errnum) { int i; i = 0; while (i < zedc_nerr) { if (errnum == zedc_errlist[i].num) return zedc_errlist[i].str; i++; } return "unknown"; } int zedc_carderr(zedc_handle_t zedc) { if (!zedc) return ZEDC_ERR_INVAL; return zedc->card_rc; } int zedc_liberr(zedc_handle_t zedc) { if (!zedc) return ZEDC_ERR_INVAL; return zedc->zedc_rc; } struct ddcb_cmd *zedc_last_cmd(struct zedc_stream_s *strm) { if (!strm) return NULL; return &strm->cmd; } /** * @brief Print final compression/decompression status * FIXME Printing to stdout is normally not allowed! See zpipe use-case! * FIXME I think this should be removed from the library if possible. */ int zedc_pstatus(struct zedc_stream_s *strm, const char *task) { int c; c = fprintf(stdout, "%s finished (avail_in=%d avail_out=%d total_in=%ld " "total_out=%ld)\n", task, strm->avail_in, strm->avail_out, strm->total_in, strm->total_out); switch (strm->format) { case ZEDC_FORMAT_GZIP: c += fprintf(stdout, " GZIP CRC32=0x%08x (eval=0x%08x)\n", strm->file_crc32, strm->crc32); c += fprintf(stdout, " GZIP ISIZE=0x%x (%u)\n", strm->file_size, strm->file_size); break; case ZEDC_FORMAT_ZLIB: c += fprintf(stdout, " ZLIB ADLER32=0x%08x (eval=0x%08x)\n", strm->file_adler32, strm->adler32); break; default: break; } return c; } /** * @brief enable or disable debug outputs from library. 
* pr_info is activated or disabled
 * @param onoff if 0 -> no outputs
 */
void zedc_lib_debug(int onoff)
{
	/* zedc_dbg is also compared against 3 to enable workspace dumps */
	zedc_dbg = onoff;
}

/**
 * @brief print 'Application Specific Invariant' part of inflate DDCB
 *
 * The ASIV area is taken from the command embedded in the stream. The
 * multi-byte fields are converted from big endian before printing.
 *
 * @param strm stream whose last command is printed
 *
 */
void zedc_asiv_infl_print(zedc_streamp strm)
{
	struct ddcb_cmd *cmd = &strm->cmd;
	struct zedc_wsp *wsp = strm->wsp;
	struct zedc_asiv_infl *asiv = (struct zedc_asiv_infl *)cmd->asiv;
	uint32_t out_buff_len = __be32_to_cpu(asiv->out_buff_len);
	uint32_t in_buff_len = __be32_to_cpu(asiv->in_buff_len);
	uint16_t in_hdr_bits = __be16_to_cpu(asiv->in_hdr_bits);

	/*
	 * NOTE(review): the offsets printed for the dictionary fields repeat
	 * ([40]/[60] for both IN_DICT* and OUT_DICT*) and differ from those
	 * in zedc_asiv_defl_print(); confirm them against the DDCB layout.
	 */
	pr_info("Inflate ASIV (sent):\n"
		" [20] IN_BUFF = 0x%llx\n"
		" [28] IN_BUFF_LEN = 0x%x (%d)\n"
		" [30] OUT_BUFF = 0x%llx\n"
		" [38] OUT_BUFF_LEN = 0x%x (%d)\n"
		" [40] IN_DICT = 0x%llx\n"
		" [60] IN_DICT_LEN = 0x%x (%d)\n"
		" [40] OUT_DICT = 0x%llx\n"
		" [60] OUT_DICT_LEN = 0x%x (%d)\n"
		" [50] IN_HDR_SCRATCH = 0x%llx\n"
		" [58] IN_SCRATCH_LEN = 0x%x (%u)\n"
		" [5c] IN_HDR_BITS = %u (%u bytes + %u bits)\n"
		" [5e] IN_HDR_IB = %u\n"
		" [5e] SCRATCH_IB = %u\n"
		" [2c] IN_CRC32 = 0x%08x\n"
		" [3c] IN_ADLER32 = 0x%08x\n",
		(long long)__be64_to_cpu(asiv->in_buff),
		in_buff_len, in_buff_len,
		(long long)__be64_to_cpu(asiv->out_buff),
		out_buff_len, out_buff_len,
		(long long)__be64_to_cpu(asiv->in_dict),
		__be32_to_cpu(asiv->in_dict_len),
		__be32_to_cpu(asiv->in_dict_len),
		(long long)__be64_to_cpu(asiv->out_dict),
		__be32_to_cpu(asiv->out_dict_len),
		__be32_to_cpu(asiv->out_dict_len),
		(long long)__be64_to_cpu(asiv->inp_scratch),
		__be32_to_cpu(asiv->in_scratch_len),
		__be32_to_cpu(asiv->in_scratch_len),
		in_hdr_bits, in_hdr_bits / 8, in_hdr_bits % 8,
		asiv->hdr_ib, asiv->scratch_ib,
		__be32_to_cpu(asiv->in_crc32),
		__be32_to_cpu(asiv->in_adler32));

	pr_info("\n"
		" ATS = 0x%08llx\n"
		" CMD = 0x%02x\n"
		" CMDOPTS = 0x%02x\n",
		(long long)cmd->ats, cmd->cmd, cmd->cmdopts);

	/* workspace dumps only at debug levels above 3 */
	if (zedc_dbg > 3) {
		pr_info("Workspace/Dict0:\n");
		ddcb_hexdump(zedc_log, wsp->dict[0], ZEDC_DICT_LEN);
		pr_info("Workspace/Dict1:\n");
		ddcb_hexdump(zedc_log, wsp->dict[1], ZEDC_DICT_LEN);
		pr_info("Workspace/Tree:\n");
		ddcb_hexdump(zedc_log, wsp->tree, ZEDC_TREE_LEN);
	}
}

/**
 * @brief print 'Application Specific Invariant' part of deflate DDCB
 *
 * @param strm stream whose last command is printed
 * @param dbg passed on to pr_log() for each output
 *
 */
void zedc_asiv_defl_print(zedc_streamp strm, int dbg)
{
	struct ddcb_cmd *cmd = &strm->cmd;
	struct zedc_asiv_defl *asiv = (struct zedc_asiv_defl *)cmd->asiv;
	uint32_t out_buff_len = __be32_to_cpu(asiv->out_buff_len);
	uint32_t in_buff_len = __be32_to_cpu(asiv->in_buff_len);

	pr_log(dbg, "Deflate ASIV (sent):\n"
	       " [20] IN_BUFF = 0x%llx\n"
	       " [28] IN_BUFF_LEN = 0x%x (%d)\n"
	       " [2c] IN_CRC32 = 0x%08x\n"
	       " [30] OUT_BUFF = 0x%llx\n"
	       " [38] OUT_BUFF_LEN = 0x%x (%d)\n"
	       " [3c] IN_ADLER32 = 0x%08x\n"
	       " [40] IN_DICT = 0x%llx\n"
	       " [48] IN_DICT_LEN = 0x%x (%d)\n"
	       " [60] OUT_DICT = 0x%llx\n"
	       " [68] OUT_DICT_LEN = 0x%x (%d)\n"
	       " [7f] INUMBITS = 0x%x\n",
	       (long long)__be64_to_cpu(asiv->in_buff),
	       in_buff_len, in_buff_len,
	       __be32_to_cpu(asiv->in_crc32),
	       (long long)__be64_to_cpu(asiv->out_buff),
	       out_buff_len, out_buff_len,
	       __be32_to_cpu(asiv->in_adler32),
	       (long long)__be64_to_cpu(asiv->in_dict),
	       __be32_to_cpu(asiv->in_dict_len),
	       __be32_to_cpu(asiv->in_dict_len),
	       (long long)__be64_to_cpu(asiv->out_dict),
	       __be32_to_cpu(asiv->out_dict_len),
	       __be32_to_cpu(asiv->out_dict_len),
	       asiv->inumbits);

	pr_log(dbg, "\n"
	       " ATS = 0x%08llx\n"
	       " CMD = 0x%02x\n"
	       " CMDOPTS = 0x%02x\n",
	       (long long)cmd->ats, cmd->cmd, cmd->cmdopts);

	/* the first seven bytes of ibits */
	pr_log(dbg,
	       " [7f] IBITS: %02x %02x %02x %02x %02x %02x %02x\n",
	       asiv->ibits[0], asiv->ibits[1], asiv->ibits[2],
	       asiv->ibits[3], asiv->ibits[4], asiv->ibits[5],
	       asiv->ibits[6]);
}

/**
 * @brief print 'Application Specific Variant' part of deflate DDCB
 * @param strm stream whose last command is printed
 * @param dbg passed on to pr_log(); if non-zero the obits areas are
 *            dumped to zedc_log in addition
 */
void zedc_asv_defl_print(zedc_streamp strm, int dbg)
{
	struct ddcb_cmd *cmd = &strm->cmd;
	struct zedc_asv_defl *asv = (struct zedc_asv_defl *)cmd->asv;
	uint32_t inp_processed = __be32_to_cpu(asv->inp_processed);
	uint32_t outp_returned = __be32_to_cpu(asv->outp_returned);

	pr_log(dbg, "Deflate ASV (received):\n"
	       " [80] OUT_DICT_USED = 0x%x (%d)\n"
	       " [87] ONUMBITS = 0x%x (%u)\n"
	       " [90] OUT_CRC32 = 0x%08x\n"
	       " [94] OUT_ADLER32 = 0x%08x\n"
	       " [98] INP_PROCESSED = 0x%x (%d)\n"
	       " [9c] OUTP_RETURNED = 0x%x (%d)\n"
	       " [b8] OUT_DICT_OFFS = 0x%x (%d)\n",
	       __be16_to_cpu(asv->out_dict_used),
	       __be16_to_cpu(asv->out_dict_used),
	       asv->onumbits, asv->onumbits,
	       __be32_to_cpu(asv->out_crc32),
	       __be32_to_cpu(asv->out_adler32),
	       inp_processed, inp_processed,
	       outp_returned, outp_returned,
	       asv->out_dict_offs, asv->out_dict_offs);

	pr_log(dbg, "\n"
	       " ATS = 0x%08llx\n"
	       " CMD = 0x%02x\n"
	       " CMDOPTS = 0x%02x\n",
	       (long long)cmd->ats, cmd->cmd, cmd->cmdopts);

	if (dbg) {
		pr_log(dbg, " OBITS:\n");
		ddcb_hexdump(zedc_log, asv->obits, ZEDC_ONUMBYTES_v1);
		pr_log(dbg, " OBITS_EXTRA:\n");
		ddcb_hexdump(zedc_log, asv->obits_extra,
			     ZEDC_ONUMBYTES_EXTRA);
	}
}

/**
 * @brief print 'Application Specific Variant' part of inflate DDCB
 * @param strm stream whose last command is printed
 */
void zedc_asv_infl_print(zedc_streamp strm)
{
	struct ddcb_cmd *cmd = &strm->cmd;
	struct zedc_wsp *wsp = strm->wsp;
	struct zedc_asv_infl *asv = (struct zedc_asv_infl *)cmd->asv;
	uint32_t inp_processed = __be32_to_cpu(asv->inp_processed);
	uint32_t outp_returned = __be32_to_cpu(asv->outp_returned);
	uint16_t hdr_bits = __be16_to_cpu(asv->out_hdr_bits);

	pr_info("Inflate ASV (received):\n"
		" [80] OUT_DICT_USED = 0x%x (%u)\n"
		" [82] COPYBLOCK_LEN = 0x%x (%u)\n"
		" [85] INFL_STAT = 0x%x\n"
		" [87] PROC_BITS = 0x%x\n"
		" [88] HDR_START = 0x%x\n"
		" [8d] HDR_START_BITS = 0x%x\n"
		" [8e] OUT_HDR_BITS = 0x%x (%u) (%u bytes + %u bits)\n"
		" [90] OUT_CRC32 = 0x%08x\n"
		" [94] OUT_ADLER32 = 0x%08x\n"
		" [98] INP_PROCESSED = 0x%x (%u)\n"
		" [9c] OUTP_RETURNED = 0x%x (%u)\n"
		" [b8] OUT_DICT_OFFS = 0x%x (%u)\n"
		" [b8] OBYTES_IN_DICT = 0x%x (%u)\n",
		__be16_to_cpu(asv->out_dict_used),
		__be16_to_cpu(asv->out_dict_used),
		__be16_to_cpu(asv->copyblock_len),
		__be16_to_cpu(asv->copyblock_len),
		asv->infl_stat,
		asv->proc_bits,
		__be32_to_cpu(asv->hdr_start),
		asv->hdr_start_bits,
		hdr_bits, hdr_bits, hdr_bits / 8, hdr_bits % 8,
		__be32_to_cpu(asv->out_crc32),
		__be32_to_cpu(asv->out_adler32),
		inp_processed, inp_processed,
		outp_returned, outp_returned,
		asv->out_dict_offs, asv->out_dict_offs,
		__be16_to_cpu(asv->obytes_in_dict),
		__be16_to_cpu(asv->obytes_in_dict));

	pr_info("\n"
		" ATS = 0x%08llx\n"
		" CMD = 0x%02x\n"
		" CMDOPTS = 0x%02x\n",
		(long long)cmd->ats, cmd->cmd, cmd->cmdopts);

	/* workspace dumps only at debug levels above 3 */
	if (zedc_dbg > 3) {
		pr_info("Workspace/Dict0:\n");
		ddcb_hexdump(zedc_log, wsp->dict[0], ZEDC_DICT_LEN);
		pr_info("Workspace/Dict1:\n");
		ddcb_hexdump(zedc_log, wsp->dict[1], ZEDC_DICT_LEN);
		pr_info("Workspace/Tree:\n");
		ddcb_hexdump(zedc_log, wsp->tree, ZEDC_TREE_LEN);
	}
}

/****************************************************************************
 * ZEDC Compression/Decompression device support
 ***************************************************************************/

/* Currently a no-op: both arguments are ignored, see FIXME below. */
void zedc_overwrite_slu_id(zedc_handle_t zedc __attribute__((unused)),
			   uint64_t slu_id __attribute__((unused)))
{
	/* FIXME disable for now */
	/* card_overwrite_slu_id(zedc->card, slu_id); */
}

/* Currently a no-op: both arguments are ignored, see FIXME below. */
void zedc_overwrite_app_id(zedc_handle_t zedc __attribute__((unused)),
			   uint64_t app_id __attribute__((unused)))
{
	/* FIXME disable for now */
	/* card_overwrite_app_id(zedc->card, app_id); */
}

/**
 * @brief initialization of the ZEDC library.
 * allocates and presets required memory, sets version
 * numbers and opens a zedc device.
* @param dev_no card number * @param mode SIGIO mode * @param err_code pointer to error code (return) * * @return 0 if success */ zedc_handle_t zedc_open(int dev_no, int dev_type, int mode, int *err_code) { char *env; zedc_handle_t zedc; uint64_t app_id = DDCB_APPL_ID_GZIP; uint64_t app_id_mask = DDCB_APPL_ID_MASK; zedc = malloc(sizeof(*zedc)); if (!zedc) { *err_code = ZEDC_ERRNO; return NULL; } memset(zedc, 0, sizeof(*zedc)); zedc->mode = mode; /* Check Appl id GZIP Version 2 */ if (dev_no == ACCEL_REDUNDANT) { app_id = DDCB_APPL_ID_GZIP2; app_id_mask = DDCB_APPL_ID_MASK_VER; } /* Check Appl id GZIP Version 2 */ zedc->card = accel_open(dev_no, dev_type, mode, &zedc->card_rc, app_id, app_id_mask); if (zedc->card == NULL) { *err_code = ZEDC_ERR_CARD; goto free_zedc; } zedc->card_rc = 0; /* FIXME */ env = getenv("DDCB_DEBUG"); if (env) zedc_dbg = atoi(env); *err_code = 0; return zedc; free_zedc: free(zedc); return NULL; } /** * @brief manage execution of an inflate or a deflate job * @param zedc ZEDC device handle * @param cmd pointer to command descriptor */ int zedc_execute_request(zedc_handle_t zedc, struct ddcb_cmd *cmd) { int rc = accel_ddcb_execute(zedc->card, cmd, &zedc->card_rc, &zedc->card_errno); pr_info(" DDCB returned rc=%d card_rc=%d " "(RETC=%03x ATTN=%04x PROGR=%x) %s\n", rc, zedc->card_rc, cmd->retc, cmd->attn, cmd->progress, cmd->retc == 0x102 ? "" : "ERR"); return rc; } /** * @brief end ZEDC library accesses close all open files, free memory * @param zedc pointer to the opened device descriptor * @return ZEDC_OK if everything is ok. */ int zedc_close(zedc_handle_t zedc) { if (!zedc) return ZEDC_ERR_INVAL; accel_close(zedc->card); free(zedc); return ZEDC_OK; } /** * @brief Memory allocation for compression/decompression buffers. 
/**
 * @brief Memory allocation for compression/decompression buffers.
 *
 * For DDCB_DMA_TYPE_FLAT the memory is requested from the accelerator
 * library via accel_malloc(). For all other DMA types a page aligned
 * buffer is taken from the heap and, if DDCB_DMA_PIN_MEMORY is set in
 * @mtype, pinned via accel_pin_memory().
 *
 * The buffer must be released with zedc_free() using the same @size
 * and @mtype.
 *
 * @param zedc	ZEDC device handle
 * @param size	number of bytes to allocate
 * @param mtype	DMA type and flags describing the buffer
 *
 * @return pointer to the buffer, or NULL if @zedc is NULL, the
 *         allocation failed or the buffer could not be pinned (in
 *         the latter case zedc->card_rc holds the card return code)
 */
void *zedc_memalign(zedc_handle_t zedc, size_t size, enum zedc_mtype mtype)
{
	void *ptr;
	long page_size;

	if (!zedc)
		return NULL;

	/* normal operation */
	if ((mtype & DDCB_DMA_TYPE_MASK) == DDCB_DMA_TYPE_FLAT) {
		ptr = accel_malloc(zedc->card, size);
		/* treat both possible failure indications alike */
		if (ptr == NULL || ptr == MAP_FAILED)
			return NULL;
		return ptr;
	}

	page_size = sysconf(_SC_PAGESIZE);
	if (page_size <= 0)
		return NULL;

	/*
	 * memalign() reports failure with NULL, not with MAP_FAILED.
	 * Checking for MAP_FAILED let a failed allocation slip through
	 * and accel_pin_memory() was called with a NULL pointer.
	 */
	ptr = memalign((size_t)page_size, size);
	if (ptr == NULL)
		return NULL;

	if (mtype & DDCB_DMA_PIN_MEMORY) {
		zedc->card_rc = accel_pin_memory(zedc->card, ptr, size, 1);
		if (zedc->card_rc != DDCB_OK) {
			free(ptr);
			return NULL;
		}
	}
	return ptr;
}
#define BASE 65521 /* largest prime smaller than 65536 */

/**
 * @brief Continue an Adler-32 checksum over a buffer.
 *
 * The low 16 bits of @adler are the running byte sum, the next 16
 * bits the running sum of sums; both are reduced modulo BASE after
 * every input byte.
 *
 * @param adler	checksum to continue (1 starts a new checksum)
 * @param buf	data to add to the checksum
 * @param len	number of bytes in @buf; nothing is read for len <= 0
 *
 * @return updated checksum
 */
unsigned long __adler32(unsigned long adler, const unsigned char *buf, int len)
{
	unsigned long lo = adler & 0xffff;
	unsigned long hi = (adler >> 16) & 0xffff;
	const unsigned char *p = buf;
	int left;

	for (left = len; left > 0; left--) {
		lo = (lo + *p++) % BASE;
		hi = (hi + lo) % BASE;
	}
	return (hi << 16) + lo;
}
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define _LARGEFILE64_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include /* older zlibs might not have this */ #ifndef z_off64_t # define z_off64_t off64_t #endif #if defined(CONFIG_DLOPEN_MECHANISM) typedef void * __attribute__ ((__may_alias__)) pvoid_t; #define register_sym(name) \ do { \ dlerror(); /* Clear any existing error */ \ /* sw_trace("loading [%s]\n", #name); */ \ *(pvoid_t *)(&p_##name) = dlsym(handle, #name); \ if ((error = dlerror()) != NULL) { \ sw_trace("%s\n", error); \ /* exit(EXIT_FAILURE); */ \ } \ } while (0) #define check_sym(name, rc) \ do { \ if ((name) == NULL) { \ pr_err("%s not loadable, consider using a " \ "newer libz version.\n", #name); \ return (rc); \ } \ } while (0) static void *handle = NULL; int (* p_deflateInit2_)(z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy, const char *version, int stream_size); int z_deflateInit2_(z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy, const char *version, int stream_size) { int rc; check_sym(p_deflateInit2_, Z_STREAM_ERROR); rc = (* p_deflateInit2_)(strm, level, method, windowBits, memLevel, strategy, version, stream_size); return rc; } static int (* p_deflateParams)(z_streamp strm, int level, int strategy); int z_deflateParams(z_streamp strm, int level, int strategy) { check_sym(p_deflateParams, Z_STREAM_ERROR); return (* p_deflateParams)(strm, level, strategy); } static uLong (* p_deflateBound)(z_streamp strm, uLong sourceLen); uLong 
/**
 * Forward inflateInit2_() to the software zlib loaded at runtime.
 * Returns Z_STREAM_ERROR if the symbol could not be resolved,
 * otherwise the return code of the software implementation.
 */
static int (* p_inflateInit2_)(z_streamp strm, int windowBits,
			       const char *version, int stream_size);
int z_inflateInit2_(z_streamp strm, int windowBits, const char *version,
		    int stream_size)
{
	check_sym(p_inflateInit2_, Z_STREAM_ERROR);

	return (* p_inflateInit2_)(strm, windowBits, version, stream_size);
}
Z_STREAM_ERROR); return (* p_inflateReset)(strm); } static int (* p_inflateReset2)(z_streamp strm, int windowBits); int z_inflateReset2(z_streamp strm, int windowBits) { check_sym(p_inflateReset2, Z_STREAM_ERROR); return (* p_inflateReset2)(strm, windowBits); } static int (* p_inflateSetDictionary)(z_streamp strm, const Bytef *dictionary, uInt dictLength); int z_inflateSetDictionary(z_streamp strm, const Bytef *dictionary, uInt dictLength) { check_sym(p_inflateSetDictionary, Z_STREAM_ERROR); return (* p_inflateSetDictionary)(strm, dictionary, dictLength); } /** * No warning in this case since we try to emulate this in the * functions above. */ static int (* p_inflateGetDictionary)(z_streamp strm, const Bytef *dictionary, uInt *dictLength); int z_inflateGetDictionary(z_streamp strm, const Bytef *dictionary, uInt *dictLength) { if (p_inflateGetDictionary == NULL) return Z_STREAM_ERROR; return (* p_inflateGetDictionary)(strm, dictionary, dictLength); } bool z_hasGetDictionary(void) { return (p_inflateGetDictionary != NULL); } static int (* p_inflateGetHeader)(z_streamp strm, gz_headerp head); int z_inflateGetHeader(z_streamp strm, gz_headerp head) { check_sym(p_inflateGetHeader, Z_STREAM_ERROR); return (* p_inflateGetHeader)(strm, head); } static int (* p_inflatePrime)(z_streamp strm, int bits, int value); int z_inflatePrime(z_streamp strm, int bits, int value) { check_sym(p_inflatePrime, Z_STREAM_ERROR); return (* p_inflatePrime)(strm, bits, value); } static int (* p_inflateSync)(z_streamp strm); int z_inflateSync(z_streamp strm) { check_sym(p_inflateSync, Z_STREAM_ERROR); return (* p_inflateSync)(strm); } static int (* p_inflate)(z_streamp strm, int flush); int z_inflate(z_streamp strm, int flush) { check_sym(p_inflate, Z_STREAM_ERROR); return (* p_inflate)(strm, flush); } static int (* p_inflateEnd)(z_streamp strm); int z_inflateEnd(z_streamp strm) { check_sym(p_inflateEnd, Z_STREAM_ERROR); return (* p_inflateEnd)(strm); } static int (* 
p_inflateBackInit_)(z_streamp strm, int windowBits, unsigned char *window, const char *version, int stream_size); int z_inflateBackInit_(z_streamp strm, int windowBits, unsigned char *window, const char *version, int stream_size) { check_sym(p_inflateBackInit_, Z_STREAM_ERROR); return (* p_inflateBackInit_)(strm, windowBits, window, version, stream_size); } static int (* p_inflateBack)(z_streamp strm, in_func in, void *in_desc, out_func out, void *out_desc); int z_inflateBack(z_streamp strm, in_func in, void *in_desc, out_func out, void *out_desc) { check_sym(p_inflateBack, Z_STREAM_ERROR); return (* p_inflateBack)(strm, in, in_desc, out, out_desc); } static int (* p_inflateBackEnd)(z_streamp strm); int z_inflateBackEnd(z_streamp strm) { check_sym(p_inflateBackEnd, Z_STREAM_ERROR); return (* p_inflateBackEnd)(strm); } static uLong (* p_adler32)(uLong adler, const Bytef *buf, uInt len); uLong z_adler32(uLong adler, const Bytef *buf, uInt len) { check_sym(p_adler32, Z_STREAM_ERROR); return (* p_adler32)(adler, buf, len); } static uLong (* p_adler32_combine)(uLong adler1, uLong adler2, z_off_t len2); uLong z_adler32_combine(uLong adler1, uLong adler2, z_off_t len2) { check_sym(p_adler32_combine, Z_STREAM_ERROR); return (* p_adler32_combine)(adler1, adler2, len2); } static uLong (* p_crc32)(uLong crc, const Bytef *buf, uInt len); uLong z_crc32(uLong crc, const Bytef *buf, uInt len) { check_sym(p_crc32, Z_STREAM_ERROR); return (* p_crc32)(crc, buf, len); } static uLong (* p_crc32_combine)(uLong crc1, uLong crc2, z_off_t len2); uLong z_crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { check_sym(p_crc32_combine, Z_STREAM_ERROR); return (* p_crc32_combine)(crc1, crc2, len2); } static const char *(* p_zError)(int err); const char *z_zError(int err) { check_sym(p_zError, NULL); return (* p_zError)(err); } static uLong (* p_zlibCompileFlags)(void); uLong z_zlibCompileFlags(void) { return p_zlibCompileFlags(); } static const char * (* p_zlibVersion)(void); const char 
*z_zlibVersion(void) { check_sym(p_zlibVersion, NULL); return (* p_zlibVersion)(); } static gzFile (* p_gzopen)(const char *path, const char *mode); gzFile gzopen(const char *path, const char *mode) { zlib_stats_inc(&zlib_stats.gzopen); check_sym(p_gzopen, NULL); return (* p_gzopen)(path, mode); } static gzFile (* p_gzdopen)(int fd, const char *mode); gzFile gzdopen(int fd, const char *mode) { zlib_stats_inc(&zlib_stats.gzdopen); check_sym(p_gzdopen, NULL); return (* p_gzdopen)(fd, mode); } static int (* p_gzwrite)(gzFile file, voidpc buf, unsigned len); int gzwrite(gzFile file, voidpc buf, unsigned len) { zlib_stats_inc(&zlib_stats.gzwrite); check_sym(p_gzwrite, -1); return (* p_gzwrite)(file, buf, len); } static int (* p_gzread)(gzFile file, voidp buf, unsigned len); int gzread(gzFile file, voidp buf, unsigned len) { zlib_stats_inc(&zlib_stats.gzread); check_sym(p_gzread, -1); return (* p_gzread)(file, buf, len); } static int (* p_gzclose)(gzFile file); int gzclose(gzFile file) { zlib_stats_inc(&zlib_stats.gzclose); check_sym(p_gzread, Z_STREAM_ERROR); return (* p_gzclose)(file); } static int (* p_gzungetc)(int c, gzFile file); int gzungetc(int c, gzFile file) { zlib_stats_inc(&zlib_stats.gzungetc); check_sym(p_gzungetc, -1); return (* p_gzungetc)(c, file); } static int (* p_gzflush)(gzFile file, int flush); int gzflush(gzFile file, int flush) { zlib_stats_inc(&zlib_stats.gzflush); check_sym(p_gzflush, Z_STREAM_ERROR); return (* p_gzflush)(file, flush); } static int (* p_gzeof)(gzFile file); int gzeof(gzFile file) { zlib_stats_inc(&zlib_stats.gzeof); check_sym(p_gzeof, 0); return (* p_gzeof)(file); } static z_off_t (* p_gztell)(gzFile file); z_off_t gztell(gzFile file) { zlib_stats_inc(&zlib_stats.gztell); check_sym(p_gztell, -1ll); return (* p_gztell)(file); } static const char * (* p_gzerror)(gzFile file, int *errnum); const char *gzerror(gzFile file, int *errnum) { zlib_stats_inc(&zlib_stats.gzerror); check_sym(p_gzerror, NULL); return (* p_gzerror)(file, 
/**
 * Count the call in zlib_stats and forward gzprintf() to the software
 * zlib loaded at runtime.
 *
 * A va_list must not be handed to a variadic function in place of the
 * variable arguments; doing so made the software gzprintf() interpret
 * the va_list object itself as first format argument. The text is
 * therefore formatted here and passed on as a single "%s" argument.
 * Since the software gzprintf() still does the actual write, its
 * return value semantics and output size limits stay in effect.
 *
 * @return -1 if gzprintf could not be resolved, if formatting failed
 *         or if no memory was available; otherwise the return value
 *         of the software implementation
 */
static int (* p_gzprintf)(gzFile file, const char *format, ...);
int gzprintf(gzFile file, const char *format, ...)
{
	int count, len;
	char *text;
	va_list ap, ap_len;

	zlib_stats_inc(&zlib_stats.gzprintf);
	check_sym(p_gzprintf, -1);

	va_start(ap, format);

	/* 1st pass: figure out how much space the text requires */
	va_copy(ap_len, ap);
	len = vsnprintf(NULL, 0, format, ap_len);
	va_end(ap_len);
	if (len < 0) {
		va_end(ap);
		return -1;
	}

	text = malloc((size_t)len + 1);
	if (text == NULL) {
		va_end(ap);
		return -1;
	}

	/* 2nd pass: produce the text */
	vsnprintf(text, (size_t)len + 1, format, ap);
	va_end(ap);

	count = (* p_gzprintf)(file, "%s", text);
	free(text);
	return count;
}
crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) { zlib_stats_inc(&zlib_stats.crc32_combine64); check_sym(p_crc32_combine64, Z_STREAM_ERROR); return (* p_crc32_combine64)(crc1, crc2, len2); } static gzFile (* p_gzopen64)(const char *path, const char *mode); gzFile gzopen64(const char *path, const char *mode) { zlib_stats_inc(&zlib_stats.gzopen64); check_sym(p_gzopen64, NULL); return (* p_gzopen64)(path, mode); } static z_off64_t (* p_gztell64)(gzFile file); z_off64_t gztell64(gzFile file) { zlib_stats_inc(&zlib_stats.gztell64); check_sym(p_gztell64, -1ll); return (* p_gztell64)(file); } static z_off_t (* p_gzseek64)(gzFile file, z_off64_t offset, int whence); z_off_t gzseek64(gzFile file, z_off64_t offset, int whence) { zlib_stats_inc(&zlib_stats.gzseek64); check_sym(p_gzseek64, -1ll); return (* p_gzseek64)(file, offset, whence); } static z_off_t (* p_gzoffset)(gzFile file); z_off_t gzoffset(gzFile file) { zlib_stats_inc(&zlib_stats.gzoffset); check_sym(p_gzoffset, -1ll); return (* p_gzoffset)(file); } static z_off64_t (* p_gzoffset64)(gzFile file); z_off_t gzoffset64(gzFile file) { zlib_stats_inc(&zlib_stats.gzoffset64); check_sym(p_gzoffset64, -1ll); return (* p_gzoffset64)(file); } static const z_crc_t *(* p_get_crc_table)(void); const z_crc_t *get_crc_table() { zlib_stats_inc(&zlib_stats.get_crc_table); check_sym(p_get_crc_table, NULL); return (* p_get_crc_table)(); } #endif /** * NOTE: We had different variants trying to find the right libz.so.1 * in our system. Unfortunately this can differ for the various * distributions: * * RHEL7.2: * $ ldconfig -p | grep libz.so.1 | cut -d' ' -f4 | head -n1 * /lib64/libz.so.1 * * Ubuntu 15.10: * $ ldconfig -p | grep libz.so.1 | cut -d' ' -f4 | head -n1 * /lib/powerpc64le-linux-gnu/libz.so.1 * * Intel with RHEL6.7: * $ ldconfig -p | grep libz.so.1 | cut -d' ' -f4 | head -n1 * /lib64/libz.so.1 * * We are setting this via config.mk option and allow the * distributions to overwrite this via rpm spec file. 
* * NOTE: We tried loading "libz.so.1" without full path, but that * turned out to be dangerous. We tried to load our own lib, which was * leading to endless recursive calls and segfaults as results. */ void zedc_sw_init(void) { char *error; const char *zlib_path = getenv("ZLIB_PATH"); /* User has setup environment variable to find libz.so.1 */ if (zlib_path != NULL) { sw_trace("Loading software zlib \"%s\"\n", zlib_path); dlerror(); handle = dlopen(zlib_path, RTLD_LAZY); if (handle != NULL) goto load_syms; } /* We saw dlopen returning non NULL value in case of passing ""! */ if (strcmp(CONFIG_ZLIB_PATH, "") == 0) { pr_err(" Empty CONFIG_ZLIB_PATH \"%s\"\n", CONFIG_ZLIB_PATH); return; } /* Loading private zlib.so.1 using CONFIG_ZLIB_PATH */ sw_trace("Loading software zlib \"%s\"\n", CONFIG_ZLIB_PATH); dlerror(); handle = dlopen(CONFIG_ZLIB_PATH, RTLD_LAZY); if (handle == NULL) { pr_err(" %s\n", dlerror()); return; } load_syms: register_sym(zlibVersion); sw_trace(" ZLIB_VERSION=%s (header) zlibVersion()=%s (code)\n", ZLIB_VERSION, z_zlibVersion()); if (strcmp(ZLIB_VERSION, z_zlibVersion()) != 0) { pr_err("libz.so.1=%s and zlib.h=%s do not match!\n", z_zlibVersion(), ZLIB_VERSION); return; } register_sym(deflateInit2_); register_sym(deflateParams); register_sym(deflateBound); register_sym(deflateReset); register_sym(deflatePrime); register_sym(deflateCopy); register_sym(deflate); register_sym(deflateSetDictionary); register_sym(deflateSetHeader); register_sym(deflateEnd); register_sym(inflateInit2_); register_sym(inflateSync); register_sym(inflatePrime); register_sym(inflate); register_sym(inflateReset); register_sym(inflateReset2); register_sym(inflateSetDictionary); register_sym(inflateGetDictionary); register_sym(inflateGetHeader); register_sym(inflateEnd); register_sym(inflateBackInit_); register_sym(inflateBack); register_sym(inflateBackEnd); register_sym(gzopen); register_sym(gzdopen); register_sym(gzwrite); register_sym(gzread); register_sym(gzclose); 
register_sym(gzflush); register_sym(gzungetc); register_sym(gzeof); register_sym(gztell); register_sym(gzerror); register_sym(gzseek); register_sym(gzrewind); register_sym(gzputs); register_sym(gzputc); register_sym(gzgetc); register_sym(gzputs); register_sym(gzprintf); register_sym(compress); register_sym(compress2); register_sym(compressBound); register_sym(uncompress); register_sym(zError); register_sym(zlibCompileFlags); register_sym(adler32); register_sym(adler32_combine); register_sym(crc32); register_sym(crc32_combine); #if ZLIB_VERNUM >= 0x1270 register_sym(gzbuffer); register_sym(gzopen64); register_sym(gzseek64); register_sym(gztell64); register_sym(gzoffset); register_sym(gzoffset64); register_sym(adler32_combine64); register_sym(crc32_combine64); register_sym(get_crc_table); #endif } void zedc_sw_done(void) { if (handle != NULL) { sw_trace("Closing software zlib\n"); dlclose(handle); } } #else /* * Prefixing symbols has nasty side effects. One of them is that libc * symbols get prefixed too. Such that we see z_free and not free * anymore. Let us fix this up here to see if it works in general. 
/**
 * z_ prefixed replacement for snprintf(), needed since prefixing the
 * symbols of the statically linked libz prefixes its libc references
 * too.
 *
 * The variable arguments have to be passed on via vsnprintf().
 * Calling snprintf() with the va_list as additional argument made it
 * interpret the va_list object itself as first format argument, which
 * results in garbage output or a crash.
 *
 * @param str		destination buffer
 * @param size		size of the destination buffer in bytes
 * @param format	printf style format string
 *
 * @return same as vsnprintf(): number of characters which would have
 *         been written if @size had been large enough, not counting
 *         the terminating NUL byte; negative on error
 */
int z_snprintf(char *str, size_t size, const char *format, ...);
int z_snprintf(char *str, size_t size, const char *format, ...)
{
	int rc;
	va_list ap;

	va_start(ap, format);
	rc = vsnprintf(str, size, format, ap);
	va_end(ap);

	return rc;
}
*/ #include #include #include #include #include #include #include #include /* standard interface */ #include "libddcb.h" #include "wrapper.h" /* * Functionality to switch between hardware and software zlib * implementations. Enhanced by tracing functionality for debugging * and workload analysis. Hardware performs best with sufficiently * large input and output buffers. * * FIXME setDictionary code needs to be properly tested and reviewed. * Most likely it is not working right and needs to be fixed too. * * The 1st implementation to fallback to SW if the input buffer for * inflate is too small was to delay the h_inflateInit until init was * called. This finally resulted in a solution where in inflateReset I * called inflateEnd, which was causing inflateInit in inflate. This * might call obsolete memory allocations and freeing. Therefore I * gave up this approach and try to do the inflateEnd and new * inflateInit only if the fallback occurs. */ /* * Select default setting for accelerated zlib. Older version used * software as default. Since the library is packaged as extra * libz.so, we assume that users of it like to use hardware as * default. */ #define CONFIG_INFLATE_IMPL (ZLIB_HW_IMPL | ZLIB_FLAG_OMIT_LAST_DICT) #define CONFIG_DEFLATE_IMPL (ZLIB_HW_IMPL | ZLIB_FLAG_OMIT_LAST_DICT) #ifndef DEF_WBITS # define DEF_WBITS MAX_WBITS #endif /* default windowBits for decompression. 
/*
 * Global settings of the wrapper. The initial values come from the
 * CONFIG_* defines of this file; several of them are overwritten
 * from environment variables by the library constructor.
 */
int zlib_trace = 0x0;	/* no trace by default */
FILE *zlib_log = NULL;	/* default is stderr, unless overwritten */
int zlib_accelerator = DDCB_TYPE_GENWQE;	/* see zlib_set_accelerator() */
int zlib_card = -1;	/* Using redundant now as default */

/*
 * CONFIG_INFLATE_IMPL/CONFIG_DEFLATE_IMPL carry the implementation
 * selector and additional flags in one value. ZLIB_IMPL_MASK is used
 * to split them into the *_impl and the *_flags variables.
 */
unsigned int zlib_inflate_impl = (CONFIG_INFLATE_IMPL & ZLIB_IMPL_MASK);
unsigned int zlib_deflate_impl = (CONFIG_DEFLATE_IMPL & ZLIB_IMPL_MASK);
unsigned int zlib_inflate_flags = (CONFIG_INFLATE_IMPL & ~ZLIB_IMPL_MASK);
unsigned int zlib_deflate_flags = (CONFIG_DEFLATE_IMPL & ~ZLIB_IMPL_MASK);

/* threshold in bytes, 0 disables it (see CONFIG_INFLATE_THRESHOLD) */
static unsigned int zlib_inflate_threshold = CONFIG_INFLATE_THRESHOLD;

pthread_mutex_t zlib_stats_mutex;	/* mutex to protect global stats */
struct zlib_stats zlib_stats;		/* global statistics */
/**
 * str_to_num - Convert string into number and cope with endings like
 *   KiB for kilobyte
 *   MiB for megabyte
 *   GiB for gigabyte
 *
 * The numeric part is parsed by strtoull() with base 0, such that
 * decimal, octal (leading 0) and hexadecimal (leading 0x) notations
 * are accepted.
 *
 * @param str	string to convert; it is not modified
 *
 * @return converted number. ULLONG_MAX with errno set to ERANGE is
 *         returned if the ending is not one of those listed above or
 *         if the result does not fit into 64 bits. The latter case
 *         silently wrapped around before.
 */
uint64_t str_to_num(char *str)
{
	char *end = str;
	uint64_t num, factor;

	num = strtoull(str, &end, 0);
	if (*end == '\0')
		return num;

	if (strcmp(end, "KiB") == 0)
		factor = 1024ull;
	else if (strcmp(end, "MiB") == 0)
		factor = 1024ull * 1024;
	else if (strcmp(end, "GiB") == 0)
		factor = 1024ull * 1024 * 1024;
	else {
		errno = ERANGE;
		return ULLONG_MAX;
	}

	/* multiplying must not exceed the 64-bit range */
	if (num > ULLONG_MAX / factor) {
		errno = ERANGE;
		return ULLONG_MAX;
	}
	return num * factor;
}
*/ const char *ret_to_str(int ret) { switch (ret) { case Z_OK: return "Z_OK"; case Z_STREAM_END: return "Z_STREAM_END"; case Z_NEED_DICT: return "Z_NEED_DICT"; case Z_ERRNO: return "Z_ERRNO"; case Z_STREAM_ERROR: return "Z_STREAM_ERROR"; case Z_DATA_ERROR: return "Z_DATA_ERROR"; case Z_MEM_ERROR: return "Z_MEM_ERROR"; case Z_BUF_ERROR: return "Z_BUF_ERROR"; case Z_VERSION_ERROR: return "Z_BUF_ERROR"; default: return "UNKNOWN"; } } /** * Pretty print flush codes for tracing. */ const char *flush_to_str(int flush) { switch (flush) { case Z_NO_FLUSH: return "Z_NO_FLUSH"; case Z_PARTIAL_FLUSH: return "Z_PARTIAL_FLUSH"; case Z_SYNC_FLUSH: return "Z_SYNC_FLUSH"; case Z_FULL_FLUSH: return "Z_FULL_FLUSH"; case Z_FINISH: return "Z_FINISH"; case Z_BLOCK: return "Z_BLOCK"; #if defined(Z_TREES) /* older zlibs do not have this */ case Z_TREES: return "Z_TREES"; #endif default: return "UNKNOWN"; } } static void _init(void) __attribute__((constructor)); /** * FIXME With the new zlib load mechanism a new problem arose: How do * I prevent us from loading ourselves? 
*/ static void _init(void) { int rc; const char *trace, *inflate_impl, *deflate_impl, *method; const char *zlib_logfile = NULL; char *inflate_threshold; zlib_logfile = getenv("ZLIB_LOGFILE"); if (zlib_logfile != NULL) { zlib_log = fopen(zlib_logfile, "a+"); if (zlib_log == NULL) zlib_log = stderr; } else zlib_log = stderr; trace = getenv("ZLIB_TRACE"); if (trace != NULL) zlib_trace = strtol(trace, (char **)NULL, 0); deflate_impl = getenv("ZLIB_DEFLATE_IMPL"); if (deflate_impl != NULL) { zlib_deflate_impl = strtol(deflate_impl, (char **)NULL, 0); zlib_deflate_flags = zlib_deflate_impl & ~ZLIB_IMPL_MASK; zlib_deflate_impl &= ZLIB_IMPL_MASK; if (zlib_deflate_impl >= ZLIB_MAX_IMPL) zlib_deflate_impl = ZLIB_SW_IMPL; } inflate_impl = getenv("ZLIB_INFLATE_IMPL"); if (inflate_impl != NULL) { zlib_inflate_impl = strtol(inflate_impl, (char **)NULL, 0); zlib_inflate_flags = zlib_inflate_impl & ~ZLIB_IMPL_MASK; zlib_inflate_impl &= ZLIB_IMPL_MASK; if (zlib_inflate_impl >= ZLIB_MAX_IMPL) zlib_inflate_impl = ZLIB_SW_IMPL; } inflate_threshold = getenv("ZLIB_INFLATE_THRESHOLD"); if (inflate_threshold != NULL) zlib_inflate_threshold = str_to_num(inflate_threshold); /* * Do it similar like zOS did it, such that we can share * test-cases and documentation. If _HZC_COMPRESSION_METHOD is * matching the string "software" we enforce software * operation. 
*/ method = getenv("_HZC_COMPRESSION_METHOD"); if ((method != NULL) && (strcmp(method, "software") == 0)) { zlib_inflate_impl = ZLIB_SW_IMPL; zlib_deflate_impl = ZLIB_SW_IMPL; } pr_trace("%s: BUILD=%s ZLIB_TRACE=%x ZLIB_INFLATE_IMPL=%d " "ZLIB_DEFLATE_IMPL=%d ZLIB_INFLATE_THRESHOLD=%d\n", __func__, GIT_VERSION, zlib_trace, zlib_inflate_impl, zlib_deflate_impl, zlib_inflate_threshold); if (zlib_gather_statistics()) { rc = pthread_mutex_init(&zlib_stats_mutex, NULL); if (rc != 0) pr_err("initializing phtread_mutex failed!\n"); } /* Software is done first such that zlibVersion already work */ zedc_sw_init(); zedc_hw_init(); } static void __deflate_update_totals(z_streamp strm) { unsigned int total_in_slot, total_out_slot; if (strm->total_in) { total_in_slot = strm->total_in / 4096; if (total_in_slot >= ZLIB_SIZE_SLOTS) total_in_slot = ZLIB_SIZE_SLOTS - 1; zlib_stats.deflate_total_in[total_in_slot]++; } if (strm->total_out) { total_out_slot = strm->total_out / 4096; if (total_out_slot >= ZLIB_SIZE_SLOTS) total_out_slot = ZLIB_SIZE_SLOTS - 1; zlib_stats.deflate_total_out[total_out_slot]++; } } static void __inflate_update_totals(z_streamp strm) { unsigned int total_in_slot, total_out_slot; if (strm->total_in) { total_in_slot = strm->total_in / 4096; if (total_in_slot >= ZLIB_SIZE_SLOTS) total_in_slot = ZLIB_SIZE_SLOTS - 1; zlib_stats.inflate_total_in[total_in_slot]++; } if (strm->total_out) { total_out_slot = strm->total_out / 4096; if (total_out_slot >= ZLIB_SIZE_SLOTS) total_out_slot = ZLIB_SIZE_SLOTS - 1; zlib_stats.inflate_total_out[total_out_slot]++; } } /** * Some statistics we print always, others we just print if someone * actually called the function. Print out variable if it is not * 0. Use variable name as string for the description. */ #define __sss(s...) #s #define __stringify(s...) 
__sss(s) #define pr_stat(s, var) do { \ if ((s)->var) \ pr_info("%s: %lu\n", __stringify(var), (s)->var); \ } while (0) /** * __print_stats(): When library is not used any longer, print out * statistics e.g. when trace flag is set. This function is not * locking stats_mutex. */ static void __print_stats(void) { unsigned int i; struct zlib_stats *s = &zlib_stats; pthread_mutex_lock(&zlib_stats_mutex); pr_info("deflateInit: %ld\n", s->deflateInit); pr_info("deflate: %ld sw: %ld hw: %ld\n", s->deflate[ZLIB_SW_IMPL] + s->deflate[ZLIB_HW_IMPL], s->deflate[ZLIB_SW_IMPL], s->deflate[ZLIB_HW_IMPL]); for (i = 0; i < ARRAY_SIZE(s->deflate_avail_in); i++) { if (s->deflate_avail_in[i] == 0) continue; pr_info(" deflate_avail_in %4i KiB: %ld\n", (i + 1) * 4, s->deflate_avail_in[i]); } for (i = 0; i < ARRAY_SIZE(s->deflate_avail_out); i++) { if (s->deflate_avail_out[i] == 0) continue; pr_info(" deflate_avail_out %4i KiB: %ld\n", (i + 1) * 4, s->deflate_avail_out[i]); } for (i = 0; i < ARRAY_SIZE(s->deflate_total_in); i++) { if (s->deflate_total_in[i] == 0) continue; pr_info(" deflate_total_in %4i KiB: %ld\n", (i + 1) * 4, s->deflate_total_in[i]); } for (i = 0; i < ARRAY_SIZE(s->deflate_total_out); i++) { if (s->deflate_total_out[i] == 0) continue; pr_info(" deflate_total_out %4i KiB: %ld\n", (i + 1) * 4, s->deflate_total_out[i]); } pr_stat(s, deflateReset); pr_stat(s, deflateParams); pr_stat(s, deflateBound); pr_stat(s, deflateSetDictionary); pr_stat(s, deflateSetHeader); pr_stat(s, deflatePrime); pr_stat(s, deflateCopy); pr_info("deflateEnd: %ld\n", s->deflateEnd); pr_info("inflateInit: %ld\n", s->inflateInit); pr_info("inflate: %ld sw: %ld hw: %ld\n", s->inflate[ZLIB_SW_IMPL] + s->inflate[ZLIB_HW_IMPL], s->inflate[ZLIB_SW_IMPL], s->inflate[ZLIB_HW_IMPL]); for (i = 0; i < ARRAY_SIZE(s->inflate_avail_in); i++) { if (s->inflate_avail_in[i] == 0) continue; pr_info(" inflate_avail_in %4i KiB: %ld\n", (i + 1) * 4, s->inflate_avail_in[i]); } for (i = 0; i < 
ARRAY_SIZE(s->inflate_avail_out); i++) { if (s->inflate_avail_out[i] == 0) continue; pr_info(" inflate_avail_out %4i KiB: %ld\n", (i + 1) * 4, s->inflate_avail_out[i]); } for (i = 0; i < ARRAY_SIZE(s->inflate_total_in); i++) { if (s->inflate_total_in[i] == 0) continue; pr_info(" inflate_total_in %4i KiB: %ld\n", (i + 1) * 4, s->inflate_total_in[i]); } for (i = 0; i < ARRAY_SIZE(s->inflate_total_out); i++) { if (s->inflate_total_out[i] == 0) continue; pr_info(" inflate_total_out %4i KiB: %ld\n", (i + 1) * 4, s->inflate_total_out[i]); } pr_stat(s, inflateReset); pr_stat(s, inflateReset2); pr_stat(s, inflateSetDictionary); pr_stat(s, inflateGetDictionary); pr_stat(s, inflateGetHeader); pr_stat(s, inflateSync); pr_stat(s, inflatePrime); pr_stat(s, inflateCopy); pr_info("inflateEnd: %ld\n", s->inflateEnd); pr_stat(s, adler32); pr_stat(s, adler32_combine); pr_stat(s, crc32); pr_stat(s, crc32_combine); pr_stat(s, adler32_combine64); pr_stat(s, crc32_combine64); pr_stat(s, get_crc_table); pr_stat(s, gzopen64); pr_stat(s, gzopen); pr_stat(s, gzdopen); pr_stat(s, gzbuffer); pr_stat(s, gztell64); pr_stat(s, gztell); pr_stat(s, gzseek64); pr_stat(s, gzseek); pr_stat(s, gzwrite); pr_stat(s, gzread); pr_stat(s, gzclose); pr_stat(s, gzoffset64); pr_stat(s, gzoffset); pr_stat(s, gzrewind); pr_stat(s, gzputs); pr_stat(s, gzgets); pr_stat(s, gzputc); pr_stat(s, gzgetc); pr_stat(s, gzungetc); pr_stat(s, gzprintf); pr_stat(s, gzerror); pr_stat(s, gzeof); pr_stat(s, gzflush); pr_stat(s, compress); pr_stat(s, compress2); pr_stat(s, compressBound); pr_stat(s, uncompress); pthread_mutex_unlock(&zlib_stats_mutex); } /** * If there is no hardware available we retry automatically the * software version. 
*/
static int __deflateInit2_(z_streamp strm, struct _internal_state *w)
{
	int rc = Z_OK;
	int attempt;

	/* drop to SW mode, HW does not support level 0 */
	if (w->level == Z_NO_COMPRESSION)
		w->impl = ZLIB_SW_IMPL;

	/*
	 * At most two attempts: the implementation stored in w first
	 * and, if that one fails, the software implementation.
	 */
	for (attempt = 0; attempt < 2; attempt++) {
		pr_trace("[%p] __deflateInit2_: w=%p level=%d method=%d "
			 "windowBits=%d memLevel=%d strategy=%d version=%s/%s "
			 "stream_size=%d impl=%d\n",
			 strm, w, w->level, w->method, w->windowBits,
			 w->memLevel, w->strategy, w->version, zlibVersion(),
			 w->stream_size, w->impl);

		if (w->impl)
			rc = h_deflateInit2_(strm, w->level, w->method,
					     w->windowBits, w->memLevel,
					     w->strategy, w->version,
					     w->stream_size);
		else
			rc = z_deflateInit2_(strm, w->level, w->method,
					     w->windowBits, w->memLevel,
					     w->strategy, w->version,
					     w->stream_size);

		if (rc == Z_OK)
			break;

		pr_trace("[%p] %s: fallback to software (rc=%d)\n",
			 strm, __func__, rc);
		w->impl = ZLIB_SW_IMPL;
	}

	return rc;
}

/**
 * deflateInit2_() - Initialize deflate context. If the hardware
 * implementation fails for some reason the code tries the software
 * version.
*/ int deflateInit2_(z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy, const char *version, int stream_size) { int rc = Z_OK; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; zlib_stats_inc(&zlib_stats.deflateInit); w = calloc(1, sizeof(*w)); if (w == NULL) return Z_ERRNO; w->magic0 = MAGIC0; w->magic1 = MAGIC1; w->level = level; w->method = method; w->windowBits = windowBits; w->memLevel = memLevel; w->strategy = strategy; w->version = version; w->stream_size = stream_size; w->priv_data = NULL; w->impl = zlib_deflate_impl; /* try default first */ rc = __deflateInit2_(strm, w); if (rc != Z_OK) { free(w); } else { w->priv_data = strm->state; /* backup sublevel state */ strm->state = (void *)w; } return rc; } int deflateInit_(z_streamp strm, int level, const char *version, int stream_size) { return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, version, stream_size); } int deflateReset(z_streamp strm) { int rc; struct _internal_state *w; if (!has_wrapper_state(strm)) return z_deflateReset(strm); if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; pr_trace("[%p] deflateReset w=%p impl=%d\n", strm, w, w->impl); if (zlib_gather_statistics()) { pthread_mutex_lock(&zlib_stats_mutex); zlib_stats.deflateReset++; __deflate_update_totals(strm); pthread_mutex_unlock(&zlib_stats_mutex); } strm->state = w->priv_data; rc = w->impl ? 
h_deflateReset(strm) : z_deflateReset(strm); strm->state = (void *)w; return rc; } int deflateSetDictionary(z_streamp strm, const Bytef *dictionary, uInt dictLength) { int rc; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; pr_trace("[%p] deflateSetDictionary: dictionary=%p dictLength=%d " "adler32=%08llx\n", strm, dictionary, dictLength, (long long)z_adler32(1, dictionary, dictLength)); zlib_stats_inc(&zlib_stats.deflateSetDictionary); strm->state = w->priv_data; rc = w->impl ? h_deflateSetDictionary(strm, dictionary, dictLength) : z_deflateSetDictionary(strm, dictionary, dictLength); strm->state = (void *)w; return rc; } int deflateSetHeader(z_streamp strm, gz_headerp head) { int rc; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; pr_trace("[%p] deflateSetHeader\n", strm); zlib_stats_inc(&zlib_stats.deflateSetHeader); strm->state = w->priv_data; rc = w->impl ? h_deflateSetHeader(strm, head) : z_deflateSetHeader(strm, head); strm->state = (void *)w; return rc; } int deflatePrime(z_streamp strm, int bits, int value) { int rc; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; zlib_stats_inc(&zlib_stats.deflatePrime); strm->state = w->priv_data; rc = w->impl ? 
Z_UNSUPPORTED : z_deflatePrime(strm, bits, value); strm->state = (void *)w; return rc; } int deflateCopy(z_streamp dest, z_streamp source) { int rc; struct _internal_state *w_source; struct _internal_state *w_dest; pr_trace("[%p] deflateCopy: dest=%p source=%p\n", source, dest, source); if ((dest == NULL) || (source == NULL)) return Z_STREAM_ERROR; memcpy(dest, source, sizeof(*dest)); w_source = (struct _internal_state *)source->state; if (w_source == NULL) return Z_STREAM_ERROR; zlib_stats_inc(&zlib_stats.deflateCopy); w_dest = calloc(1, sizeof(*w_dest)); if (w_dest == NULL) return Z_ERRNO; memcpy(w_dest, w_source, sizeof(*w_dest)); source->state = w_source->priv_data; dest->state = NULL; /* this needs to be created */ rc = w_source->impl ? h_deflateCopy(dest, source): z_deflateCopy(dest, source); if (rc != Z_OK) { pr_err("[%p] deflateCopy returned %d\n", source, rc); free(w_dest); w_dest = NULL; goto err_out; } w_dest->priv_data = dest->state; dest->state = (void *)w_dest; err_out: source->state = (void *)w_source; return rc; } int deflate(z_streamp strm, int flush) { int rc = 0; struct _internal_state *w; unsigned int avail_in_slot, avail_out_slot; if (0 == has_wrapper_state(strm)) { rc = z_deflate(strm, flush); return rc; } if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; if (zlib_gather_statistics()) { pthread_mutex_lock(&zlib_stats_mutex); avail_in_slot = strm->avail_in / 4096; if (avail_in_slot >= ZLIB_SIZE_SLOTS) avail_in_slot = ZLIB_SIZE_SLOTS - 1; zlib_stats.deflate_avail_in[avail_in_slot]++; avail_out_slot = strm->avail_out / 4096; if (avail_out_slot >= ZLIB_SIZE_SLOTS) avail_out_slot = ZLIB_SIZE_SLOTS - 1; zlib_stats.deflate_avail_out[avail_out_slot]++; zlib_stats.deflate[w->impl]++; pthread_mutex_unlock(&zlib_stats_mutex); } pr_trace("[%p] deflate: flush=%s next_in=%p avail_in=%d " "next_out=%p avail_out=%d total_out=%ld crc/adler=%08lx " "impl=%d\n", strm, flush_to_str(flush), 
strm->next_in, strm->avail_in, strm->next_out, strm->avail_out, strm->total_out, strm->adler, w->impl); strm->state = w->priv_data; /* impl can only be ZLIB_HW_IMPL or ZLIB_SW_IMPL */ switch (w->impl) { case ZLIB_HW_IMPL: rc = h_deflate(strm, flush); break; case ZLIB_SW_IMPL: rc = z_deflate(strm, flush); break; default: pr_trace("[%p] deflate: impl (%d) is not valid for me\n", strm, w->impl); break; } strm->state = (void *)w; pr_trace("[%p] flush=%s next_in=%p avail_in=%d " "next_out=%p avail_out=%d total_out=%ld crc/adler=%08lx " "rc=%s\n", strm, flush_to_str(flush), strm->next_in, strm->avail_in, strm->next_out, strm->avail_out, strm->total_out, strm->adler, ret_to_str(rc)); return rc; } static int __deflateEnd(z_streamp strm, struct _internal_state *w) { int rc; if (strm == NULL) return Z_STREAM_ERROR; if (w == NULL) return Z_STREAM_ERROR; strm->state = w->priv_data; rc = w->impl ? h_deflateEnd(strm) : z_deflateEnd(strm); strm->state = NULL; return rc; } uLong deflateBound(z_streamp strm, uLong sourceLen) { int rc; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; zlib_stats_inc(&zlib_stats.deflateBound); strm->state = w->priv_data; rc = w->impl ? 
h_deflateBound(strm, sourceLen) : z_deflateBound(strm, sourceLen); strm->state = (void *)w; return rc; } int deflateEnd(z_streamp strm) { int rc; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; if (zlib_gather_statistics()) { pthread_mutex_lock(&zlib_stats_mutex); zlib_stats.deflateEnd++; __deflate_update_totals(strm); pthread_mutex_unlock(&zlib_stats_mutex); } rc = __deflateEnd(strm, w); free(w); pr_trace("[%p] deflateEnd w=%p rc=%d\n", strm, w, rc); return rc; } /** * HW283780 LIR 40774: java.lang.InternalError seen when NoCompression * is set in Hardware Mode on Linux P * * Once we are in HW compression mode the HW will always do the * same. There is not chance here, like it is in software to change * the level or strategy. We return Z_OK to keep the calling code * happy. If that code would start checking the resulting data, it * will see that the HW compression was not doing what it was supposed * to do e.g. do a sync and produce copy-blocks e.g. when level is set * to 0. */ int deflateParams(z_streamp strm, int level, int strategy) { int rc = Z_OK; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; /* Let us adjust level and strategy */ w->level = level; w->strategy = strategy; zlib_stats_inc(&zlib_stats.deflateParams); pr_trace("[%p] deflateParams level=%d strategy=%d impl=%d\n", strm, level, strategy, w->impl); strm->state = w->priv_data; switch (w->impl) { case ZLIB_HW_IMPL: /* * For the Z_NO_COMPRESSION case, implement fallback * to software. This is for the case where w->level * was have been setup by deflateParams(). 
*/ if ((strm->total_in != 0) || (w->level != Z_NO_COMPRESSION)) { strm->state = (void *)w; return Z_OK; } /* Redo initialization in software mode */ pr_trace("[%p] Z_NO_COMPRESSION total_in=%ld\n", strm, strm->total_in); rc = __deflateEnd(strm, w); if (rc != Z_OK) goto err; strm->total_in = 0; strm->total_out = 0; rc = __deflateInit2_(strm, w); if (rc != Z_OK) goto err; w->priv_data = strm->state; /* backup sublevel state */ break; case ZLIB_SW_IMPL: rc = z_deflateParams(strm, level, strategy); break; default: pr_err("[%p] deflateParams impl=%d invalid\n", strm, w->impl); break; } err: strm->state = (void *)w; return rc; } static int __inflateInit2_(z_streamp strm, struct _internal_state *w) { int rc, retries; if (strm == NULL) return Z_STREAM_ERROR; if (w == NULL) return Z_STREAM_ERROR; retries = 0; do { pr_trace("[%p] inflateInit2_: w=%p windowBits=%d " "version=%s/%s stream_size=%d impl=%d\n", strm, w, w->windowBits, w->version, zlibVersion(), w->stream_size, w->impl); rc = w->impl ? 
h_inflateInit2_(strm, w->windowBits, w->version, w->stream_size) : z_inflateInit2_(strm, w->windowBits, w->version, w->stream_size); if (Z_OK == rc) break; /* OK, i Can exit now */ pr_trace("[%p] %s: fallback to software (rc=%d)\n", strm, __func__, rc); w->impl = ZLIB_SW_IMPL; w->allow_switching = false; retries++; } while (retries < 2); if (rc != Z_OK) goto err; w->priv_data = strm->state; /* backup sublevel state */ err: return rc; } int inflateInit2_(z_streamp strm, int windowBits, const char *version, int stream_size) { int rc = Z_OK; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; strm->total_in = 0; strm->total_out = 0; zlib_stats_inc(&zlib_stats.inflateInit); w = calloc(1, sizeof(*w)); if (w == NULL) return Z_MEM_ERROR; w->allow_switching = true; w->magic0 = MAGIC0; w->magic1 = MAGIC1; w->windowBits = windowBits; w->version = version; w->stream_size = stream_size; w->priv_data = NULL; w->impl = zlib_inflate_impl; /* try default first */ w->dictLength = 0; if (!z_hasGetDictionary()) { w->dictionary = calloc(1, ZLIB_MAXDICTLEN); if (w->dictionary == NULL) { rc = Z_MEM_ERROR; goto free_w; } } rc = __inflateInit2_(strm, w); if (rc == Z_OK) strm->state = (void *)w; else goto free_dict; return rc; free_dict: if (w->dictionary) { free(w->dictionary); w->dictionary = NULL; } free_w: free(w); return rc; } int inflateInit_(z_streamp strm, const char *version, int stream_size) { return inflateInit2_(strm, DEF_WBITS, version, stream_size); } int inflateReset(z_streamp strm) { int rc; struct _internal_state *w; if (!has_wrapper_state(strm)) return z_inflateReset(strm); if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; /* * We need to count even though priv_data could still be * NULL. Otherwise the statistics will not be right at the * end. 
*/ pr_trace("[%p] inflateReset\n", strm); if (zlib_gather_statistics()) { pthread_mutex_lock(&zlib_stats_mutex); zlib_stats.inflateReset++; __inflate_update_totals(strm); pthread_mutex_unlock(&zlib_stats_mutex); } w->allow_switching = true; w->gzhead = NULL; /* clear gz header */ w->dictLength = 0; /* clear cached dictionary */ strm->state = w->priv_data; rc = (w->impl) ? h_inflateReset(strm) : z_inflateReset(strm); strm->total_in = 0; strm->total_out = 0; strm->state = (void *)w; return rc; } extern int inflateReset2(z_streamp strm, int windowBits); int inflateReset2(z_streamp strm, int windowBits) { int rc; struct _internal_state *w; if (!has_wrapper_state(strm)) return z_inflateReset2(strm, windowBits); if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; /* * We need to count even though priv_data could still be * NULL. Otherwise the statistics will not be right at the * end. */ pr_trace("[%p] inflateReset2 impl=%d\n", strm, w->impl); if (zlib_gather_statistics()) { pthread_mutex_lock(&zlib_stats_mutex); zlib_stats.inflateReset2++; __inflate_update_totals(strm); pthread_mutex_unlock(&zlib_stats_mutex); } w->allow_switching = true; w->dictLength = 0; /* clear cached dictionary */ strm->state = w->priv_data; rc = (w->impl) ? 
h_inflateReset2(strm, windowBits) : z_inflateReset2(strm, windowBits); strm->total_in = 0; strm->total_out = 0; strm->state = (void *)w; return rc; } int inflateSetDictionary(z_streamp strm, const Bytef *dictionary, uInt dictLength) { int rc; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; zlib_stats_inc(&zlib_stats.inflateSetDictionary); strm->state = w->priv_data; if (w->impl) rc = h_inflateSetDictionary(strm, dictionary, dictLength); else { rc = z_inflateSetDictionary(strm, dictionary, dictLength); /* Update a private copy for the case we have not inflateGetDict */ if (!z_hasGetDictionary()) { memcpy(w->dictionary, dictionary, MIN((uInt)ZLIB_MAXDICTLEN, dictLength)); w->dictLength = dictLength; } } strm->state = (void *)w; pr_trace("[%p] inflateSetDictionary: dictionary=%p dictLength=%d " "adler32=%08llx rc=%d\n", strm, dictionary, dictLength, (long long)z_adler32(1, dictionary, dictLength), rc); return rc; } /** * zlib older than 1.2.8 has no inflateGetDictionary. To get the * software/hardware switching working without this function we create * a copy of the dictionary. I a user has set it via * inflateSetDictionary, we have still a copy in this code which we * can use. 
*/ extern int inflateGetDictionary(z_streamp strm, Bytef *dictionary, uInt *dictLength); int inflateGetDictionary(z_streamp strm, Bytef *dictionary, uInt *dictLength) { int rc = Z_OK; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; zlib_stats_inc(&zlib_stats.inflateGetDictionary); strm->state = w->priv_data; if (w->impl) rc = h_inflateGetDictionary(strm, dictionary, dictLength); else { if (z_hasGetDictionary()) rc = z_inflateGetDictionary(strm, dictionary, dictLength); else { memcpy(dictionary, w->dictionary, w->dictLength); if (dictLength) *dictLength = w->dictLength; } } strm->state = (void *)w; pr_trace("[%p] inflateGetDictionary: dictionary=%p &dictLength=%p " "rc=%d\n", strm, dictionary, dictLength, rc); return rc; } int inflateGetHeader(z_streamp strm, gz_headerp head) { int rc; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; pr_trace("[%p] inflateGetHeader: head=%p\n", strm, head); zlib_stats_inc(&zlib_stats.inflateGetHeader); w->gzhead = head; strm->state = w->priv_data; rc = w->impl ? h_inflateGetHeader(strm, head) : z_inflateGetHeader(strm, head); strm->state = (void *)w; return rc; } int inflatePrime(z_streamp strm, int bits, int value) { int rc; struct _internal_state *w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; zlib_stats_inc(&zlib_stats.inflatePrime); strm->state = w->priv_data; rc = w->impl ? Z_UNSUPPORTED : z_inflatePrime(strm, bits, value); strm->state = (void *)w; return rc; } int inflateSync(z_streamp strm) { int rc; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; zlib_stats_inc(&zlib_stats.inflateSync); strm->state = w->priv_data; rc = w->impl ? 
Z_UNSUPPORTED : z_inflateSync(strm); strm->state = (void *)w; return rc; } static int __inflateEnd(z_streamp strm, struct _internal_state *w) { int rc; if (strm == NULL) return Z_STREAM_ERROR; if (w == NULL) return Z_STREAM_ERROR; strm->state = w->priv_data; rc = w->impl ? h_inflateEnd(strm) : z_inflateEnd(strm); strm->state = NULL; return rc; } int inflateEnd(z_streamp strm) { int rc = Z_OK; struct _internal_state *w; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; if (zlib_gather_statistics()) { pthread_mutex_lock(&zlib_stats_mutex); zlib_stats.inflateEnd++; __inflate_update_totals(strm); pthread_mutex_unlock(&zlib_stats_mutex); } rc = __inflateEnd(strm, w); if (w->dictionary) { free(w->dictionary); w->dictionary = NULL; } free(w); pr_trace("[%p] inflateEnd w=%p rc=%d\n", strm, w, rc); return rc; } int inflate(z_streamp strm, int flush) { int rc = Z_OK; struct _internal_state *w; unsigned int avail_in_slot, avail_out_slot; uint8_t dictionary[ZLIB_MAXDICTLEN]; unsigned int dictLength = 0; if (strm == NULL) return Z_STREAM_ERROR; w = (struct _internal_state *)strm->state; if (w == NULL) return Z_STREAM_ERROR; /* * Special situation triggered by strange JAVA zlib use-case: * If we do not have any data to decompress, return * Z_BUF_ERROR instead of trying to decypher 0 bytes. The * decision to use HW or SW is deferred until we see avail_in * != 0 for the first time. */ if ((strm->total_in == 0) && (w->allow_switching)) { /* Special case where there is no data. This occurs quite often in the JAVA use-case. 
*/ if (strm->avail_in == 0) return Z_BUF_ERROR; if ((strm->avail_in < zlib_inflate_threshold) && (w->impl == ZLIB_HW_IMPL)) { pr_trace("[%p] inflate: avail_in=%d smaller " "%d switching to software mode!\n", strm, strm->avail_in, zlib_inflate_threshold); rc = inflateGetDictionary(strm, dictionary, &dictLength); if (rc != Z_OK) goto err; /* Free already allocated resources, but not w */ rc = __inflateEnd(strm, w); if (rc != Z_OK) goto err; /* Enforce software here! */ w->impl = ZLIB_SW_IMPL; /* Reinit but not w */ rc = __inflateInit2_(strm, w); if (rc != Z_OK) goto err; strm->state = (void *)w; if (w->gzhead != NULL) inflateGetHeader(strm, w->gzhead); if (dictLength != 0) { rc = inflateSetDictionary(strm, dictionary, dictLength); if (rc != Z_OK) { inflateEnd(strm); goto err; } } } else if ((strm->avail_in >= zlib_inflate_threshold) && (w->impl == ZLIB_SW_IMPL) && (zlib_inflate_impl == ZLIB_HW_IMPL)) { pr_trace("[%p] inflate: avail_in=%d bigger or equal " "%d switching to hardware mode!\n", strm, strm->avail_in, zlib_inflate_threshold); rc = inflateGetDictionary(strm, dictionary, &dictLength); if (rc != Z_OK) goto err; /* Free already allocated resources, but not w */ rc = __inflateEnd(strm, w); if (rc != Z_OK) goto err; /* Try hardware mode here! 
*/ w->impl = zlib_inflate_impl; /* Reinit but not w */ rc = __inflateInit2_(strm, w); if (rc != Z_OK) goto err; strm->state = (void *)w; if (w->gzhead != NULL) inflateGetHeader(strm, w->gzhead); if (dictLength != 0) { rc = inflateSetDictionary(strm, dictionary, dictLength); if (rc != Z_OK) { inflateEnd(strm); goto err; } } } } if (zlib_gather_statistics()) { pthread_mutex_lock(&zlib_stats_mutex); avail_in_slot = strm->avail_in / 4096; if (avail_in_slot >= ZLIB_SIZE_SLOTS) avail_in_slot = ZLIB_SIZE_SLOTS - 1; zlib_stats.inflate_avail_in[avail_in_slot]++; avail_out_slot = strm->avail_out / 4096; if (avail_out_slot >= ZLIB_SIZE_SLOTS) avail_out_slot = ZLIB_SIZE_SLOTS - 1; zlib_stats.inflate_avail_out[avail_out_slot]++; zlib_stats.inflate[w->impl]++; pthread_mutex_unlock(&zlib_stats_mutex); } pr_trace("[%p] inflate: flush=%s next_in=%p avail_in=%d " "next_out=%p avail_out=%d total_in=%ld total_out=%ld " "crc/adler=%08lx\n", strm, flush_to_str(flush), strm->next_in, strm->avail_in, strm->next_out, strm->avail_out, strm->total_in, strm->total_out, strm->adler); strm->state = w->priv_data; rc = w->impl ? h_inflate(strm, flush) : z_inflate(strm, flush); /* stop switching after lowlevel inflate has been called */ w->allow_switching = false; strm->state = (void *)w; pr_trace("[%p] flush=%s next_in=%p avail_in=%d " "next_out=%p avail_out=%d total_in=%ld total_out=%ld " "crc/adler=%08lx rc=%s\n", strm, flush_to_str(flush), strm->next_in, strm->avail_in, strm->next_out, strm->avail_out, strm->total_in, strm->total_out, strm->adler, ret_to_str(rc)); err: return rc; } /* Implement the *Back() functions by using software only */ int inflateBack(z_streamp strm, in_func in, void *in_desc, out_func out, void *out_desc) { return z_inflateBack(strm, in, in_desc, out, out_desc); } /** * FIXME Implement fallback if hw is not available. 
*/ int inflateBackInit_(z_streamp strm, int windowBits, unsigned char *window, const char *version, int stream_size) { return z_inflateBackInit_(strm, windowBits, window, version, stream_size); } int inflateBackEnd(z_streamp strm) { return z_inflateBackEnd(strm); } const char *zlibVersion(void) { return z_zlibVersion(); } uLong zlibCompileFlags(void) { return z_zlibCompileFlags(); } /* * adler32: Returns the value of the result of the z_ prefixed adler32 function * */ uLong adler32(uLong adler, const Bytef *buf, uInt len) { zlib_stats_inc(&zlib_stats.adler32); pr_trace("adler32(len=%lld)\n", (long long)len); return z_adler32(adler, buf, len); } /* * adler32_combine: Returns the value of the result of the z_ prefixed * adler32_combine function * */ uLong adler32_combine(uLong adler1, uLong adler2, z_off_t len2) { zlib_stats_inc(&zlib_stats.adler32_combine); pr_trace("adler32_combine(len2=%lld)\n", (long long)len2); return z_adler32_combine(adler1, adler2, len2); } /* * crc32: Returns the value of the result of the z_ prefixed crc32 function * */ uLong crc32(uLong crc, const Bytef *buf, uInt len) { zlib_stats_inc(&zlib_stats.crc32); pr_trace("crc32(len=%lld)\n", (long long)len); return z_crc32(crc, buf, len); } /* * crc32_combine: Returns the value of the result of the z_ prefixed * crc32_combine function * */ uLong crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { zlib_stats_inc(&zlib_stats.crc32_combine); pr_trace("crc32_combine(len2=%lld)\n", (long long)len2); return z_crc32_combine(crc1, crc2, len2); } const char *zError(int err) { return z_zError(err); } static void _done(void) __attribute__((destructor)); static void _done(void) { if (zlib_gather_statistics()) { __print_stats(); pthread_mutex_destroy(&zlib_stats_mutex); } zedc_hw_done(); zedc_sw_done(); if (zlib_log != stderr) fclose(zlib_log); return; } genwqe-user-4.0.18/lib/wrapper.h000066400000000000000000000212531303345043000164430ustar00rootroot00000000000000/* * Copyright 2015, International Business 
Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __WRAPPER_H__ #define __WRAPPER_H__ /* * Switching between software and hardware implementation of zlib. The * hardware implementation is not implementing the full set of * interfaces but enough to do commonly used functionality for * compression and decompression. * * The hardware implementation is using the h_ prefix, the software * implementation is using a z_ prefix. */ #include #include #include #include #include #ifndef ARRAY_SIZE # define ARRAY_SIZE(a) (sizeof((a)) / sizeof((a)[0])) #endif #ifndef MIN # define MIN(a,b) ({ __typeof__ (a) _a = (a); \ __typeof__ (b) _b = (b); \ _a < _b ? _a : _b; }) #endif #ifndef __unused # define __unused __attribute__((unused)) #endif extern FILE *zlib_log; extern int zlib_trace; extern int zlib_accelerator; extern int zlib_card; extern unsigned int zlib_inflate_impl; extern unsigned int zlib_deflate_impl; extern unsigned int zlib_inflate_flags; extern unsigned int zlib_deflate_flags; #define zlib_trace_enabled() (zlib_trace & 0x1) #define zlib_hw_trace_enabled() (zlib_trace & 0x2) #define zlib_sw_trace_enabled() (zlib_trace & 0x4) #define zlib_gather_statistics() (zlib_trace & 0x8) /* Use in case of an error */ #define pr_err(fmt, ...) do { \ fprintf(zlib_log, "%s:%u: Error: " fmt, \ __FILE__, __LINE__, ## __VA_ARGS__); \ } while (0) /* Use in case of an warning */ #define pr_warn(fmt, ...) 
do { \ fprintf(zlib_log, "%s:%u: Warning: " fmt, \ __FILE__, __LINE__, ## __VA_ARGS__); \ } while (0) /* Informational printouts */ #define pr_info(fmt, ...) do { \ fprintf(zlib_log, "Info: " fmt, ## __VA_ARGS__); \ } while (0) /* Trace zlib wrapper code */ #define pr_trace(fmt, ...) do { \ if (zlib_trace_enabled()) \ fprintf(zlib_log, "### " fmt, ## __VA_ARGS__); \ } while (0) /* Trace zlib hardware implementation */ #define hw_trace(fmt, ...) do { \ if (zlib_hw_trace_enabled()) \ fprintf(zlib_log, "hhh " fmt, ## __VA_ARGS__); \ } while (0) /* Trace zlib software implementation */ #define sw_trace(fmt, ...) do { \ if (zlib_sw_trace_enabled()) \ fprintf(zlib_log, "sss " fmt, ## __VA_ARGS__); \ } while (0) #define Z_UNSUPPORTED (-7) #define ZLIB_SIZE_SLOTS 256 /* Each slot represents 4KiB, the last slot is represending everything which larger or equal 1024KiB */ struct zlib_stats { unsigned long deflateInit; unsigned long deflate[ZLIB_MAX_IMPL]; unsigned long deflate_avail_in[ZLIB_SIZE_SLOTS]; unsigned long deflate_avail_out[ZLIB_SIZE_SLOTS]; unsigned long deflateReset; unsigned long deflate_total_in[ZLIB_SIZE_SLOTS]; unsigned long deflate_total_out[ZLIB_SIZE_SLOTS]; unsigned long deflateSetDictionary; unsigned long deflateSetHeader; unsigned long deflateParams; unsigned long deflateBound; unsigned long deflatePrime; unsigned long deflateCopy; unsigned long deflateEnd; unsigned long inflateInit; unsigned long inflate[ZLIB_MAX_IMPL]; unsigned long inflate_avail_in[ZLIB_SIZE_SLOTS]; unsigned long inflate_avail_out[ZLIB_SIZE_SLOTS]; unsigned long inflateReset; unsigned long inflateReset2; unsigned long inflate_total_in[ZLIB_SIZE_SLOTS]; unsigned long inflate_total_out[ZLIB_SIZE_SLOTS]; unsigned long inflateSetDictionary; unsigned long inflateGetDictionary; unsigned long inflateGetHeader; unsigned long inflateSync; unsigned long inflatePrime; unsigned long inflateCopy; unsigned long inflateEnd; unsigned long adler32; unsigned long adler32_combine; unsigned long crc32; 
unsigned long crc32_combine; unsigned long gzopen64; unsigned long gzopen; unsigned long gzdopen; unsigned long gzbuffer; unsigned long gztell64; unsigned long gztell; unsigned long gzseek64; unsigned long gzseek; unsigned long gzwrite; unsigned long gzread; unsigned long gzclose; unsigned long gzoffset64; unsigned long gzoffset; unsigned long gzrewind; unsigned long gzputs; unsigned long gzgets; unsigned long gzputc; unsigned long gzgetc; unsigned long gzungetc; unsigned long gzprintf; unsigned long gzerror; unsigned long gzeof; unsigned long gzflush; unsigned long compress; unsigned long compress2; unsigned long compressBound; unsigned long uncompress; unsigned long adler32_combine64; unsigned long crc32_combine64; unsigned long get_crc_table; }; extern pthread_mutex_t zlib_stats_mutex; /* mutex to protect zlib_stats */ extern struct zlib_stats zlib_stats; static inline void zlib_stats_inc(unsigned long *count) { if (!zlib_gather_statistics()) return; pthread_mutex_lock(&zlib_stats_mutex); *count = *count + 1; pthread_mutex_unlock(&zlib_stats_mutex); } /* Hardware implementation */ int h_deflateInit2_(z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy, const char *version, int stream_size); int h_deflateParams(z_streamp strm, int level, int strategy); uLong h_deflateBound(z_streamp strm, uLong sourceLen); int h_deflateReset(z_streamp strm); int h_deflateSetDictionary(z_streamp strm, const Bytef *dictionary, uInt dictLength); int h_deflateSetHeader(z_streamp strm, gz_headerp head); int h_deflate(z_streamp strm, int flush); int h_deflateEnd(z_streamp strm); int h_inflateInit2_(z_streamp strm, int windowBits, const char *version, int stream_size); int h_inflateReset(z_streamp strm); int h_inflateReset2(z_streamp strm, int windowBits); int h_inflateSetDictionary(z_streamp strm, const Bytef *dictionary, uInt dictLength); /* inflateGetDictionary is only available for 1.2.8 or later */ bool z_hasGetDictionary(void); int 
h_inflateGetDictionary(z_streamp strm, Bytef *dictionary, uInt *dictLength); int h_inflateGetHeader(z_streamp strm, gz_headerp head); int h_deflateCopy(z_streamp dest, z_streamp source); int h_inflate(z_streamp strm, int flush); int h_inflateEnd(z_streamp strm); /* Software implementation */ int z_deflateInit2_(z_streamp strm, int level, int method, int windowBits, int memLevel, int strategy, const char *version, int stream_size); int z_deflateParams(z_streamp strm, int level, int strategy); uLong z_deflateBound(z_streamp strm, uLong sourceLen); int z_deflateReset(z_streamp strm); int z_deflateSetDictionary(z_streamp strm, const Bytef *dictionary, uInt dictLength); int z_deflateSetHeader(z_streamp strm, gz_headerp head); int z_deflatePrime(z_streamp strm, int bits, int value); int z_deflateCopy(z_streamp dest, z_streamp source); int z_deflate(z_streamp strm, int flush); int z_deflateEnd(z_streamp strm); int z_inflateInit2_(z_streamp strm, int windowBits, const char *version, int stream_size); int z_inflateReset(z_streamp strm); int z_inflateReset2(z_streamp strm, int windowBits); int z_inflateSetDictionary(z_streamp strm, const Bytef *dictionary, uInt dictLength); int z_inflateGetDictionary(z_streamp strm, const Bytef *dictionary, uInt *dictLength); int z_inflateGetHeader(z_streamp strm, gz_headerp head); int z_inflatePrime(z_streamp strm, int bits, int value); int z_inflateSync(z_streamp strm); int z_inflate(z_streamp strm, int flush); int z_inflateEnd(z_streamp strm); int z_inflateBackInit_(z_streamp strm, int windowBits, unsigned char *window, const char *version, int stream_size); int z_inflateBack(z_streamp strm, in_func in, void *in_desc, out_func out, void *out_desc); int z_inflateBackEnd(z_streamp strm); uLong z_adler32(uLong adler, const Bytef *buf, uInt len); uLong z_adler32_combine(uLong adler1, uLong adler2, z_off_t len2); uLong z_crc32(uLong crc, const Bytef *buf, uInt len); uLong z_crc32_combine(uLong crc1, uLong crc2, z_off_t len2); const char 
*z_zError(int err); /* PCIe trigger function. Writes to register 0x0 which is normally nonsense. */ void error_trigger(void); /* Constructors/destructors */ void zedc_hw_init(void); void zedc_hw_done(void); void zedc_sw_init(void); void zedc_sw_done(void); /* Circumvention for missing prototypes */ const char *z_zlibVersion(void); uLong z_zlibCompileFlags(void); /* Misc helper functions */ uint64_t str_to_num(char *str); const char *ret_to_str(int ret); const char *flush_to_str(int flush); #endif /* __WRAPPER_H__ */ genwqe-user-4.0.18/licenses/000077500000000000000000000000001303345043000156465ustar00rootroot00000000000000genwqe-user-4.0.18/licenses/cla-corporate.txt000066400000000000000000000161301303345043000211430ustar00rootroot00000000000000 International Business Machines, Inc. Software Grant and Corporate Contributor License Agreement ("Agreement") http://www.github.org/ibm-genwqe/licenses/ Thank you for your interest in IBM’s ibm-genwqe project ("Hardware acceleration of deflate/zlib compression with IBM FPGA accelerators"). In order to clarify the intellectual property license granted with Contributions from any person or entity, IBM must have a Contributor License Agreement (CLA) on file that has been signed by each Contributor, indicating agreement to the license terms below. This license is for your protection as a Contributor as well as the protection of IBM and its users; it does not change your rights to use your own Contributions for any other purpose. This version of the Agreement allows an entity (the "Corporation") to submit Contributions to the Project, to authorize Contributions submitted by its designated employees to the Project, and to grant copyright and patent licenses thereto. If you have not already done so, please complete and sign, then scan and email a pdf file of this Agreement to . If necessary, send an original signed Agreement to: IBM Deutschland RD GmbH SCHOENAICHER STR.
220, BOEBLINGEN 71032 Germany Attn: Frank Haverkamp Please read this document carefully before signing and keep a copy for your records. Corporation name: ________________________________________________ Corporation address: ________________________________________________ ________________________________________________ Point of Contact: ________________________________________________ E-Mail: ________________________________________________ Telephone: _____________________ You accept and agree to the following terms and conditions for Your present and future Contributions submitted to the Project. Except for the license granted herein to IBM and recipients of software distributed by IBM, You reserve all right, title, and interest in and to Your Contributions. 1. Definitions. "You" (or "Your") shall mean the copyright owner or legal entity authorized by the copyright owner that is making this Agreement with IBM. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "Contribution" shall mean the code, documentation or other original works of authorship expressly identified in Schedule B, as well as any original work of authorship, including any modifications or additions to an existing work, that is intentionally submitted by You to IBM for inclusion in, or documentation of, the Project managed by IBM (the "Work"). 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to IBM or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, IBM for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." 2. Grant of Copyright License. Subject to the terms and conditions of this Agreement, You hereby grant to IBM and to recipients of software distributed by IBM a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. 3. Grant of Patent License. Subject to the terms and conditions of this Agreement, You hereby grant to IBM and to recipients of software distributed by IBM a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) were submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your Contribution, or the Work to which you have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. 4. You represent that You are legally entitled to grant the above license. 
You represent further that each employee of the Corporation designated on Schedule A below (or in a subsequent written modification to that Schedule) is authorized to submit Contributions on behalf of the Corporation. 5. You represent that each of Your Contributions is Your original creation (see section 7 for submissions on behalf of others). 6. You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all. Unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 7. Should You wish to submit work that is not Your original creation, You may submit it to IBM separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which you are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]". 8. It is your responsibility to notify IBM when any change is required to the list of designated employees authorized to submit Contributions on behalf of the Corporation, or to the Corporation's Point of Contact with IBM. Please sign: __________________________________ Date: _______________ Title: __________________________________ Corporation: __________________________________ Schedule A [Initial list of designated employees. NB: authorization is not tied to particular Contributions.] Schedule B [Identification of optional concurrent software grant. Would be left blank or omitted if there is no concurrent software grant.] 
genwqe-user-4.0.18/licenses/cla-individual.txt000066400000000000000000000155251303345043000213040ustar00rootroot00000000000000 International Business Machines, Inc. (IBM) Individual Contributor License Agreement ("Agreement") http://www.github.org/ibm-genwqe/licenses/ Thank you for your interest in the ibm-genwqe project ("Hardware acceleration of deflate/zlib compression with IBM FPGA accelerators"). In order to clarify the intellectual property license granted with Contributions from any person or entity, IBM must have a Contributor License Agreement ("CLA") on file that has been signed by each Contributor, indicating agreement to the license terms below. This license is for your protection as a Contributor as well as the protection of IBM and its customers; it does not change your rights to use your own Contributions for any other purpose. If you have not already done so, please complete and sign, then scan and email a pdf file of this Agreement to . The original signed agreement should be sent to: IBM Deutschland RD GmbH SCHOENAICHER STR. 220, BOEBLINGEN 71032 Germany Attn: Frank Haverkamp Please read this document carefully before signing and keep a copy for your records. Full name: ______________________________________________________ (optional) Public name: _________________________________________ Mailing Address: ________________________________________________ ________________________________________________ Country: ______________________________________________________ Telephone: ______________________________________________________ E-Mail: ______________________________________________________ You accept and agree to the following terms and conditions for Your present and future Contributions submitted to the Project. Except for the license granted herein to IBM and recipients of software distributed by IBM, You reserve all right, title, and interest in and to Your Contributions. 1. Definitions. 
"You" (or "Your") shall mean the copyright owner or legal entity authorized by the copyright owner that is making this Agreement with IBM. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "Contribution" shall mean any original work of authorship, including any modifications or additions to an existing work, that is intentionally submitted by You to the Project for inclusion in, or documentation of, the Project ("the Work"). For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Project or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Project for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." 2. Grant of Copyright License. Subject to the terms and conditions of this Agreement, You hereby grant to IBM and to recipients of software distributed by IBM a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. 3. Grant of Patent License.
Subject to the terms and conditions of this Agreement, You hereby grant to IBM and to recipients of software distributed by IBM a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work to which Your Contribution(s) were submitted, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that your Contribution, or the Work to which you have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. 4. You represent that you are legally entitled to grant the above license. If your employer(s) has rights to intellectual property that you create that includes your Contributions, you represent that you have received permission to make Contributions on behalf of that employer, that your employer has waived such rights for your Contributions to the Project, or that your employer has executed a separate Corporate CLA with IBM. 5. You represent that each of Your Contributions is Your original creation (see section 7 for submissions on behalf of others). You represent that Your Contribution submissions include complete details of any third-party license or other restriction (including, but not limited to, related patents and trademarks) of which you are personally aware and which are associated with any part of Your Contributions. 6. You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. 
You may provide support for free, for a fee, or not at all. Unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON- INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 7. Should You wish to submit work that is not Your original creation, You may submit it to the Project separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which you are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]". 8. You agree to notify IBM of any facts or circumstances of which you become aware that would make these representations inaccurate in any respect. Please sign: __________________________________ Date: ________________ genwqe-user-4.0.18/misc/000077500000000000000000000000001303345043000147745ustar00rootroot00000000000000genwqe-user-4.0.18/misc/Makefile000066400000000000000000000021261303345043000164350ustar00rootroot00000000000000# # Copyright 2016, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
#

# Build rules for the small zpipe example tools in misc/.
# NOTE(review): $(libs), $(objs) and $(manpages) are not assigned in this
# file; presumably they come from ../config.mk or are left empty - verify.
include ../config.mk

LDLIBS += -lzADC
LDFLAGS += -L../lib -Wl,-rpath,lib

# Extra libraries for a tool are looked up via $(<tool>_libs), see the
# generic link rule below.
zpipe_mt_libs = -lpthread

projs = zpipe zpipe_mt zpipe_append zpipe_rnd

# These targets do not produce files of the same name; declare them phony
# so that a stray file called e.g. "clean" cannot suppress them.
.PHONY: all install uninstall clean distclean

all: $(projs)

$(projs): $(libs)

### Deactivate existing implicit rule
%: %.c
%: %.sh

### Generic rule to build a tool
%: %.o
	$(CC) $(LDFLAGS) $@.o $($(@)_objs) $($(@)_libs) $(LDLIBS) -o $@

%.o: %.c $(libs)
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@

install uninstall:
	@echo "Nothing to be done here."

clean distclean:
	$(RM) $(objs) $(projs) $(manpages)
genwqe-user-4.0.18/misc/basic_hardware_tests.sh000077500000000000000000000065301303345043000215170ustar00rootroot00000000000000#!/bin/bash
#
# Copyright 2016, International Business Machines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Execute basic hardware tests for all available cards in a
# test system. This runs for GENWQE as well as for CAPI GZIP cards.
# It needs some test data to work on and prints out a message if the
# test data is not available. Use for automated regression testing.
#

# Prefer the tools, scripts and libraries of the current source tree.
export PATH=`pwd`/tools:`pwd`/misc:$PATH
export LD_LIBRARY_PATH=`pwd`/lib:$LD_LIBRARY_PATH

# lock dirs/files
# mkdir either creates the directory or fails, which makes it usable as
# a lock; the PID of the lock holder is stored inside of it.
LOCKDIR="/tmp/genwqe_hardware_tests.lock"
PIDFILE="${LOCKDIR}/PID"

# exit codes and text
ENO_SUCCESS=0; ETXT[0]="ENO_SUCCESS"
ENO_GENERAL=1; ETXT[1]="ENO_GENERAL"
ENO_LOCKFAIL=2; ETXT[2]="ENO_LOCKFAIL"
ENO_RECVSIG=3; ETXT[3]="ENO_RECVSIG"

# Report the exit code on every exit of the script (trap on EXIT).
trap 'ECODE=$?; echo "Exit: ${ETXT[ECODE]}($ECODE)" >&2' 0

echo -n "Locking: " >&2

if mkdir "${LOCKDIR}" &>/dev/null; then
	# Lock succeeded, install signal handlers before storing the PID
	# just in case storing the PID fails.
	# This EXIT trap replaces the one from above and removes the lock.
	trap 'ECODE=$?; echo "Removing lock. Exit: ${ETXT[ECODE]}($ECODE)" >&2
	      rm -rf "${LOCKDIR}"' 0
	echo "$$" >"${PIDFILE}"

	# The following handler will exit the script upon receiving these
	# signals (HUP, INT, QUIT, TERM). The trap on "0" (EXIT) from above
	# will be triggered by this trap's "exit" command!
	trap 'echo "Killed by a signal." >&2
	      exit ${ENO_RECVSIG}' 1 2 3 15
else
	# Lock failed, check if the other PID is alive.
	OTHERPID="$(cat "${PIDFILE}")"
	# If cat isn't able to read the file, another instance is probably
	# about to remove the lock -- exit, we're *still* locked.
	if [ $? != 0 ]; then
		echo "Lock failed, PID ${OTHERPID} is active" >&2
		exit ${ENO_LOCKFAIL}
	fi

	if ! kill -0 $OTHERPID &>/dev/null; then
		# Lock is stale: remove it and give up with ENO_LOCKFAIL.
		# The script does not restart itself, the caller has to
		# start it again.
		echo "Removing stale lock of nonexistent PID ${OTHERPID}" >&2
		rm -rf "${LOCKDIR}"
		exit ${ENO_LOCKFAIL}
	else
		# Lock is valid and OTHERPID is active - exit, we're locked!
		echo "Lock failed, PID ${OTHERPID} is active" >&2
		exit ${ENO_LOCKFAIL}
	fi
fi

# Checks
# Missing test data is only reported here, the script continues.
if [ ! -f cantrbry.tar.gz ]; then
	echo "We need test case data: cantrbry.tar.gz"
	echo "Get it by using:"
	echo " wget http://corpus.canterbury.ac.nz/resources/cantrbry.tar.gz"
	echo
fi

# Tests
# Run all three tests on every card genwqe_find_card reports for the
# accelerator type; the first failing test ends the script with exit 1.
for accel in GENWQE CAPI ; do
	for card in `./tools/genwqe_find_card -A${accel}`; do
		echo "TESTING ${accel} CARD ${card}"

		zlib_test.sh -A${accel} -C${card}
		if [ $? -ne 0 ]; then
			echo "FAILED ${accel} CARD ${card}"
			exit 1
		fi

		genwqe_mt_perf -A${accel} -C${card}
		if [ $? -ne 0 ]; then
			echo "FAILED ${accel} CARD ${card}"
			exit 1
		fi

		genwqe_test_gz -A${accel} -C${card} -vv -i10 -t cantrbry.tar.gz
		if [ $? -ne 0 ]; then
			echo "FAILED ${accel} CARD ${card}"
			exit 1
		fi

		echo "PASSED ${accel} CARD ${card}"
	done
done

# Keep the kernel log of the test run.
dmesg -T > basic_hardware_test.dmesg
exit 0;
genwqe-user-4.0.18/misc/basic_software_tests.sh000077500000000000000000000031131303345043000215460ustar00rootroot00000000000000#!/bin/bash
#
# Copyright 2016, International Business Machines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Execute basic software tests: genwqe_mt_perf and genwqe_test_gz are run
# with accelerator SW and card 0 to test the fallback to software if
# there is no card available.
# It needs some test data to work on and prints out a message if the
# test data is not available. Use for automated regression testing.
#

export PATH=`pwd`/tools:$PATH
export LD_LIBRARY_PATH=`pwd`/lib:$LD_LIBRARY_PATH

# Checks
# Missing test data is only reported here, the script continues.
if [ ! -f cantrbry.tar.gz ]; then
	echo "We need test case data: cantrbry.tar.gz"
	echo "Get it by using:"
	echo " wget http://corpus.canterbury.ac.nz/resources/cantrbry.tar.gz"
	echo
fi

accel=SW
card=0

echo "Testing fallback to software if there is no card available"
echo "TESTING ${accel} CARD ${card}"

genwqe_mt_perf -A${accel} -C${card} -M4
if [ $? -ne 0 ]; then
	echo "FAILED ${accel} CARD ${card}"
	exit 1
fi

genwqe_test_gz -A${accel} -C${card} -vv -i10 -t cantrbry.tar.gz
if [ $?
-ne 0 ]; then
	echo "FAILED ${accel} CARD ${card}"
	exit 1
fi
echo "PASSED ${accel} CARD ${card}"

dmesg -T > basic_software_test.dmesg
exit 0;
genwqe-user-4.0.18/misc/netcat_test.sh000077500000000000000000000112621303345043000176520ustar00rootroot00000000000000#!/bin/bash
#
# Copyright 2016, International Business Machines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Test-script to measure and tune performance of zlib soft- or hardware
# implementation. In combination with netcat and tar/gzip.
# Unless being executed in a tmpfs/ramdisk this test also measures I/O
# to read and write data.
#
# Start "netcat_test.sh -sender" on the sending machine first and then
# "netcat_test.sh -receiver" on the receiving one; both sides walk
# through steps (1) to (9) over TCP port 7878.
#

# FIXME Adjust to your needs ...
sender=tul2

# Print "; <sec>,<fraction>" taken from the "real" line (format NmS.FFF)
# of the bash time output stored in file $1.
function extract_real_time() {
	local file=$1;

	# perl reads the "real" line from stdin (<>) and converts minutes
	# and seconds into seconds; the fraction is printed after a comma.
	duration=`grep real $file | perl -e '$a=<>; $a=~m/([0-9]*)m([0-9]*)\.([0-9]*)/; $sec=$1*60+$2; $msec=$3; print "$sec,$msec"'`
	echo "; ${duration}"
}

function usage() {
	echo "netcat_test.sh [-sender|-receiver]"
}

# $1 is quoted such that a call without arguments falls through to
# usage instead of raising a syntax error in the test.
if [ "$1" = -receiver ]; then
	echo "Tidy up ..."
	rm -rf linux_from_tul2.*

	echo -n " (1) Receive plain tar ... "
	sync
	(time nc -w 10 ${sender} 7878 > linux_from_tul2.plain.tar) 2> output.stderr
	extract_real_time output.stderr
	sleep 5

	echo -n " (2) Receive plain hw.tar.gz ... "
	sync
	(time nc -w 10 ${sender} 7878 > linux_from_tul2.plain.hw.tar.gz) 2> output.stderr
	extract_real_time output.stderr
	sleep 5

	echo -n " (3) Receive plain sw.tar.gz ... "
	sync
	(time nc -w 10 ${sender} 7878 > linux_from_tul2.plain.sw.tar.gz) 2> output.stderr
	extract_real_time output.stderr
	sleep 5

	echo -n " (4) Receive generated tar ... "
	sync
	(time nc -w 10 ${sender} 7878 > linux_from_tul2.generated.tar) 2> output.stderr
	extract_real_time output.stderr
	sleep 5

	echo -n " (5) Receive tar and extract it ... "
	mkdir -p linux_from_tul2
	sync
	(time nc -w 10 ${sender} 7878 | \
		tar x -C linux_from_tul2 --strip-components=1) 2> output.stderr
	extract_real_time output.stderr
	sleep 5

	echo -n " (6) Receive sw.tar.gz ... "
	sync
	(time nc -w 10 ${sender} 7878 > linux_from_tul2.generated.sw.tar.gz) 2> output.stderr
	extract_real_time output.stderr
	sleep 5

	echo -n " (7) Receive hw.tar.gz ... "
	(time nc -w 10 ${sender} 7878 > linux_from_tul2.generated.hw.tar.gz) 2> output.stderr
	extract_real_time output.stderr
	sleep 5

	echo -n " (8) Receive and extract hw.tar.gz in hw ... "
	mkdir -p linux_from_tul2.hw
	sync
	(time nc -w 10 ${sender} 7878 | \
		PATH=/usr/bin/genwqe:$PATH ZLIB_TRACE=0x0 ZLIB_ACCELERATOR=CAPI \
		tar xz -C linux_from_tul2.hw \
		--strip-components=1) 2> output.stderr
	extract_real_time output.stderr
	sleep 5

	echo -n " (9) Receive and extract hw.tar.gz in sw ... "
	sync
	mkdir -p linux_from_tul2.sw
	(time nc -w 10 ${sender} 7878 | \
		tar xz -C linux_from_tul2.sw \
		--strip-components=1) 2> output.stderr
	extract_real_time output.stderr
	sleep 5

	exit 0
fi

if [ "$1" = -sender ]; then
	echo " (1a) Generate tar if it is not exsiting yet ..."
	if [ ! -d linux ]; then
		echo "linux directory missing, needed to perform this measurement!"
		exit 1
	fi
	if [ ! -f linux.tar ]; then
		time tar cf linux.tar linux
	else
		echo " linux.tar already existing, skipping"
	fi
	du -ch linux.tar

	echo "Please start receiver now."

	echo " (1) Send tar ..."
	cat linux.tar | nc -q 1 -l -p 7878

	# NOTE(review): this is the only step with a second "time" after
	# the environment assignments (it runs the external time command
	# in addition to the shell keyword) - confirm whether intended.
	echo " (2) Send hw.tar.gz ..."
	time PATH=/usr/bin/genwqe:$PATH ZLIB_TRACE=0x0 ZLIB_ACCELERATOR=CAPI \
		time genwqe_gzip -c linux.tar | nc -q 1 -l -p 7878

	echo " (3) Send sw.tar.gz ..."
	time gzip -c linux.tar | nc -q 1 -l -p 7878

	echo " (4) Generate and send tar ..."
	time tar c linux | nc -q 1 -l -p 7878

	echo " (5) Generate and send tar ..."
	time tar c linux | nc -q 1 -l -p 7878

	echo " (6) Generate and send sw.tar.gz ..."
	time tar cz linux | nc -q 1 -l -p 7878

	echo " (7) Generate and send hw.tar.gz ..."
	time PATH=/usr/bin/genwqe:$PATH ZLIB_TRACE=0x0 ZLIB_ACCELERATOR=CAPI \
		tar cz linux | nc -q 1 -l -p 7878

	echo " (8) Generate and send hw.tar.gz ..."
	time PATH=/usr/bin/genwqe:$PATH ZLIB_TRACE=0x0 ZLIB_ACCELERATOR=CAPI \
		tar cz linux | nc -q 1 -l -p 7878

	echo " (9) Generate and send hw.tar.gz ..."
	time PATH=/usr/bin/genwqe:$PATH ZLIB_TRACE=0x0 ZLIB_ACCELERATOR=CAPI \
		tar cz linux | nc -q 1 -l -p 7878

	exit 0
fi

usage
genwqe-user-4.0.18/misc/ratio_test.sh000077500000000000000000000040111303345043000175040ustar00rootroot00000000000000#!/bin/bash
#
# Copyright 2016, International Business Machines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Test-script to measure compression ratio
#

data=linux.tar

# Print "; <sec>,<fraction>" taken from the "real" line (format NmS.FFF)
# of the bash time output stored in file $1.
function extract_real_time() {
	local file=$1;

	# perl reads the "real" line from stdin (<>) and converts minutes
	# and seconds into seconds; the fraction is printed after a comma.
	duration=`grep real $file | perl -e '$a=<>; $a=~m/([0-9]*)m([0-9]*)\.([0-9]*)/; $sec=$1*60+$2; $msec=$3; print "$sec,$msec"'`
	echo "; ${duration}"
}

function usage() {
	echo "ratio_test.sh"
}

echo "Generate tar if it is not exsiting yet ..."
if [ ! -d linux ]; then
	echo "linux directory missing, needed to perform this measurement!"
	exit 1
fi
if [ !
-f ${data} ]; then time tar cf ${data} linux else echo " ${data} already existing, skipping" fi du -ch ${data} echo -n "Hardware ... " rm -f linux.hw.tar.gz sync (time PATH=/usr/bin/genwqe:$PATH ZLIB_TRACE=0x0 ZLIB_ACCELERATOR=CAPI \ gzip -c ${data} > linux.hw.tar.gz) 2> output.stderr extract_real_time output.stderr du -ch linux.hw.tar.gz echo -n "Software default " rm -f linux.sw.default.tar.gz sync (time gzip -c ${data} > linux.sw.default.tar.gz) 2> output.stderr extract_real_time output.stderr du -ch linux.sw.default.tar.gz echo -n "Software fast " rm -f linux.sw.fast.tar.gz sync (time gzip -1 -c ${data} > linux.sw.fast.tar.gz) 2> output.stderr extract_real_time output.stderr du -ch linux.sw.fast.tar.gz echo -n "Software best " rm -f linux.sw.best.tar.gz sync (time gzip -9 -c ${data} > linux.sw.best.tar.gz) 2> output.stderr extract_real_time output.stderr du -ch linux.sw.best.tar.gz exit 0 genwqe-user-4.0.18/misc/samtools_test.sh000077500000000000000000000222061303345043000202350ustar00rootroot00000000000000#!/bin/bash # # Copyright 2016, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # Frank Haverkamp # # Trying out samtools in a way where, we can repro what we did so far. # Let's see what is possible and what is not. We need some example BAM/SAM # files to get this test working. Our current set of example files were # contributed by a colleague. Thanks for providing them. 
#
# /usr/bin/time did not want to accept environment variables before the
# command. We are using the built-in time and hope that the -p switch is
# supported to print the time in seconds, such that we can copy-paste
# the resulting output into a csv file which we use to generate some
# charts from.
#
# Usage example:
#
#   for a in CAPI GENWQE SW ; do
#     ./misc/samtools_test.sh -A${a} -C0 -d /home/haver/genomics ;
#   done
#

export ZLIB_TRACE=0x0000
export ZLIB_ACCELERATOR=GENWQE
export ZLIB_CARD=0
export ZLIB_INFLATE_IMPL=0x61
export ZLIB_INFLATE_THRESHOLD=4KiB
export ZLIB_DEFLATE_IMPL=0x61
export PATH=/usr/bin/genwqe/bin/genwqe:/sbin:/usr/sbin:$PATH

## FIXME Files and directory are not obvious ...
verbose=0
directory=/home/${USER}/genomics
threads=`cat /proc/cpuinfo | grep processor | wc -l`
homedir=`pwd`
# Default only; system_load_find_sadc() replaces it with the path that
# actually exists on this system.
sadc=/usr/lib/sysstat/sadc
## FIXME Location might differ for various Linux distributions
libz=/usr/lib64/genwqe/libz.so.1

# Measurement variables
huge=0;

# Print the command line help. PROGRAM may be preset by the caller,
# otherwise the name this script was invoked with is shown.
function usage() {
	echo "Usage of ${PROGRAM:-$0}:"
	echo "  [-A <accelerator>] use either GENWQE for the PCIe and CAPI for"
	echo "       CAPI based solution available only on System p"
	echo "       Use SW to use software compress/decompression"
	echo "  [-C <card>] set the compression card to use (0, 1, ... )."
	echo "       RED (or -1) drive work to all available cards."
	echo "  [-v <verbose>]"
	echo "  [-t <trace_level>]"
	echo "  [-T <threads>]"
	echo "  [-C <card>] Card to be used for the test (-1 autoselect)"
	echo "  [-d <directory>] Directory containing the data"
	echo "  [-D] Try all hw optimiaztions, if not set"
	echo "       only the most promising config is tried."
	echo "  [-H] Try with huge data, this takes a while ..."
	echo "  [-h] Help."
	echo
}

# NOTE(review): usage() advertises -D, but the option string below does
# not accept it and nothing in the visible script evaluates it.
while getopts "A:C:T:Hd:t:v:h" opt; do
	case $opt in
	A)
		export ZLIB_ACCELERATOR=$OPTARG;
		;;
	C)
		export ZLIB_CARD=$OPTARG;
		;;
	d)
		directory=$OPTARG;
		;;
	T)
		threads=$OPTARG;
		;;
	t)
		export ZLIB_TRACE=$OPTARG;
		;;
	H)
		huge=1;
		;;
	v)
		verbose=$OPTARG;
		;;
	h)
		usage;
		exit 0;
		;;
	\?)
		echo "Invalid option: -$OPTARG" >&2
		;;
	esac
done

# Pure software run: select implementation 0x00 for both directions.
if [ "$ZLIB_ACCELERATOR" = "SW" ]; then
	export ZLIB_DEFLATE_IMPL=0x00;
	export ZLIB_INFLATE_IMPL=0x00;
fi

# Warn if the kernel log contains a dma line mentioning "-1". The warning
# is printed unless grep reports "no match" (exit status 1).
function p8_dma_sanity_check() {
	dmesg | grep dma | grep '\-1'
	if [ $? -ne 1 ]; then
		echo "WARNING: Check P8 DMA settings to get full PCIe performance!"
		echo "  If we see DMA configuration problems with the KVM guest"
		echo "  the performance will be way worse than it could be and the"
		echo "  measurements will not be usable at the end."
	fi
}

# Time "samtools index" on file $1 and print " ; <real seconds>".
# The generated index is kept as <file>.<accelerator>.bai.
function indexing() {
	local file=$1

	touch sam.log
	rm -f ${file}.${ZLIB_ACCELERATOR}.bai

	(time -p LD_PRELOAD=${libz} samtools index ${file}) 2>> sam.log

	# time -p appends three lines (real/user/sys) to the log
	tail -n3 sam.log > time.log
	mv ${file}.bai ${file}.${ZLIB_ACCELERATOR}.bai
	sw=`grep real time.log | cut -d' ' -f2`;
	echo " ; $sw"
}

# Time the SAM to BAM conversion of file $1 using $2 additional threads
# and print " <threads> ; <real seconds>".
function viewing() {
	local file=$1
	local t=$2
	local bam=na1.${ZLIB_ACCELERATOR}.bam

	touch sam.log
	# Remove the file which is really written below, so that a stale
	# result of a previous run cannot be mistaken for a fresh one.
	rm -f ${bam}
	sync

	(time -p LD_PRELOAD=${libz} \
		samtools view -@ ${t} -S -b ${file} > ${bam}) \
		2>> sam.log

	tail -n3 sam.log > time.log
	sw=`grep real time.log | cut -d' ' -f2`;
	echo " $t ; $sw"
}

# Time the BAM to FASTQ conversion of file $1 and print
# " <threads> ; <real seconds>". $2 is only echoed, samtools is not told
# about it.
function bam2fq() {
	local file=$1
	local t=$2

	touch sam.log
	rm -f ${file}.${ZLIB_ACCELERATOR}.fastq
	sync

	(time -p LD_PRELOAD=${libz} \
		samtools bam2fq ${file} > ${file}.${ZLIB_ACCELERATOR}.fastq) \
		2>> sam.log

	tail -n3 sam.log > time.log
	sw=`grep real time.log | cut -d' ' -f2`;
	echo " $t ; $sw"
}

# Time "samtools sort" of file $1 with $2 threads and print
# " <threads> ; <real seconds>". With verbose=1 the equivalent manual
# command sequence is shown first.
function sam_sort() {
	local file=$1
	local t=$2

	touch sam.log
	rm -f ${file}.${ZLIB_ACCELERATOR}.sorted.bam ${file}.tmp
	sync

	if [ "$verbose" = 1 ]; then
		echo "EXECUTE:"
		echo " export ZLIB_TRACE=$ZLIB_TRACE"
		echo " export ZLIB_ACCELERATOR=$ZLIB_ACCELERATOR"
		echo " export ZLIB_CARD=$ZLIB_CARD"
		echo " export ZLIB_DEFLATE_IMPL=$ZLIB_DEFLATE_IMPL"
		echo " export ZLIB_INFLATE_IMPL=$ZLIB_INFLATE_IMPL"
		echo " LD_PRELOAD=${libz} \\"
		echo " samtools sort -@${t} -T ${file}.tmp -O bam \\"
		echo " -o ${file}.${ZLIB_ACCELERATOR}.sorted.bam ${file}"
	fi

	(time -p LD_PRELOAD=${libz} \
		samtools sort -@${t} -T ${file}.tmp -O bam \
		-o ${file}.${ZLIB_ACCELERATOR}.sorted.bam \
		${file}) 2>> sam.log

	tail -n3 sam.log > time.log
	sw=`grep real time.log | cut -d' ' -f2`;
	echo " $t ; $sw"
}

###############################################################################
# Preparations
###############################################################################

ulimit -c unlimited

pushd .
# Everything below works on relative file names inside the data directory.
cd "$directory" || {
	echo "Cannot change into data directory $directory!" >&2
	exit 1
}
cpus=`cat /proc/cpuinfo | grep processor | wc -l`

echo
uname -a
echo "Accelerator: ${ZLIB_ACCELERATOR}"
echo "Processors: $cpus"
echo "Threads to try: $threads"
echo "Inflate sw fallback: $ZLIB_INFLATE_THRESHOLD"
echo "Available IBM Processing accelerators:"
lspci | grep "Processing accelerators: IBM"
echo

echo "Check availability of test data and libraries:"
for d in NA1.bam na1.sam NA12878.bam ${libz} ; do
	echo -n " ${d} ... "
	if [ ! -f ${d} ]; then
		echo "MISSING!"
		pwd
		exit 1
	else
		echo "OK"
	fi
done
echo

p8_dma_sanity_check

###############################################################################
# System Load Logging
###############################################################################

# Set sadc to the first location at which an executable sadc is found,
# give up if there is none.
function system_load_find_sadc() {
	if [ -x /usr/lib64/sa/sadc ]; then
		sadc=/usr/lib64/sa/sadc
	elif [ -x /usr/lib/sysstat/sadc ]; then
		sadc=/usr/lib/sysstat/sadc
	else
		echo "Cannot find sadc tool for CPU load measurement!"
		exit 1
	fi
}

# Start sadc in the background, sampling every second into
# system_load.sar, and remember its pid for system_load_logging_stop().
function system_load_logging_start() {
	rm -f system_load.sar system_load.pid
	# Use the binary located by system_load_find_sadc() instead of a
	# fixed path, which does not exist on every distribution.
	${sadc} 1 system_load.sar &
	echo $! > system_load.pid
}

# Stop the sampler and turn the collected data into
#   system_load.<accelerator>.sar  raw data
#   system_load.<accelerator>.csv  complete "sar -u" report
#   system_load.<accelerator>.pdf  %user/%system chart
function system_load_logging_stop() {
	kill -9 `cat system_load.pid`

	cp system_load.sar system_load.$ZLIB_ACCELERATOR.sar

	# Drop the header information at the top of the report (output
	# starts at line 4) as well as the trailing Average line.
	LC_TIME=posix sar -u -f system_load.sar | tail -n +4 > system_load.txt
	grep -v Average system_load.txt > system_load.csv
	LC_TIME=posix sar -u -f system_load.sar > system_load.$ZLIB_ACCELERATOR.csv

	# First and last time stamp define the x-axis range
	start=`head -n1 system_load.csv | cut -f1 -d' '`
	end=`tail -n1 system_load.csv | cut -f1 -d' '`

	cat <<EOF > system_load.gnuplot
set terminal pdf size 16,8
set output "system_load.pdf"
set autoscale
set title "System Load using $ZLIB_ACCELERATOR"
set xdata time
set timefmt "%H:%M:%S"
set xlabel "Time"
set xrange ["$start":"$end"]
set ylabel "CPU Utilization"
set yrange ["0.00":"35.00"]
set style data lines
set grid
# set datafile separator " "
plot "system_load.csv" using 1:3 title "%user" with lines lw 4, '' using 1:5 title "%system" with lines lw 4
EOF

	# Instructing gnuplot to generate a pdf with our CPU load statistics
	cat system_load.gnuplot | gnuplot

	# Save it under an accelerator unique name
	mv system_load.pdf system_load.${ZLIB_ACCELERATOR}.pdf
}

system_load_find_sadc
system_load_logging_start

###############################################################################
echo "SAMTOOLS sort (inflate/deflate)"
echo " threads ; ${ZLIB_ACCELERATOR}"

sam_sort NA1.bam 0
for ((t = 1; t <= $threads; t *= 2)); do
	sam_sort NA1.bam ${t}
done
echo

###############################################################################
echo "SAMTOOLS bam2fq (inflate)"
echo " ; ${ZLIB_ACCELERATOR}"

bam2fq NA1.bam
echo

###############################################################################
echo "SAMTOOLS indexing (inflate)"
echo " threads ; ${ZLIB_ACCELERATOR}"

indexing NA1.bam
if [ $huge -eq 1 ]; then
	indexing NA12878.bam
fi
echo

###############################################################################
echo "SAMTOOLS viewing (deflate)"
echo " threads ; ${ZLIB_ACCELERATOR}"
viewing na1.sam 0 for ((t = 1; t <= $threads; t *= 2)); do viewing na1.sam ${t} done echo ############################################################################### # Gather CPU Load Statistics ############################################################################### system_load_logging_stop popd genwqe-user-4.0.18/misc/zlib_test.sh000077500000000000000000000440071303345043000173370ustar00rootroot00000000000000#!/bin/bash # # Copyright 2016, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # Some tests to ensure proper function of the hardware accelerated zlib. # # Setup tools path, such that we do not need to prefix the binaries and # test-script. This should also help to reduce change effort when we # move our test binaries from one to another source code repository. # export PATH=`pwd`/tools:`pwd`/misc:$PATH export LD_LIBRARY_PATH=`pwd`/lib:$LD_LIBRARY_PATH card=0 verbose=0 trace=0 ibuf_size="1MiB" export ZLIB_ACCELERATOR=GENWQE; function usage() { echo "Usage:" echo " zlib_test.sh" echo " [-A] use either GENWQE for the PCIe and CAPI for" echo " CAPI based solution available only on System p" echo " Use SW to use software compress/decompression" echo " [-C ] card to be used for the test" echo " [-v ]" echo " [-t ]" echo " [-i ]" } while getopts "A:C:i:t:v:h" opt; do case $opt in A) export ZLIB_ACCELERATOR=$OPTARG; ;; C) card=$OPTARG; ;; t) trace=$OPTARG; ;; i) ibuf_size=$OPTARG; ;; v) verbose=$OPTARG; ;; h) usage; exit 0; ;; \?) 
echo "Invalid option: -$OPTARG" >&2 ;; esac done ulimit -c unlimited function test_compress_decompress() { local fname=$1; echo "Compress ${fname} ..." echo " zpipe < ${fname} > ${fname}.rfc1950" time zpipe < ${fname} > ${fname}.rfc1950 if [ $? -ne 0 ]; then echo "zpipe failed!" echo " zpipe < ${fname} > ${fname}.rfc1950" exit 1 fi #od -tx1 ${fname}.rfc1950 | head #echo "..." #od -tx1 ${fname}.rfc1950 | tail echo "ok" echo "Check size of resulting file ..." du -ch ${fname}.rfc1950 echo echo "Decompress data ..." echo " zpipe -d < ${fname}.rfc1950 > ${fname}.out" time zpipe -d < ${fname}.rfc1950 > ${fname}.out if [ $? -ne 0 ]; then echo "zpipe failed!" echo " zpipe -d < ${fname}.rfc1950 > ${fname}.out" exit 1 fi echo "ok" echo "Compare data ..." diff ${fname} ${fname}.out &> /dev/null if [ $? -ne 0 ]; then echo "${fname} and ${fname}.out are different!" exit 1 fi echo "ok" } function test_compress_decompress_rnd() { local fname=$1; local bufsize=$2; echo "--- bufsize=$bufsize/randomized ------------------------------------" echo "Compress ${fname} with random buffer sizes 1..$bufsize" time zpipe_rnd -i$bufsize -o$bufsize -r \ < ${fname} > ${fname}.rfc1950 if [ $? -ne 0 ]; then echo "zpipe_rnd failed!" echo " zpipe_rnd -i$bufsize -o$bufsize -r < ${fname} > ${fname}.rfc1950" exit 1 fi #od -tx1 ${fname}.rfc1950 | head echo "ok" echo "Check size of resulting file ..." du -ch ${fname}.rfc1950 echo echo "Decompress data ..." time zpipe_rnd -i$bufsize -o$bufsize -r -d \ < ${fname}.rfc1950 > ${fname}.out if [ $? -ne 0 ]; then echo "zpipe_rnd failed! in=${fname}.rfc1950 out=${fname}.out" exit 1 fi echo "ok" echo "Compare data ..." diff ${fname} ${fname}.out &> /dev/null if [ $? -ne 0 ]; then echo "${fname} and ${fname}.out are different!" 
exit 1 fi echo "ok" } function test_compress_decompress_fixed() { local fname=$1; local bufsize=$2; echo "--- bufsize=$bufsize/fixed -----------------------------------------" echo "Compress ${fname} with fixed buffer size $bufsize" time zpipe_rnd -i$bufsize -o$bufsize \ < ${fname} > ${fname}.rfc1950 if [ $? -ne 0 ]; then echo "zpipe_rnd failed! in=${fname}.out out=${fname}.rfc1950 " exit 1 fi echo "ok" echo "Check size of resulting file ..." du -ch ${fname}.rfc1950 echo echo "Decompress data ..." time zpipe_rnd -i$bufsize -o$bufsize -d \ < ${fname}.rfc1950 > ${fname}.out if [ $? -ne 0 ]; then echo "zpipe_rnd failed! in=${fname}.rfc1950 out=${fname}.out" exit 1 fi echo "ok" echo "Compare data ..." diff ${fname} ${fname}.out &> /dev/null if [ $? -ne 0 ]; then echo "${fname} and ${fname}.out are different!" exit 1 fi echo "ok" } function build_code () { echo "--------------------------------------------------------------------" echo "Build code ..." make || exit 1 echo "--------------------------------------------------------------------" if [ -f test_data.bin ]; then echo "test_data.bin is already existing, continue ..." else echo "Copy test data ..." cat /usr/bin* /usr/lib/* > test_data.bin 2> /dev/null fi du -ch test_data.bin if [ -f empty.bin ]; then echo "empty.bin is already existing, continue ..." 
else touch empty.bin fi du -ch empty.bin } function zlib_software () { echo "--------------------------------------------------------------------" echo "- SOFTWARE ---------------------------------------------------------" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=0 export ZLIB_DEFLATE_IMPL=0 export ZLIB_TRACE=${trace} echo "Use SW libz zipe with standard 16KiB buffers" env | grep ZLIB test_compress_decompress test_data.bin mv test_data.bin.rfc1950 test_data.bin.rfc1950.zlib test_compress_decompress empty.bin mv empty.bin.rfc1950 empty.bin.rfc1950.zlib } function zlib_hardware_no_buffering () { echo "--------------------------------------------------------------------" echo "- HARDWARE without buffering ---------------------------------------" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=1 export ZLIB_DEFLATE_IMPL=1 export ZLIB_TRACE=${trace} export ZLIB_CARD=${card} export ZLIB_VERBOSE=${verbose} echo "--- zpipe with 16KiB in- and output buffers ------------------------" export ZLIB_IBUF_TOTAL=0 export ZLIB_OBUF_TOTAL=0 env | grep ZLIB echo "Use HW libz using card ${card} ..." echo " CARD: ${ZLIB_CARD}" echo " IBUF_TOTAL: ${ZLIB_IBUF_TOTAL}" echo " OBUF_TOTAL: ${ZLIB_OBUF_TOTAL}" test_compress_decompress test_data.bin mv test_data.bin.rfc1950 test_data.bin.rfc1950.genwqe test_compress_decompress empty.bin mv empty.bin.rfc1950 empty.bin.rfc1950.genwqe } function zlib_hardware_buffering () { echo "--------------------------------------------------------------------" echo "- HARDWARE with buffering ------------------------------------------" echo "--------------------------------------------------------------------" echo "--- zpipe with 16KiB in- and output buffers ------------------------" export ZLIB_IBUF_TOTAL=${ibuf_size} export ZLIB_OBUF_TOTAL=${ibuf_size} echo "Use HW libz using card ${card} ..." 
echo " CARD: ${ZLIB_CARD}" echo " IBUF_TOTAL: ${ZLIB_IBUF_TOTAL}" echo " OBUF_TOTAL: ${ZLIB_OBUF_TOTAL}" test_compress_decompress test_data.bin mv test_data.bin.rfc1950 test_data.bin.rfc1950.genwqe echo "--- zpipe_rnd with buffer size variations --------------------------" bufsizes="1023 4095 128KiB 256KiB 4MiB 7MiB 16MiB"; for bufsize in $bufsizes ; do test_compress_decompress_rnd test_data.bin $bufsize mv test_data.bin.rfc1950 test_data.bin.rfc1950.genwqe done for bufsize in $bufsizes ; do test_compress_decompress_fixed test_data.bin $bufsize mv test_data.bin.rfc1950 test_data.bin.rfc1950.genwqe done echo "--------------------------------------------------------------------" echo "- HARDWARE without buffering but using large buffers ---------------" echo "--------------------------------------------------------------------" export ZLIB_IBUF_TOTAL=0 export ZLIB_OBUF_TOTAL=0 echo "Use HW libz using card ${card} ..." echo " CARD: ${ZLIB_CARD}" echo " IBUF_TOTAL: ${ZLIB_IBUF_TOTAL}" echo " OBUF_TOTAL: ${ZLIB_OBUF_TOTAL}" for bufsize in 256KiB 512KiB 1MiB 2MiB 4MiB ; do test_compress_decompress_fixed test_data.bin $bufsize mv test_data.bin.rfc1950 test_data.bin.rfc1950.genwqe done export ZLIB_IBUF_TOTAL=${ibuf_size} export ZLIB_OBUF_TOTAL=${ibuf_size} echo "--------------------------------------------------------------------" echo "Test: Decompress SW compressed data with HW $ibuf_size buffers ..." echo "--------------------------------------------------------------------" time zpipe_rnd -s1MiB -d \ < test_data.bin.rfc1950.zlib > test_data.bin.out if [ $? -ne 0 ]; then echo "zpipe failed!" exit 1 fi echo "ok" echo "Compare data ..." diff test_data.bin test_data.bin.out if [ $? -ne 0 ]; then echo "test_data.bin and test_data.bin.out are different!" exit 1 fi echo "ok" echo "--------------------------------------------------------------------" echo "Test: Decompress SW data + padding using $ibuf_size buffers ..." 
echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=1 export ZLIB_DEFLATE_IMPL=1 export ZLIB_IBUF_TOTAL=${ibuf_size} export ZLIB_OBUF_TOTAL=0 # We must not buffer for inflate for this test dd if=/dev/urandom bs=1 count=100 of=padding.bin cat test_data.bin.rfc1950.zlib padding.bin > \ test_data.bin.rfc1950.padded.zlib time zpipe_rnd -s1MiB -d \ < test_data.bin.rfc1950.padded.zlib > test_data.bin.out if [ $? -ne 0 ]; then echo "zpipe failed!" exit 1 fi echo "ok" echo "Compare data ..." diff test_data.bin test_data.bin.out if [ $? -ne 0 ]; then echo "test_data.bin and test_data.bin.out are different!" exit 1 fi echo "ok" echo "--------------------------------------------------------------------" echo "Test: Decompress SW data + padding using $ibuf_size buffers (fully)" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=1 export ZLIB_DEFLATE_IMPL=1 export ZLIB_IBUF_TOTAL=${ibuf_size} export ZLIB_OBUF_TOTAL=${ibuf_size} dd if=/dev/urandom bs=1 count=100 of=padding.bin cat test_data.bin.rfc1950.zlib padding.bin > \ test_data.bin.rfc1950.padded.zlib time zpipe_rnd -s1MiB -d \ < test_data.bin.rfc1950.padded.zlib > test_data.bin.out if [ $? -ne 0 ]; then echo "zpipe failed!" exit 1 fi echo "ok" echo "Compare data ..." diff test_data.bin test_data.bin.out if [ $? -ne 0 ]; then echo "test_data.bin and test_data.bin.out are different!" exit 1 fi echo "ok" } function zlib_append () { local flush=$1 local params=$2 # Use default settings ... # Set size large enough that hardware inflate is realy used # # hhh [0x3ffff1c655d8] loops=0 flush=1 Z_PARTIAL_FLUSH # hhh [0x3ffff1c655d8] *** giving out 100 bytes ... # hhh Accumulated input data: # 00000000: 54 68 69 73 20 69 73 20 74 68 65 20 45 4e 44 21 | This.is.the.END. 
# # hhh d=2 # hhh d=50, 0 is goodness # hhh [0x3ffff1c655d8] flush=1 Z_PARTIAL_FLUSH avail_in=16 avail_out=0 unset ZLIB_INFLATE_IMPL unset ZLIB_DEFLATE_IMPL unset ZLIB_IBUF_TOTAL unset ZLIB_OBUF_TOTAL echo "Special zpipe_append setup, which failed once ... " echo -n " zpipe_append -FZLIB -fZ_PARTIAL_FLUSH -i2MiB -o4KiB -s256KiB -p122846 -t122846 " zpipe_append -FZLIB -fZ_PARTIAL_FLUSH -i2MiB -o4KiB -s256KiB -p122846 if [ $? -ne 0 ]; then echo "failed" exit 1 fi echo "ok" for f in ZLIB DEFLATE GZIP ; do for ibuf in 2MiB 1MiB 128KiB 4KiB 1000 100 ; do for obuf in 1MiB 128KiB 4KiB 1000 100 ; do echo -n "zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} -s256KiB -e -E ${params} " zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} -s256KiB -e -E ${params} if [ $? -ne 0 ]; then echo "failed" exit 1 fi echo "ok" done done done export ZLIB_INFLATE_IMPL=0x41 export ZLIB_DEFLATE_IMPL=0x41 #unset ZLIB_INFLATE_IMPL #unset ZLIB_DEFLATE_IMPL unset ZLIB_IBUF_TOTAL unset ZLIB_OBUF_TOTAL env | grep ZLIB for f in ZLIB DEFLATE GZIP ; do for ibuf in 2MiB 1MiB 128KiB 4KiB 1000 100 ; do for obuf in 1MiB 128KiB 4KiB 1000 100 ; do # echo "Append feature: format=${f} ib=${ibuf} ob=${obuf} ... " echo -n "zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} -s256KiB ${params} " zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} -s256KiB ${params} if [ $? -ne 0 ]; then echo "failed" exit 1 fi echo "ok" done done done echo "--------------------------------------------------------------------" echo "zpipe_append: HW compression/decompression without buffering" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=1 export ZLIB_DEFLATE_IMPL=1 export ZLIB_IBUF_TOTAL=0 export ZLIB_OBUF_TOTAL=0 env | grep ZLIB for f in ZLIB DEFLATE GZIP ; do for ibuf in 2MiB 1MiB 128KiB 4KiB 1000 100 ; do for obuf in 1MiB 128KiB 4KiB 1000 100 ; do # echo "Append feature: format=${f} ib=${ibuf} ob=${obuf} ... 
" echo -n "zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} ${params} " zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} ${params} if [ $? -ne 0 ]; then echo "failed" exit 1 fi echo "ok" done done done echo "--------------------------------------------------------------------" echo "zpipe_append: HW compression/decompression with buffering" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=1 export ZLIB_DEFLATE_IMPL=1 export ZLIB_IBUF_TOTAL=1MiB export ZLIB_OBUF_TOTAL=0 # known to fail env | grep ZLIB for f in ZLIB DEFLATE GZIP ; do for ibuf in 2MiB 1MiB 128KiB 4KiB 1000 100 ; do for obuf in 1MiB 128KiB 4KiB 1000 100 ; do # echo "Append feature: format=${f} ib=${ibuf} ob=${obuf} ... " echo -n "zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} ${params} " zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} ${params} if [ $? -ne 0 ]; then echo "failed" exit 1 fi echo "ok" done done done echo "--------------------------------------------------------------------" echo "zpipe_append: HW compression/decompression with buffering obuf=1MiB" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=1 export ZLIB_DEFLATE_IMPL=1 export ZLIB_IBUF_TOTAL=1MiB export ZLIB_OBUF_TOTAL=1MiB env | grep ZLIB for f in ZLIB DEFLATE GZIP ; do for ibuf in 2MiB 1MiB 128KiB 4KiB 1000 100 ; do for obuf in 1MiB 128KiB 4KiB 1000 100 ; do # echo "Append feature: format=${f} ib=${ibuf} ob=${obuf} ... " echo -n "zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} ${params} " zpipe_append -F${f} -f${flush} -i${ibuf} -o${obuf} ${params} if [ $? 
-ne 0 ]; then echo "failed" exit 1 fi echo "ok" done done done } function multithreading_quick () { echo "--------------------------------------------------------------------" echo "zpipe_mt: HW compression/decompression without buffering" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=1 export ZLIB_DEFLATE_IMPL=1 export ZLIB_IBUF_TOTAL=0 export ZLIB_OBUF_TOTAL=0 echo -n "Multithreading: sniff-test ... " zpipe_mt -t32 -c128 -i1MiB -o1MiB 2> /dev/null if [ $? -ne 0 ]; then echo "failed" exit 1 fi echo "ok" } function multithreading_unbuffered_memalign () { echo "--------------------------------------------------------------------" echo "zpipe_mt: HW comp/decomp w/o buffering and posix_memalign" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=1 export ZLIB_DEFLATE_IMPL=1 export ZLIB_IBUF_TOTAL=0 export ZLIB_OBUF_TOTAL=0 for ibuf in 256KiB 4KiB 1000 ; do for obuf in 256KiB 4KiB 1000 ; do echo -n "Multithreading: ib=${ibuf} ob=${obuf} posix_memalign ... " zpipe_mt -t32 -c100 -i${ibuf} -o${obuf} -p 2> /dev/null if [ $? -ne 0 ]; then echo "failed" exit 1 fi echo "ok" done done } function multithreading_unbuffered () { echo "--------------------------------------------------------------------" echo "zpipe_mt: HW compression/decompression without buffering" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=1 export ZLIB_DEFLATE_IMPL=1 export ZLIB_IBUF_TOTAL=0 export ZLIB_OBUF_TOTAL=0 for ibuf in 256KiB 4KiB 1000 ; do for obuf in 256KiB 4KiB 1000 ; do echo -n "Multithreading: ib=${ibuf} ob=${obuf} ... " zpipe_mt -t32 -c64 -i${ibuf} -o${obuf} 2> /dev/null if [ $? 
-ne 0 ]; then echo "failed" exit 1 fi echo "ok" done done } function multithreading_buffered () { echo "--------------------------------------------------------------------" echo "zpipe_mt: HW compression/decompression with buffering" echo "--------------------------------------------------------------------" export ZLIB_INFLATE_IMPL=0 export ZLIB_DEFLATE_IMPL=1 export ZLIB_IBUF_TOTAL=1MiB export ZLIB_OBUF_TOTAL=1MiB for ibuf in 256KiB 4KiB 1000 ; do for obuf in 256KiB 4KiB 1000 ; do echo -n "Multithreading: ib=${ibuf} ob=${obuf} ... " zpipe_mt -t32 -c64 -i${ibuf} -o${obuf} 2> /dev/null if [ $? -ne 0 ]; then echo "failed" exit 1 fi echo "ok" done done } build_code for flush in Z_PARTIAL_FLUSH Z_NO_FLUSH Z_FULL_FLUSH ; do zlib_append ${flush} done zlib_software zlib_hardware_no_buffering zlib_hardware_buffering multithreading_unbuffered_memalign multithreading_quick multithreading_unbuffered multithreading_buffered exit 0 genwqe-user-4.0.18/misc/zpipe.c000066400000000000000000000143101303345043000162660ustar00rootroot00000000000000/* zpipe.c: example of proper use of zlib's inflate() and deflate() Not copyrighted -- provided to the public domain Version 1.4 11 December 2005 Mark Adler */ /* Version history: 1.0 30 Oct 2004 First version 1.1 8 Nov 2004 Add void casting for unused return values Use switch statement for inflate() return values 1.2 9 Nov 2004 Add assertions to document zlib guarantees 1.3 6 Apr 2005 Remove incorrect assertion in inf() 1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions Avoid some compiler warnings for input and output buffers */ #include #include #include #include "zlib.h" #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) # include # include # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) #else # define SET_BINARY_MODE(file) #endif #define CHUNK 16384 /* Compress from file source to file dest until EOF on source. 
def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
   allocated for processing, Z_STREAM_ERROR if an invalid compression
   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
   version of the library linked do not match, or Z_ERRNO if there is
   an error reading or writing the files. */
static int def(FILE *source, FILE *dest, int level)
{
    int ret, flush;
    unsigned have;              /* bytes produced by one deflate() call */
    z_stream strm;
    unsigned char in[CHUNK];    /* input staging buffer */
    unsigned char out[CHUNK];   /* output staging buffer */

    /* allocate deflate state */
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    ret = deflateInit(&strm, level);
    if (ret != Z_OK)
        return ret;

    /* compress until end of file */
    do {
        /* a short read is fine here, real errors show up in ferror() */
        strm.avail_in = fread(in, 1, CHUNK, source);
        if (ferror(source)) {
            (void)deflateEnd(&strm);
            return Z_ERRNO;
        }
        /* the chunk which hit EOF is the last one: ask deflate() to
           finish the stream with it */
        flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
        strm.next_in = in;

        /* run deflate() on input until output buffer not full, finish
           compression if all of source has been read in */
        do {
            /* hand the complete output buffer to every call */
            strm.avail_out = CHUNK;
            strm.next_out = out;
            ret = deflate(&strm, flush);    /* no bad return value */
            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
            /* whatever is no longer available has been filled in */
            have = CHUNK - strm.avail_out;
            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
                (void)deflateEnd(&strm);
                return Z_ERRNO;
            }
        } while (strm.avail_out == 0);  /* buffer was filled completely,
                                           so call deflate() once more */
        assert(strm.avail_in == 0);     /* all input will be used */

        /* done when last data in file processed */
    } while (flush != Z_FINISH);
    assert(ret == Z_STREAM_END);        /* stream will be complete */

    /* clean up and return */
    (void)deflateEnd(&strm);
    return Z_OK;
}

/* Decompress from file source to file dest until stream ends or EOF.
   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
   allocated for processing, Z_DATA_ERROR if the deflate data is
   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
   the version of the library linked do not match, or Z_ERRNO if there
   is an error reading or writing the files.
*/
static int inf(FILE *source, FILE *dest)
{
    int ret;
    unsigned have;              /* bytes produced by one inflate() call */
    z_stream strm;
    unsigned char in[CHUNK];    /* input staging buffer */
    unsigned char out[CHUNK];   /* output staging buffer */

    /* allocate inflate state */
    strm.zalloc = Z_NULL;
    strm.zfree = Z_NULL;
    strm.opaque = Z_NULL;
    strm.avail_in = 0;
    strm.next_in = Z_NULL;
    ret = inflateInit(&strm);
    if (ret != Z_OK)
        return ret;

    /* decompress until deflate stream ends or end of file */
    do {
        strm.avail_in = fread(in, 1, CHUNK, source);
        if (ferror(source)) {
            (void)inflateEnd(&strm);
            return Z_ERRNO;
        }
        /* no more input although the stream has not ended yet: leave
           the loop, ret is then not Z_STREAM_END and Z_DATA_ERROR is
           returned at the bottom */
        if (strm.avail_in == 0)
            break;
        strm.next_in = in;

        /* run inflate() on input until output buffer not full */
        do {
            /* hand the complete output buffer to every call */
            strm.avail_out = CHUNK;
            strm.next_out = out;
            ret = inflate(&strm, Z_NO_FLUSH);
            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
            switch (ret) {
            case Z_NEED_DICT:
                /* no dictionary is ever supplied here, so treat the
                   request like corrupt data */
                ret = Z_DATA_ERROR;     /* and fall through */
            case Z_DATA_ERROR:
            case Z_MEM_ERROR:
                (void)inflateEnd(&strm);
                return ret;
            }
            /* whatever is no longer available has been filled in */
            have = CHUNK - strm.avail_out;
            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
                (void)inflateEnd(&strm);
                return Z_ERRNO;
            }
        } while (strm.avail_out == 0);  /* buffer was filled completely,
                                           so call inflate() once more */

        /* done when inflate() says it's done */
    } while (ret != Z_STREAM_END);

    /* clean up and return */
    (void)inflateEnd(&strm);
    return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}

/* report a zlib or i/o error */
/* ret is the code returned by def() or inf(); the matching text is
   written to stderr behind a "zpipe: " prefix. Codes without a case
   below produce the prefix only. */
static void zerr(int ret)
{
    fputs("zpipe: ", stderr);
    switch (ret) {
    case Z_ERRNO:
        /* the code does not say which side failed, so look at the
           error flag of both streams */
        if (ferror(stdin))
            fputs("error reading stdin\n", stderr);
        if (ferror(stdout))
            fputs("error writing stdout\n", stderr);
        break;
    case Z_STREAM_ERROR:
        fputs("invalid compression level\n", stderr);
        break;
    case Z_DATA_ERROR:
        fputs("invalid or incomplete deflate data\n", stderr);
        break;
    case Z_MEM_ERROR:
        fputs("out of memory\n", stderr);
        break;
    case Z_VERSION_ERROR:
        fputs("zlib version mismatch!\n", stderr);
    }
}

/* compress or decompress from stdin to stdout */
/* The value returned by def() respectively inf() is handed on as the
   return value of main(); wrong usage returns 1. */
int main(int argc, char **argv)
{
    int ret;

    /* avoid end-of-line conversions */
    SET_BINARY_MODE(stdin);
    SET_BINARY_MODE(stdout);

    /* do compression if no arguments */
    if (argc == 1) {
        ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
        if (ret != Z_OK)
            zerr(ret);
        return ret;
    }

    /* do decompression if -d specified */
    else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
        ret = inf(stdin, stdout);
        if (ret != Z_OK)
            zerr(ret);
        return ret;
    }

    /* otherwise, report usage */
    else {
        fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
        return 1;
    }
}
genwqe-user-4.0.18/misc/zpipe_append.c000066400000000000000000000356341303345043000176310ustar00rootroot00000000000000/* zpipe.c: example of proper use of zlib's inflate() and deflate()
   Not copyrighted -- provided to the public domain
   Version 1.4  11 December 2005  Mark Adler */

/*
 * Copyright 2016, International Business Machines
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This testcase appends data at the end of the compressed stream. The
 * challenge for the decompressor hardware/software is to stop on the
 * first byte, which is not part of the stream.
 *
 * This behavior is heavily relied upon from software like JAVA, which
 * appends its own trailing data to the encoded stream. If this
 * feature fails, the trailer cannot be found resulting in funny
 * behavior.
 *
 * At the same time, it is this feature, which prevents us from
 * buffering the input data when doing decompression. This causes
 * severe performance penalty when using too small buffers, up to
 * dropping to software if the input data size is below our threshold
 * value.
 */

/*
 * NOTE(review): the header names of the following directives are
 * missing in this copy of the file. The code visible below needs at
 * least the declarations of FILE/fread/fwrite, malloc/free, strcmp,
 * assert, syscall, pid_t and SYS_gettid - restore the operands from
 * the pristine source.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include /* For SYS_xxx definitions */
#include "zlib.h"

#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include
# include
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#else
# define SET_BINARY_MODE(file)
#endif

static int verbose = 0;

/* Size in bytes of the input and of the output buffer def() and inf()
   allocate for their work; the default is 16 KiB each. */
static unsigned int CHUNK_i = 16 * 1024; /* 16384; */
static unsigned int CHUNK_o = 16 * 1024; /* 16384; */

/* NOTE(review): not referenced in the part of the file reviewed here,
   presumably set from the command line - confirm. */
static int _pattern = 0;

/*
 * Return the id of the calling thread by invoking the gettid system
 * call directly.
 *
 * NOTE(review): newer C libraries may declare gettid() on their own,
 * which would clash with this static definition - confirm the build on
 * current distributions.
 */
static inline pid_t gettid(void)
{
	return (pid_t)syscall(SYS_gettid);
}

/*
 * Translate the format name "ZLIB", "DEFLATE" or "GZIP" into the
 * window bits value handed to deflateInit2()/inflateInit2(). The
 * comparison is case sensitive; any other name is treated like "ZLIB".
 * format must not be NULL since it is passed to strcmp() unchecked.
 */
static int figure_out_window_bits(const char *format)
{
	if (strcmp(format, "ZLIB") == 0)
		return 15;	/* 8..15: ZLIB encoding (RFC1950) */
	else if (strcmp(format, "DEFLATE") == 0)
		return -15;	/* -15 .. -8: inflate/deflate (RFC1951) */
	else if (strcmp(format, "GZIP") == 0)
		return 31;	/* GZIP encoding (RFC1952) */

	return 15;		/* unknown name: same as "ZLIB" */
}

/* Compress from file source to file dest until EOF on source.
def() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_STREAM_ERROR if an invalid compression level is supplied, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. */ static int def(FILE *source, FILE *dest, int window_bits, int _flush, int level, size_t *compressed_size, size_t *uncompressed_size) { int ret, flush; unsigned have; z_stream strm; unsigned char *in; unsigned char *out; unsigned int chunk_i = CHUNK_i; unsigned int chunk_o = CHUNK_o; in = malloc(CHUNK_i); if (in == NULL) return Z_ERRNO; out = malloc(CHUNK_o); if (in == NULL) { free(in); return Z_ERRNO; } /* allocate deflate state */ strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; strm.total_out = 0; ret = deflateInit2(&strm, level, Z_DEFLATED, window_bits, 8, Z_DEFAULT_STRATEGY); if (ret != Z_OK) return ret; /* compress until end of file */ do { chunk_i = CHUNK_i; strm.avail_in = fread(in, 1, chunk_i, source); if (ferror(source)) { (void)deflateEnd(&strm); free(in); free(out); return Z_ERRNO; } /* flush = _flush; */ flush = feof(source) ? Z_FINISH : _flush; strm.next_in = in; /* run deflate() on input until output buffer not full, finish compression if all of source has been read in */ do { chunk_o = CHUNK_o; strm.avail_out = chunk_o; strm.next_out = out; ret = deflate(&strm, flush); /* no bad ret value */ assert(ret != Z_STREAM_ERROR); /* not clobbered */ have = chunk_o - strm.avail_out; if (fwrite(out, 1, have, dest) != have || ferror(dest)) { (void)deflateEnd(&strm); free(in); free(out); return Z_ERRNO; } } while (strm.avail_out == 0); assert(strm.avail_in == 0); /* all input will be used */ /* done when last data in file processed */ } while (flush != Z_FINISH /* !feof(source) */); #if 0 /* Experimental, does not work in all cases */ /* Put Z_FINISH as last step ... 
*/ flush = Z_FINISH; chunk_o = CHUNK_o; strm.avail_out = chunk_o; strm.next_out = out; ret = deflate(&strm, flush); /* no bad ret value */ assert(ret != Z_STREAM_ERROR); /* not clobbered */ have = chunk_o - strm.avail_out; if (fwrite(out, 1, have, dest) != have || ferror(dest)) { (void)deflateEnd(&strm); free(in); free(out); return Z_ERRNO; } #endif assert(ret == Z_STREAM_END); /* stream will be complete */ if (compressed_size) *compressed_size = strm.total_out; if (uncompressed_size) *uncompressed_size = strm.total_in; /* clean up and return */ (void)deflateEnd(&strm); free(in); free(out); return Z_OK; } /* Decompress from file source to file dest until stream ends or EOF. inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_DATA_ERROR if the deflate data is invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. */ static int inf(FILE *source, FILE *dest, int window_bits, int _flush, size_t *decompressed_bytes, int expect_z_stream_end) { int ret; unsigned have; z_stream strm; unsigned char *in; unsigned char *out; unsigned int chunk_i = CHUNK_i; unsigned int chunk_o = CHUNK_o; in = malloc(CHUNK_i); if (in == NULL) return Z_ERRNO; out = malloc(CHUNK_o); if (in == NULL) { free(in); return Z_ERRNO; } /* fprintf(stderr, "===> input %d bytes, output %d bytes\n", CHUNK_i, CHUNK_o); */ /* allocate inflate state */ strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; strm.avail_in = 0; strm.next_in = Z_NULL; strm.total_in = 0; ret = inflateInit2(&strm, window_bits); if (ret != Z_OK) { free(in); free(out); return ret; } /* decompress until deflate stream ends or end of file */ do { chunk_i = CHUNK_i; strm.avail_in = fread(in, 1, chunk_i, source); if (ferror(source)) { (void)inflateEnd(&strm); free(in); free(out); return Z_ERRNO; } if (strm.avail_in == 0) break; strm.next_in = in; /* run inflate() on 
input until output buffer not full */ do { chunk_o = CHUNK_o; strm.avail_out = chunk_o; strm.next_out = out; ret = inflate(&strm, _flush); /* Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH */ /* Expect in some cases that we see Z_STREAM_END */ if (expect_z_stream_end && (ret != Z_STREAM_END)) { fprintf(stderr, "inflate did not return Z_STREAM_END " "rc=%d pattern=%d\n", ret, _pattern); abort(); } /* fprintf(stderr, "AAA inflate() rc=%d\n", ret); */ /* assert(ret != Z_STREAM_ERROR); *//* not clobbered */ if (ret == Z_STREAM_ERROR) { fprintf(stderr, "inflate failed rc=%d pattern=%d\n", ret, _pattern); abort(); } switch (ret) { case Z_NEED_DICT: ret = Z_DATA_ERROR; /* and fall through */ case Z_DATA_ERROR: case Z_MEM_ERROR: (void)inflateEnd(&strm); free(in); free(out); return ret; } have = chunk_o - strm.avail_out; if (fwrite(out, 1, have, dest) != have || ferror(dest)) { (void)inflateEnd(&strm); free(in); free(out); return Z_ERRNO; } /* fprintf(stderr, "AAA avail_out=%d\n", strm.avail_out); */ if (ret == Z_STREAM_END) break; } while (strm.avail_out == 0); /* done when inflate() says it's done */ } while (ret != Z_STREAM_END); /* clean up and return */ if (decompressed_bytes) *decompressed_bytes = strm.total_in; (void)inflateEnd(&strm); free(in); free(out); return ret == Z_STREAM_END ? 
/*
 * Print the command line synopsis to stderr.
 *
 * @prog: program path as found in argv[0]; only its basename is shown.
 *
 * The long option names listed here match the ones main() registers
 * with getopt_long() ("exact-input", "exact-output", "flush"); the
 * previous text advertised misspelled names that getopt_long() does not
 * accept. Sizes are parsed by str_to_num().
 */
static void usage(char *prog)
{
	char *b = basename(prog);

	fprintf(stderr, "%s usage: %s [-h] [-v]\n"
		" [-F, --format <ZLIB|DEFLATE|GZIP>]\n"
		" [-e, --exact-input] input matches size of data\n"
		" [-E, --exact-output] output matches size of data\n"
		" [-f, --flush <Z_NO_FLUSH|Z_PARTIAL_FLUSH|Z_SYNC_FLUSH|"
		"Z_FULL_FLUSH>]\n"
		" [-i, --i_bufsize <size>]\n"
		" [-o, --o_bufsize <size>]\n"
		" [-p, --pattern <number>] pattern to generate test-data\n"
		" [-s, --size <size>]\n", b, b);
}
conversions */ SET_BINARY_MODE(stdin); SET_BINARY_MODE(stdout); while (1) { int ch; int option_index = 0; static struct option long_options[] = { { "format", required_argument, NULL, 'F' }, { "flush", required_argument, NULL, 'f' }, { "exact-input", no_argument, NULL, 'e' }, { "exact-output", no_argument, NULL, 'E' }, { "i_bufsize", required_argument, NULL, 'i' }, { "o_bufsize", required_argument, NULL, 'o' }, { "size", required_argument, NULL, 's' }, { "pattern", required_argument, NULL, 'p' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, 'h' }, { 0, no_argument, NULL, 0 }, }; ch = getopt_long(argc, argv, "F:f:Eei:o:s:p:vh?", long_options, &option_index); if (ch == -1) /* all params processed ? */ break; switch (ch) { /* which card to use */ case 'F': format = optarg; break; case 'f': if (strcmp(optarg, "Z_NO_FLUSH") == 0) flush = Z_NO_FLUSH; else if (strcmp(optarg, "Z_PARTIAL_FLUSH") == 0) flush = Z_PARTIAL_FLUSH; else if (strcmp(optarg, "Z_SYNC_FLUSH") == 0) flush = Z_SYNC_FLUSH; else if (strcmp(optarg, "Z_FULL_FLUSH") == 0) flush = Z_FULL_FLUSH; break; case 'e': exact_input = 1; break; case 'E': exact_output = 1; break; case 'v': verbose++; break; case 'i': CHUNK_i = str_to_num(optarg); break; case 'o': CHUNK_o = str_to_num(optarg); break; case 's': size = str_to_num(optarg); break; case 'p': _pattern = str_to_num(optarg); break; case 'h': case '?': usage(argv[0]); exit(EXIT_SUCCESS); break; } } window_bits = figure_out_window_bits(format); /* fprintf(stderr, "AAA _pattern=%d\n", _pattern); */ sprintf(i_fname, "i_%d_%d.bin", _pattern, _pattern); sprintf(o_fname, "o_%d_%d.bin", _pattern, _pattern); sprintf(n_fname, "n_%d_%d.bin", _pattern, _pattern); /* Write output data */ i_fp = fopen(i_fname, "w+"); j = 0; input_size = 0; while (input_size < size) { rc = fprintf(i_fp, "%d %s %s ...\n", j, i_fname, o_fname); if (rc < 0) exit(EXIT_FAILURE); input_size += rc; j++; } fclose(i_fp); i_fp = fopen(i_fname, "r"); /* original data */ if (i_fp == 
NULL) exit(EXIT_FAILURE); o_fp = fopen(o_fname, "w+"); /* compressed data */ if (o_fp == NULL) exit(EXIT_FAILURE); /* Compress data */ rc = def(i_fp, o_fp, window_bits, flush, Z_DEFAULT_COMPRESSION, &expected_bytes, &decompressed_bytes); if (rc != Z_OK) { fprintf(stderr, "err: compression failed.\n"); zerr(rc); return rc; } fclose(i_fp); /* Append pattern */ rc = fprintf(o_fp, "%s", pattern); /* fprintf(stderr, "Appending %d bytes\n", rc); */ fclose(o_fp); o_fp = fopen(o_fname, "r"); /* original data */ if (o_fp == NULL) exit(EXIT_FAILURE); n_fp = fopen(n_fname, "w+"); /* new original data */ if (n_fp == NULL) exit(EXIT_FAILURE); /* * Test this special case: fully load the input data and * decompress in one shot. We can barely get into this * sitatation because of the codes internal buffering, which * provides a buffer of larger size to the decompressor. We * need to set ZLIB_OBUF_TOTAL=0 to disable this. * * Even with that we had trouble to see the circumvention for * the Z_STREAM_END detection to kick in. Which is not good, * since it requires coverage to get it right. */ if (exact_input) CHUNK_i = expected_bytes + strlen(pattern); if (exact_output) { CHUNK_o = decompressed_bytes; expect_z_stream_end = 1; } /* fprintf(stderr, "AAA Compressed: %d bytes and %d bytes padding\n" "AAA Decompressed: %d bytes\n", (int)expected_bytes, (int)strlen(pattern), (int)decompressed_bytes); */ /* * fprintf(stderr, "info: expected_bytes=%ld decompressed_bytes=%ld " * "strlen(pattern)=%ld\n", * expected_bytes, decompressed_bytes, strlen(pattern)); */ rc = inf(o_fp, n_fp, window_bits, flush, &decompressed_bytes, expect_z_stream_end); if (expected_bytes != decompressed_bytes) { fprintf(stderr, "err: compressed size mismatch " "%lld (expected) != %lld (absorbed). 
" "Expecting %d bytes remaining\n", (long long)expected_bytes, (long long)decompressed_bytes, (int)strlen(pattern)); exit(EXIT_FAILURE); } if (rc != Z_OK) { fprintf(stderr, "err: decompression failed.\n"); zerr(rc); return rc; } fclose(o_fp); fclose(n_fp); sprintf(diff_cmd, "diff -q %s %s", i_fname, n_fname); rc = system(diff_cmd); if (rc != 0) { fprintf(stderr, "Input %s and output %s differ!\n", i_fname, n_fname); exit(EXIT_FAILURE); } unlink(i_fname); unlink(n_fname); unlink(o_fname); exit(EXIT_SUCCESS); } genwqe-user-4.0.18/misc/zpipe_mt.c000066400000000000000000000455311303345043000167770ustar00rootroot00000000000000/* zpipe.c: example of proper use of zlib's inflate() and deflate() Not copyrighted -- provided to the public domain Version 1.4 11 December 2005 Mark Adler */ /* * Copyright 2016, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* For SYS_xxx definitions */ #include "zlib.h" #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) # include # include # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) #else # define SET_BINARY_MODE(file) #endif /* FIXME Fake this for old RHEL verions e.g. 
RHEL5.6 */ #ifndef CPU_ALLOC #define CPU_ALLOC(cpus) ({ void *ptr = NULL; ptr; }) #define CPU_ALLOC_SIZE(cpus) ({ int val = 0; val; }) #define CPU_ISSET_S(cpu, size, cpusetp) ({ int val = 0; val; }) #define CPU_FREE(cpusetp) #define CPU_ZERO_S(size, cpusetp) #define CPU_SET_S(run_cpu, size, cpusetp) #define sched_getcpu() ({ int val = 0; val; }) #define sched_setaffinity(x, size, cpusetp) ({ int val = 0; val; }) #endif /* FIXME Fake this for old RHEL verions e.g. RHEL5.6 */ #ifndef CLOCK_MONOTONIC_RAW #define clock_gettime(clk_id, tp) ({ int val = 0; val; }) #endif /* HACK Complicated debug cases only */ #undef CONFIG_ERROR_TRIGGER /* see wrap_hw.c too */ #ifdef CONFIG_ERROR_TRIGGER extern void error_trigger(void); #else static inline void error_trigger(void) { } #endif static pthread_mutex_t mutex; static int verbose = 0; static int count = 0; static int use_posix_memalign = 0; static int pre_alloc_memory = 0; static unsigned int CHUNK_i = 128 * 1024; /* 16384; */ static unsigned int CHUNK_o = 128 * 1024; /* 16384; */ static unsigned int data_size = 128 * 1024; static unsigned int threads = 1; static struct thread_data *d; static int exit_on_err = 0; #define pr_dbg(level, fmt, ...) do { \ if ((verbose) >= (level)) \ fprintf(stderr, fmt, ## __VA_ARGS__); \ } while (0) struct thread_data { pthread_t thread_id; pid_t tid; int thread_rc; int cpu; unsigned long compressions; unsigned long decompressions; unsigned long compare_ok; unsigned char *in; unsigned char *out; } __attribute__((__may_alias__)); /** * Try to ping process to a specific CPU. Returns the CPU we are * currently running on. 
*/ static int pin_to_cpu(int run_cpu) { cpu_set_t *cpusetp; size_t size; int num_cpus; num_cpus = CPU_SETSIZE; /* take default, currently 1024 */ cpusetp = CPU_ALLOC(num_cpus); if (cpusetp == NULL) return sched_getcpu(); size = CPU_ALLOC_SIZE(num_cpus); CPU_ZERO_S(size, cpusetp); CPU_SET_S(run_cpu, size, cpusetp); if (sched_setaffinity(0, size, cpusetp) < 0) { CPU_FREE(cpusetp); return sched_getcpu(); } /* figure out on which cpus we actually run */ CPU_FREE(cpusetp); return run_cpu; } static pid_t gettid(void) { return (pid_t)syscall(SYS_gettid); } static inline unsigned long get_nsec(void) { struct timespec ptime = { .tv_sec = 0, .tv_nsec = 0 }; clock_gettime(CLOCK_MONOTONIC_RAW, &ptime); return ptime.tv_sec * 1000000000 + ptime.tv_nsec; } static inline void *__malloc(size_t size) { if (use_posix_memalign) { int rc; void *ptr; rc = posix_memalign(&ptr, sysconf(_SC_PAGESIZE), size); if (rc != 0) { printf("err: errno=%d %s\n", errno, strerror(errno)); return NULL; } return ptr; } return malloc(size); } static inline void __free(void *ptr) { if (ptr) free(ptr); } static int check_for_pattern(const unsigned char *buf, unsigned int len, int it, void *in, void *out, uint8_t pattern) { unsigned int i; unsigned int zeros = 0; for (i = 0; i < len; i++) { if (buf[i] == pattern) zeros++; else zeros = 0; if (zeros >= 5) { fprintf(stderr, "%08lx.%08lx err: i=%016lx o=%016lx " "it=%d: %d or more times \"%02x\" " "at %016lx!\n", (unsigned long)getpid(), (unsigned long)gettid(), (unsigned long)in, (unsigned long)out, it, zeros, pattern, (unsigned long)&buf[i] - zeros); return 1; } } return 0; } /* Compress from file source to file dest until EOF on source. def() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_STREAM_ERROR if an invalid compression level is supplied, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. 
*/ #undef CONFIG_DUMP_DATA static int def(struct thread_data *d, FILE *source, FILE *dest, int level, int iter __attribute__((unused))) { int ret, flush; unsigned have; z_stream strm; unsigned char *in; unsigned char *out; unsigned int chunk_i = CHUNK_i; unsigned int chunk_o = CHUNK_o; int nr = 0; in = (pre_alloc_memory) ? d->in : __malloc(CHUNK_i); if (in == NULL) return Z_ERRNO; out = (pre_alloc_memory) ? d->out : __malloc(CHUNK_o); if (out == NULL) return Z_ERRNO; /* allocate deflate state */ strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; ret = deflateInit(&strm, level); if (ret != Z_OK) { if (!pre_alloc_memory) { __free(in); __free(out); } return ret; } /* compress until end of file */ do { strm.avail_in = fread(in, 1, chunk_i, source); if (ferror(source)) { (void)deflateEnd(&strm); if (!pre_alloc_memory) { __free(in); __free(out); } return Z_ERRNO; } flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; strm.next_in = in; /* run deflate() on input until output buffer not full, finish compression if all of source has been read in */ do { strm.avail_out = chunk_o; strm.next_out = out; memset(strm.next_out, 0xF0, chunk_o); if (chunk_o >= 8) *((uint32_t *)&out[4]) = gettid(); pr_dbg(3, "%08lx.%08lx 1) %02x%02x%02x%02x%02x ...\n", (unsigned long)getpid(), (unsigned long)gettid(), out[0], out[1], out[2], out[3], out[4]); ret = deflate(&strm, flush); /* no bad ret value */ assert(ret != Z_STREAM_ERROR); /* not clobbered */ have = chunk_o - strm.avail_out; pr_dbg(3, "%08lx.%08lx 2) %02x%02x%02x%02x%02x ...\n", (unsigned long)getpid(), (unsigned long)gettid(), out[0], out[1], out[2], out[3], out[4]); if (check_for_pattern(out, have, nr, in, out, 0x00) || check_for_pattern(out, have, nr, in, out, 0xf0) || check_for_pattern(out, have, nr, in, out, 0xf1)) { exit_on_err = 1; error_trigger(); /* FIXME Write a REGISTER as trigger */ } nr++; if (fwrite(out, 1, have, dest) != have || ferror(dest)) { (void)deflateEnd(&strm); if (!pre_alloc_memory) { __free(in); 
__free(out); } return Z_ERRNO; } } while (strm.avail_out == 0); assert(strm.avail_in == 0); /* all input will be used */ /* done when last data in file processed */ } while (flush != Z_FINISH); assert(ret == Z_STREAM_END); /* stream will be complete */ /* clean up and return */ (void)deflateEnd(&strm); if (!pre_alloc_memory) { __free(in); __free(out); } return Z_OK; } /* Decompress from file source to file dest until stream ends or EOF. inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_DATA_ERROR if the deflate data is invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. */ static int inf(struct thread_data *d, FILE *source, FILE *dest, int iter __attribute__((unused))) { int ret; unsigned have; z_stream strm; unsigned char *in; unsigned char *out; unsigned int chunk_i = CHUNK_i; unsigned int chunk_o = CHUNK_o; in = (pre_alloc_memory) ? d->in : __malloc(CHUNK_i); if (in == NULL) return Z_ERRNO; out = (pre_alloc_memory) ? 
d->out : __malloc(CHUNK_o); if (out == NULL) return Z_ERRNO; /* allocate inflate state */ strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; strm.avail_in = 0; strm.next_in = Z_NULL; ret = inflateInit(&strm); if (ret != Z_OK) { if (!pre_alloc_memory) { __free(in); __free(out); } return ret; } /* decompress until deflate stream ends or end of file */ do { strm.avail_in = fread(in, 1, chunk_i, source); if (ferror(source)) { (void)inflateEnd(&strm); if (!pre_alloc_memory) { __free(in); __free(out); } return Z_ERRNO; } if (strm.avail_in == 0) break; strm.next_in = in; /* run inflate() on input until output buffer not full */ do { strm.avail_out = chunk_o; strm.next_out = out; memset(strm.next_out, 0xF1, chunk_o); ret = inflate(&strm, Z_NO_FLUSH /* Z_SYNC_FLUSH */); /* assert(ret != Z_STREAM_ERROR); *//* not clobbered */ switch (ret) { case Z_NEED_DICT: ret = Z_DATA_ERROR; /* and fall through */ case Z_STREAM_ERROR: case Z_DATA_ERROR: case Z_MEM_ERROR: (void)inflateEnd(&strm); if (!pre_alloc_memory) { __free(in); __free(out); } return ret; } have = chunk_o - strm.avail_out; if (fwrite(out, 1, have, dest) != have || ferror(dest)) { (void)inflateEnd(&strm); if (!pre_alloc_memory) { __free(in); __free(out); } return Z_ERRNO; } } while (strm.avail_out == 0); /* done when inflate() says it's done */ } while (ret != Z_STREAM_END); /* clean up and return */ (void)inflateEnd(&strm); if (!pre_alloc_memory) { __free(in); __free(out); } return ret == Z_STREAM_END ? 
Z_OK : Z_DATA_ERROR; } /* report a zlib or i/o error */ static void zerr(int ret) { int xerrno = errno; switch (ret) { case Z_ERRNO: fprintf(stderr, "errno=%d: %s\n", xerrno, strerror(xerrno)); if (ferror(stdin)) fputs("error reading stdin\n", stderr); if (ferror(stdout)) fputs("error writing stdout\n", stderr); break; case Z_STREAM_ERROR: fputs("stream error\n", stderr); break; case Z_DATA_ERROR: fprintf(stderr, "invalid or incomplete deflate data (%d)\n", ret); break; case Z_MEM_ERROR: fputs("out of memory\n", stderr); break; case Z_VERSION_ERROR: fputs("zlib version mismatch!\n", stderr); } } /** * str_to_num() - Convert string into number and cope with endings like * KiB for kilobyte * MiB for megabyte * GiB for gigabyte */ static inline uint64_t str_to_num(char *str) { char *s = str; uint64_t num = strtoull(s, &s, 0); if (*s == '\0') return num; if (strcmp(s, "KiB") == 0) num *= 1024; else if (strcmp(s, "MiB") == 0) num *= 1024 * 1024; else if (strcmp(s, "GiB") == 0) num *= 1024 * 1024 * 1024; return num; } static void usage(char *prog) { char *b = basename(prog); fprintf(stderr, "%s usage: %s\n" " [-X, --cpu ]\n" " [-t, --threads ] # of threads in parallel\n" " [-c, --count ] # of files to comp/decomp\n" " [-p, --use-posix-memalign]# use aligned allocationn\n" " [-P, --pre-alloc-memory] # zse pre-allocated memoryn\n" " [-i, --i_bufsize ]\n" " [-o, --o_bufsize ]\n" " [-d, --data_size ]\n" "\n", b, b); } static void *libz_thread(void *data) { int rc; unsigned int i, len = 0; struct thread_data *d = (struct thread_data *)data; FILE *i_fp, *o_fp, *n_fp; char i_fname[64], o_fname[64], n_fname[64]; char diff_cmd[128]; d->tid = gettid(); d->cpu = sched_getcpu(); for (i = 0; (i < (unsigned int)count) && (exit_on_err == 0); i++) { unsigned int j; int new_cpu; sprintf(i_fname, "i_%08x_%08x_%d.bin", getpid(), gettid(), i); sprintf(o_fname, "o_%08x_%08x_%d.bin", getpid(), gettid(), i); sprintf(n_fname, "n_%08x_%08x_%d.bin", getpid(), gettid(), i); i_fp = fopen(i_fname, 
"w+"); for (j = 0, len = 0; len < data_size; j++) { uint64_t x[4]; /* write binary data */ x[0] = __cpu_to_be64(0x1122334455667788ull); x[1] = __cpu_to_be64((unsigned long)d->in); x[2] = __cpu_to_be64((unsigned long)d->out); x[3] = __cpu_to_be64((unsigned long)i); rc = fwrite(x, 1, sizeof(x), i_fp); if (rc <= 0) exit(EXIT_FAILURE); #if CONFIG_ASCII_DATA rc = fprintf(i_fp, "%d %s %s in=%016llx out=%016llx ...\n", j, i_fname, o_fname, (long long)d->in, (long long)d->out); if (rc < 0) exit(EXIT_FAILURE); #endif len += rc; /* data_size */ } fclose(i_fp); i_fp = fopen(i_fname, "r"); /* original data */ if (i_fp == NULL) exit(EXIT_FAILURE); o_fp = fopen(o_fname, "w+"); /* compressed data */ if (o_fp == NULL) exit(EXIT_FAILURE); pr_dbg(3, "%08x.%08x %d. compressing ...\n", getpid(), gettid(), i); rc = def(d, i_fp, o_fp, Z_DEFAULT_COMPRESSION, i); if (rc != Z_OK) { error_trigger(); fprintf(stderr, "err/def: rc=%d %s %s %s\n", rc, i_fname, o_fname, n_fname); zerr(rc); goto exit_failure; } new_cpu = sched_getcpu(); if (d->cpu != new_cpu) { pr_dbg(1, "%08x.%08x CPU moved from %d to %d\n", getpid(), gettid(), d->cpu, new_cpu); d->cpu = new_cpu; } fclose(i_fp); fclose(o_fp); d->compressions++; pr_dbg(3, "%08x.%08x %d. 
decompressing ...\n", getpid(), gettid(), i); o_fp = fopen(o_fname, "r"); /* original data */ if (o_fp == NULL) exit(EXIT_FAILURE); n_fp = fopen(n_fname, "w+"); /* new original data */ if (n_fp == NULL) exit(EXIT_FAILURE); rc = inf(d, o_fp, n_fp, i); if (rc != Z_OK) { error_trigger(); fprintf(stderr, "%08x.%08x err/inf: rc=%d %s %s %s\n", getpid(), gettid(), rc, i_fname, o_fname, n_fname); zerr(rc); fprintf(stderr, "Dumping %s ...\n", o_fname); sprintf(diff_cmd, "xxd %s", o_fname); rc = system(diff_cmd); if (rc != 0) fprintf(stderr, "%08x.%08x %s: %d\n", getpid(), gettid(), strerror(errno), errno); goto exit_failure; } new_cpu = sched_getcpu(); if (d->cpu != new_cpu) { pr_dbg(1, "CPU moved from %d to %d\n", d->cpu, new_cpu); d->cpu = new_cpu; } fclose(o_fp); fclose(n_fp); d->decompressions++; sprintf(diff_cmd, "diff -q %s %s", i_fname, n_fname); rc = system(diff_cmd); if (rc != 0) { error_trigger(); fprintf(stderr, "%08x.%08x In %s and Out %s differ!\n", getpid(), gettid(), i_fname, n_fname); goto exit_failure; } d->compare_ok++; unlink(i_fname); unlink(o_fname); unlink(n_fname); } d->thread_rc = 0; pthread_exit(&d->thread_rc); exit_failure: exit_on_err = 1; d->thread_rc = -2; pthread_exit(&d->thread_rc); } static int run_threads(struct thread_data *d, unsigned int threads) { int rc; unsigned int i, errors = 0; for (i = 0; i < threads; i++) { d[i].thread_rc = -1; if (pre_alloc_memory) { d[i].in = __malloc(CHUNK_i); if (d[i].in == NULL) return Z_ERRNO; d[i].out = __malloc(CHUNK_o); if (d[i].out == NULL) return Z_ERRNO; } rc = pthread_create(&d[i].thread_id, NULL, &libz_thread, &d[i]); if (rc != 0) { fprintf(stderr, "starting %d. libz_thread failed!\n", i); return EXIT_FAILURE; } } /* FIXME give some time to setup the tid value ... 
;-) */ sleep(1); if (pre_alloc_memory) for (i = 0; i < threads; i++) fprintf(stderr, " %08lx.%08lx " "in:%016lx-%016lx out:%016lx-%016lx\n", (unsigned long)getpid(), (unsigned long)d[i].tid, (unsigned long)d[i].in, (unsigned long)d[i].in + CHUNK_i, (unsigned long)d[i].out, (unsigned long)d[i].out + CHUNK_i); for (i = 0; i < threads; i++) { rc = pthread_join(d[i].thread_id, NULL); if (rc != 0) { fprintf(stderr, "joining genwqe_health thread failed!\n"); return EXIT_FAILURE; } } for (i = 0; i < threads; i++) errors += d[i].compressions - d[i].compare_ok; if (pre_alloc_memory) { for (i = 0; i < threads; i++) { __free(d[i].in); __free(d[i].out); } } return errors; } static void __print_results(struct thread_data *d, unsigned int threads) { unsigned int i, errors = 0; fprintf(stderr, "Statistics:\n"); for (i = 0; i < threads; i++) { fprintf(stderr, " %08lx.%08lx thread_id=%08lx rc=%d cmp=%ld " "decmp=%ld cmp_ok=%ld\n", (unsigned long)getpid(), (unsigned long)d[i].tid, (unsigned long)d[i].thread_id, (int)d[i].thread_rc, d[i].compressions, d[i].decompressions, d[i].compare_ok); errors += d[i].compressions - d[i].compare_ok; } fprintf(stderr, "%d errors found%c\n", errors, errors ? '!' 
: '.'); } static void print_results(void) { __print_results(d, threads); } /* compress or decompress from stdin to stdout */ int main(int argc, char **argv) { int rc = EXIT_SUCCESS; int cpu = -1; /* avoid end-of-line conversions */ SET_BINARY_MODE(stdin); SET_BINARY_MODE(stdout); while (1) { int ch; int option_index = 0; static struct option long_options[] = { { "cpu", required_argument, NULL, 'X' }, { "i_bufsize", required_argument, NULL, 'i' }, { "o_bufsize", required_argument, NULL, 'o' }, { "data_size", required_argument, NULL, 'd' }, { "threads", required_argument, NULL, 't' }, { "count", required_argument, NULL, 'c' }, { "use-posix-memalign", no_argument, NULL, 'p' }, { "pre-alloc-memory", no_argument, NULL, 'P' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, 'h' }, { 0, no_argument, NULL, 0 }, }; ch = getopt_long(argc, argv, "X:d:Ppc:t:i:o:vh?", long_options, &option_index); if (ch == -1) /* all params processed ? */ break; switch (ch) { /* which card to use */ case 'X': cpu = strtoul(optarg, NULL, 0); break; case 'v': verbose++; break; case 't': threads = str_to_num(optarg); break; case 'i': CHUNK_i = str_to_num(optarg); break; case 'd': data_size = str_to_num(optarg); break; case 'c': count = str_to_num(optarg); break; case 'p': use_posix_memalign = 1; break; case 'P': pre_alloc_memory = 1; break; case 'o': CHUNK_o = str_to_num(optarg); break; case 'h': case '?': usage(argv[0]); exit(EXIT_SUCCESS); break; } } pin_to_cpu(cpu); d = calloc(threads, sizeof(struct thread_data)); if (d == NULL) return EXIT_FAILURE; atexit(print_results); rc = pthread_mutex_init(&mutex, NULL); if (rc != 0) fprintf(stderr, "err: initializing mutex failed!\n"); rc = run_threads(d, threads); pthread_mutex_destroy(&mutex); if (rc != 0) exit(EXIT_FAILURE); exit(EXIT_SUCCESS); } genwqe-user-4.0.18/misc/zpipe_rnd.c000066400000000000000000000266021303345043000171400ustar00rootroot00000000000000/* zpipe.c: example of proper use of zlib's inflate() and deflate() Not 
copyrighted -- provided to the public domain Version 1.4 11 December 2005 Mark Adler */ /* * Copyright 2016, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include "zlib.h" #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) # include # include # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) #else # define SET_BINARY_MODE(file) #endif static int verbose = 0; static unsigned int seed = 0x1974; static int rnd = 0; static unsigned int CHUNK_i = 4 * 1024 * 1024; /* 16384; */ static unsigned int CHUNK_o = 4 * 1024 * 1024; /* 16384; */ /* Compress from file source to file dest until EOF on source. def() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_STREAM_ERROR if an invalid compression level is supplied, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. 
*/ static int def(FILE *source, FILE *dest, int level, int windowBits, uint8_t *dictionary, int dictLength) { int ret, flush; unsigned have; z_stream strm; unsigned char *in; unsigned char *out; unsigned int chunk_i = CHUNK_i; unsigned int chunk_o = CHUNK_o; in = malloc(CHUNK_i); if (in == NULL) return Z_ERRNO; out = malloc(CHUNK_o); if (out == NULL) return Z_ERRNO; /* allocate deflate state */ strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; ret = deflateInit2(&strm, level, Z_DEFLATED, windowBits, 8, Z_DEFAULT_STRATEGY); if (ret != Z_OK) return ret; if (dictLength > 0) { ret = deflateSetDictionary(&strm, dictionary, dictLength); if (ret != Z_OK) return ret; } /* compress until end of file */ do { chunk_i = rnd ? random() % CHUNK_i + 1 : CHUNK_i; if (verbose) fprintf(stderr, "chunk_i=%d\n", chunk_i); strm.avail_in = fread(in, 1, chunk_i, source); if (ferror(source)) { (void)deflateEnd(&strm); free(in); free(out); return Z_ERRNO; } flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; strm.next_in = in; /* run deflate() on input until output buffer not full, finish compression if all of source has been read in */ do { chunk_o = rnd ? random() % CHUNK_o + 1 : CHUNK_o; if (verbose) fprintf(stderr, "chunk_o=%d\n", chunk_o); strm.avail_out = chunk_o; strm.next_out = out; ret = deflate(&strm, flush); /* no bad ret value */ assert(ret != Z_STREAM_ERROR); /* not clobbered */ have = chunk_o - strm.avail_out; if (fwrite(out, 1, have, dest) != have || ferror(dest)) { (void)deflateEnd(&strm); free(in); free(out); return Z_ERRNO; } } while (strm.avail_out == 0); assert(strm.avail_in == 0); /* all input will be used */ /* done when last data in file processed */ } while (flush != Z_FINISH); assert(ret == Z_STREAM_END); /* stream will be complete */ /* clean up and return */ (void)deflateEnd(&strm); free(in); free(out); return Z_OK; } /* Decompress from file source to file dest until stream ends or EOF. 
inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_DATA_ERROR if the deflate data is invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. */ static int inf(FILE *source, FILE *dest, int windowBits, uint8_t *dictionary, int dictLength) { int ret; unsigned have; z_stream strm; unsigned char *in; unsigned char *out; unsigned int chunk_i = CHUNK_i; unsigned int chunk_o = CHUNK_o; in = malloc(CHUNK_i); if (in == NULL) return Z_ERRNO; out = malloc(CHUNK_o); if (out == NULL) return Z_ERRNO; /* allocate inflate state */ strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; strm.avail_in = 0; strm.next_in = Z_NULL; ret = inflateInit2(&strm, windowBits); if (ret != Z_OK) return ret; if (!((windowBits >= 8) && (windowBits <= 15)) && /* !ZLIB */ (dictLength > 0)) { ret = inflateSetDictionary(&strm, dictionary, dictLength); if (ret != Z_OK) return ret; } /* decompress until deflate stream ends or end of file */ do { chunk_i = rnd ? random() % CHUNK_i + 1 : CHUNK_i; if (verbose) fprintf(stderr, "chunk_i=%d\n", chunk_i); strm.avail_in = fread(in, 1, chunk_i, source); if (ferror(source)) { (void)inflateEnd(&strm); free(in); free(out); return Z_ERRNO; } if (strm.avail_in == 0) break; strm.next_in = in; /* run inflate() on input until output buffer not full */ do { try_again: chunk_o = rnd ? random() % CHUNK_o + 1 : CHUNK_o; if (verbose) fprintf(stderr, "chunk_o=%d\n", chunk_o); strm.avail_out = chunk_o; strm.next_out = out; ret = inflate(&strm, Z_NO_FLUSH /* Z_SYNC_FLUSH */); assert(ret != Z_STREAM_ERROR); /* not clobbered */ switch (ret) { case Z_NEED_DICT: if (((windowBits >= 8) && (windowBits <= 15)) && /* ZLIB! 
*/ (dictLength > 0)) { ret = inflateSetDictionary(&strm, dictionary, dictLength); if (ret != Z_OK) { (void)inflateEnd(&strm); free(in); free(out); return ret; } goto try_again; /* try again */ } case Z_DATA_ERROR: /* and fall through */ case Z_MEM_ERROR: (void)inflateEnd(&strm); free(in); free(out); return ret; } have = chunk_o - strm.avail_out; if (fwrite(out, 1, have, dest) != have || ferror(dest)) { (void)inflateEnd(&strm); free(in); free(out); return Z_ERRNO; } } while (strm.avail_out == 0); /* done when inflate() says it's done */ } while (ret != Z_STREAM_END); /* clean up and return */ (void)inflateEnd(&strm); free(in); free(out); return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; } /* report a zlib or i/o error */ static void zerr(int ret) { fputs("zpipe_rnd: ", stderr); switch (ret) { case Z_ERRNO: if (ferror(stdin)) fputs("error reading stdin\n", stderr); if (ferror(stdout)) fputs("error writing stdout\n", stderr); break; case Z_STREAM_ERROR: fputs("invalid compression level\n", stderr); break; case Z_DATA_ERROR: fputs("invalid or incomplete deflate data\n", stderr); break; case Z_MEM_ERROR: fputs("out of memory\n", stderr); break; case Z_NEED_DICT: fputs("need dictionary data\n", stderr); break; case Z_VERSION_ERROR: fputs("zlib version mismatch!\n", stderr); break; default: fprintf(stderr, "zlib unknown error %d\n", ret); break; } } /** * str_to_num() - Convert string into number and cope with endings like * KiB for kilobyte * MiB for megabyte * GiB for gigabyte */ static inline uint64_t str_to_num(char *str) { char *s = str; uint64_t num = strtoull(s, &s, 0); if (*s == '\0') return num; if (strcmp(s, "KiB") == 0) num *= 1024; else if (strcmp(s, "MiB") == 0) num *= 1024 * 1024; else if (strcmp(s, "GiB") == 0) num *= 1024 * 1024 * 1024; return num; } static void usage(char *prog) { char *b = basename(prog); fprintf(stderr, "%s usage: %s [-d, --decompress]\n" " [-F, --format ]\n" " [-r, --rnd\n" " [-s, --seed \n" " [-1, --fast]\n" " [-6, --default]\n" " 
[-9, --best]\n" " [-i, --i_bufsize ]\n" " [-D, --dictionary ]\n" " [-o, --o_bufsize ] < source > dest\n", b, b); } static int figure_out_windowBits(const char *format) { if (strcmp(format, "ZLIB") == 0) return 15; /* 8..15: ZLIB encoding (RFC1950) */ else if (strcmp(format, "DEFLATE") == 0) return -15; /* -15 .. -8: inflate/deflate (RFC1951) */ else if (strcmp(format, "GZIP") == 0) return 31; /* GZIP encoding (RFC1952) */ return 15; } /** * Load dictionary into buffer. * Max size is 32 KiB. */ static ssize_t dict_load(const char *fname, uint8_t *buff, size_t len) { int rc; FILE *fp; if ((fname == NULL) || (buff == NULL) || (len == 0)) return -EINVAL; fp = fopen(fname, "r"); if (!fp) { fprintf(stderr, "Cannot open file %s: %s\n", fname, strerror(errno)); return -1; } rc = fread(buff, 1, len, fp); if (rc == -1) fprintf(stderr, "Cannot read file %s: %s\n", fname, strerror(errno)); fclose(fp); return rc; } /** * Compress or decompress from stdin to stdout. */ int main(int argc, char **argv) { int ret; int compress = 1; const char *format = "ZLIB"; const char *dictName = NULL; uint8_t dictionary[32 * 1024]; /* 32 KiB maximum size */ int dictLength = 0; int windowBits; int level = Z_DEFAULT_COMPRESSION; /* avoid end-of-line conversions */ SET_BINARY_MODE(stdin); SET_BINARY_MODE(stdout); while (1) { int ch; int option_index = 0; static struct option long_options[] = { { "decompress", no_argument, NULL, 'd' }, { "format", required_argument, NULL, 'F' }, { "fast", no_argument, NULL, '1' }, { "default", no_argument, NULL, '6' }, { "best", no_argument, NULL, '9' }, { "seed", required_argument, NULL, 's' }, { "i_bufsize", required_argument, NULL, 'i' }, { "o_bufsize", required_argument, NULL, 'o' }, { "dictionary", required_argument, NULL, 'D' }, { "rnd", no_argument, NULL, 'r' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, 'h' }, { 0, no_argument, NULL, 0 }, }; ch = getopt_long(argc, argv, "169D:F:rs:i:o:dvh?", long_options, &option_index); if (ch == 
-1) /* all params processed ? */ break; switch (ch) { /* which card to use */ case 'd': compress = 0; break; case 'F': format = optarg; break; case '1': level = Z_BEST_SPEED; break; case '6': level = Z_DEFAULT_COMPRESSION; break; case '9': level = Z_BEST_COMPRESSION; break; case 'D': dictName = optarg; break; case 'r': rnd++; break; case 'v': verbose++; break; case 's': seed = str_to_num(optarg); break; case 'i': CHUNK_i = str_to_num(optarg); break; case 'o': CHUNK_o = str_to_num(optarg); break; case 'h': case '?': usage(argv[0]); exit(EXIT_SUCCESS); break; } } srandom(seed); windowBits = figure_out_windowBits(format); dictLength = dict_load(dictName, dictionary, sizeof(dictionary)); /* do compression if no arguments */ if (compress == 1) { ret = def(stdin, stdout, level, windowBits, dictionary, dictLength); if (ret != Z_OK) zerr(ret); return ret; } /* do decompression if -d specified */ else if (compress == 0) { ret = inf(stdin, stdout, windowBits, dictionary, dictLength); if (ret != Z_OK) zerr(ret); return ret; } /* otherwise, report usage */ else { usage(argv[0]); return 1; } exit(EXIT_SUCCESS); } genwqe-user-4.0.18/spec/000077500000000000000000000000001303345043000147735ustar00rootroot00000000000000genwqe-user-4.0.18/spec/genwqe.spec000066400000000000000000000126621303345043000171440ustar00rootroot00000000000000# Copyright 2015, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # zlib-devel 1.2.8 is better, but 1.2.7 should work too # # The following switch tries to take care that the distros libz.so is been taken: # CONFIG_ZLIB_PATH=%{_libdir}/libz.so # No special libz build should be needed anymore, since we added the right # dependency to the spec file. We want to have a zlib-devel installed. # Summary: GenWQE userspace tools Name: genwqe-tools Version: 4.0.18 Release: 1%{?dist} License: Apache-2.0 Group: Development/Tools URL: https://github.com/ibm-genwqe/genwqe-user/ Requires: zlib >= 1.2.7 BuildRequires: zlib-devel >= 1.2.7 help2man BuildRoot: %{_tmppath}/%{name}-root Source0: https://github.com/ibm-genwqe/genwqe-user/archive/v%{version}.tar.gz %description Provide a suite of utilities to manage and configure the IBM GenWQE card. %package -n genwqe-zlib Summary: GenWQE hardware accelerated libz Group: System Environment/Base %description -n genwqe-zlib GenWQE hardware accelerated libz and test-utilities. %package -n genwqe-vpd Summary: GenWQE adapter VPD tools Group: System Environment/Base %description -n genwqe-vpd The genwqe-vpd package contains GenWQE adapter VPD tools. %package devel Summary: Development files for %{name} Group: Development/Libraries Requires: %{name} = %{version} %description devel The %{name}-devel package contains libraries and header files for developing applications that use %{name}. %prep %setup -q -n genwqe-user-%{version} %ifarch ppc64le %define libcxl "BUNDLE_LIBCXL=1" %endif %build %{__make} %{?_smp_mflags} tools lib VERSION=%{version} \ CONFIG_ZLIB_PATH=%{_libdir}/libz.so.1 %{?libcxl} %install %{__make} %{?_smp_mflags} install DESTDIR=%{buildroot}/%{_prefix} \ VERSION=%{version} SYSTEMD_UNIT_DIR=%{buildroot}/%{_unitdir} \ LIB_INSTALL_PATH=%{buildroot}/%{_libdir}/genwqe \ INCLUDE_INSTALL_PATH=%{buildroot}/%{_includedir}/genwqe # FIXME Instead of trying to fixup things in the spec fike, let us consider # changing the associated install rule, such that the spec file # can get smaller and simpler. 
# # Move genwqe_vpd.csv to expected location. %{__mkdir} -p %{buildroot}/%{_sysconfdir}/ %{__install} -m 0644 tools/genwqe_vpd.csv %{buildroot}/etc/ strip %{buildroot}%{_bindir}/genwqe_gzip strip %{buildroot}%{_bindir}/genwqe_gunzip ln -sf %{_bindir}/genwqe_gunzip %{buildroot}/%{_libdir}/genwqe/gunzip ln -sf %{_bindir}/genwqe_gzip %{buildroot}/%{_libdir}/genwqe/gzip %files -n genwqe-tools %defattr(0755,root,root) %{_bindir}/genwqe_echo %{_bindir}/genwqe_ffdc %{_bindir}/genwqe_cksum %{_bindir}/genwqe_memcopy %{_bindir}/genwqe_peek %{_bindir}/genwqe_poke %{_bindir}/genwqe_update %{_bindir}/genwqe_gunzip %{_bindir}/genwqe_gzip %{_bindir}/genwqe_test_gz %{_bindir}/genwqe_mt_perf %{_bindir}/zlib_mt_perf %{_libdir}/genwqe/gunzip %{_libdir}/genwqe/gzip %defattr(-,root,root) %doc LICENSE %{_mandir}/man1/genwqe_echo.1.gz %{_mandir}/man1/genwqe_ffdc.1.gz %{_mandir}/man1/genwqe_gunzip.1.gz %{_mandir}/man1/genwqe_gzip.1.gz %{_mandir}/man1/genwqe_cksum.1.gz %{_mandir}/man1/genwqe_memcopy.1.gz %{_mandir}/man1/genwqe_peek.1.gz %{_mandir}/man1/genwqe_poke.1.gz %{_mandir}/man1/genwqe_update.1.gz %{_mandir}/man1/zlib_mt_perf.1.gz %{_mandir}/man1/gzFile_test.1.gz %ifarch ppc64le %{_bindir}/genwqe_maint %{_bindir}/genwqe_loadtree %{_unitdir}/genwqe_maint.service %{_mandir}/man1/genwqe_maint.1.gz %{_mandir}/man1/genwqe_loadtree.1.gz %endif %files -n genwqe-zlib %defattr(-,root,root) %doc LICENSE %defattr(0755,root,root) %dir %{_libdir}/genwqe %{_libdir}/genwqe/*.so* %files -n genwqe-vpd %defattr(-,root,root,-) %{_bindir}/genwqe_csv2vpd %{_bindir}/genwqe_vpdconv %{_bindir}/genwqe_vpdupdate %defattr(-,root,root) %doc LICENSE %{_sysconfdir}/genwqe_vpd.csv %{_mandir}/man1/genwqe_csv2vpd.1.gz %{_mandir}/man1/genwqe_vpdconv.1.gz %{_mandir}/man1/genwqe_vpdupdate.1.gz %files devel %defattr(-,root,root,-) %dir %{_includedir}/genwqe %{_includedir}/genwqe/* %{_libdir}/genwqe/*.a %changelog * Thu Jan 05 2017 Frank Haverkamp - 4.0.17 - Make Z_STREAM_END detection circumvention configurable - 
Improve debug output - Improve Z_STREAM_END detection and add testcases (most likely not final yet) * Wed Apr 06 2016 Gabriel Krisman Bertazi - 4.0.16 - dlopen uses SONAME when opening libz. - Support CAPI version. - Bulid fixes. - Include genwqe_maint daemon (CAPI version). * Mon Apr 04 2016 Frank Haverkamp - Renamed some scripts again * Thu Feb 04 2016 Frank Haverkamp - Fix s390 and Intel build. Remove debug stuff from zlib rpm. * Fri Dec 11 2015 Frank Haverkamp - Changing some install directories again. * Tue Dec 08 2015 Gabriel Krisman Bertazi - 4.0.7-1 - Create Fedora package. - Make genwqe-vpd and genwqe-libz subpackages of genwqe-tools. * Wed Apr 22 2015 Frank Haverkamp - Initial release. genwqe-user-4.0.18/tools/000077500000000000000000000000001303345043000152015ustar00rootroot00000000000000genwqe-user-4.0.18/tools/.gitignore000066400000000000000000000002601303345043000171670ustar00rootroot00000000000000genwqe_cksum genwqe_csv2vpd genwqe_echo genwqe_ffdc genwqe_gunzip genwqe_gzip genwqe_memcopy genwqe_peek genwqe_poke genwqe_update genwqe_vpdconv genwqe_vpdupdate zlib_mt_perf genwqe-user-4.0.18/tools/Makefile000066400000000000000000000121341303345043000166420ustar00rootroot00000000000000# # Copyright 2015, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# include ../config.mk DESTDIR ?= /usr libs = ../lib/libDDCB.a ../lib/libzHW.a ../lib/libcard.a LDLIBS += $(libs) -lpthread -lrt genwqe_peek_objs = force_cpu.o genwqe_poke_objs = force_cpu.o genwqe_memcopy_objs = force_cpu.o genwqe_cksum_objs = force_cpu.o genwqe_echo_objs = force_cpu.o genwqe_vpdupdate_objs = genwqe_vpd_common.o genwqe_vpdconv_objs = genwqe_vpd_common.o genwqe_memcopy_libs = -lz genwqe_cksum_libs = -lz genwqe_gzip_libs = ../lib/libzADC.a -ldl # statically link our libz genwqe_gunzip_libs = ../lib/libzADC.a -ldl # statically link our libz zlib_mt_perf_libs = ../lib/libzADC.a -ldl # statically link our libz gzFile_test_libs = -L../lib -lzADC -ldl # dynamically link our libz projs = genwqe_update genwqe_gzip genwqe_gunzip zlib_mt_perf genwqe_memcopy \ genwqe_echo genwqe_peek genwqe_poke genwqe_cksum genwqe_vpdconv \ genwqe_vpdupdate genwqe_csv2vpd genwqe_ffdc gzFile_test ifdef WITH_LIBCXL # genwqe_maint is only used with CAPI support. projs += genwqe_maint genwqe_loadtree CAPI_INSTALL=capi_install # If we are bundling, we need to link statically. Otherwise, go dynamic. ifeq ($(BUNDLE_LIBCXL),1) LDLIBS += $(libcxl_a) else LDLIBS += -lcxl endif # !CONFIG_LIBCXL_PATH endif # WITH_LIBCXL all: $(projs) genwqe_memcopy: force_cpu.o genwqe_vpdconv genwqe_vpdupdate: genwqe_vpd_common.o $(projs): $(libs) objs = force_cpu.o genwqe_vpd_common.o $(projs:=.o) manpages = $(projs:=.1.gz) manpages: all $(manpages) genwqe_gunzip.o: genwqe_gzip.c $(CC) -c $< $(CPPFLAGS) $(CFLAGS) -o $@ ### Deactivate existing implicit rule %: %.c %: %.sh ### Generic rule to build a tool %: %.o $(CC) $(LDFLAGS) $@.o $($(@)_objs) $($(@)_libs) $(LDLIBS) -o $@ %.o: %.c $(libs) $(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@ ### Setting LD_LIBRARY_PATH helps to try tools with dynamic linkage %.1: % LD_LIBRARY_PATH=../lib $(HELP2MAN) -N --output=$@ \ --name "IBM Hardware Accelerator Tool." 
./$< %.1.gz: %.1 gzip --best -c $< > $@ # # Tools for card maintenance # install_release_tools: all @mkdir -p $(DESTDIR)/bin cp -uv genwqe_peek genwqe_poke genwqe_memcopy genwqe_echo \ genwqe_update \ $(DESTDIR)/bin # # Example tools and testcases # gzip and gunzip go into special directory, since they should not be # in regular path to be mixed up with distro version of those tools. # Nevertheless we need them to accelerate tar for example with our # hardware accelerated version of gzip/gunzip. # install_gzip_tools: all install -D -m 755 genwqe_gzip -T $(DESTDIR)/bin/genwqe_gzip install -D -m 755 genwqe_gunzip -T $(DESTDIR)/bin/genwqe_gunzip install -D -m 755 zlib_mt_perf -T $(DESTDIR)/bin/zlib_mt_perf install -D -m 755 genwqe_mt_perf -T $(DESTDIR)/bin/genwqe_mt_perf install -D -m 755 genwqe_test_gz -T $(DESTDIR)/bin/genwqe_test_gz uninstall_gzip_tools: $(RM) $(DESTDIR)/bin/genwqe_gzip \ $(DESTDIR)/bin/genwqe_gunzip \ $(DESTDIR)/bin/zlib_mt_perf \ $(DESTDIR)/bin/genwqe_mt_perf \ $(DESTDIR)/bin/genwqe_test_gz install_manpages: $(manpages) @mkdir -p $(MAN_INSTALL_PATH) cp -uv $(manpages) $(MAN_INSTALL_PATH) install_release_manpages: $(manpages) @mkdir -p $(MAN_INSTALL_PATH) cp -uv genwqe_memcopy.1 genwqe_echo.1 genwqe_update.1 \ $(DESTDIR)/man/man1 cp -uv genwqe_memcopy.1 genwqe_echo.1 genwqe_update.1 \ $(MAN_INSTALL_PATH) uninstall_manpages: @for f in $(manpages) ; do \ echo "removing $(DESTDIR)/man/man1/$$f ..."; \ $(RM) $(DESTDIR)/man/man1/$$f; \ done capi_install: genwqe_maint install -D -m 755 genwqe_maint -T $(DESTDIR)/bin/genwqe_maint install -D -m 755 genwqe_loadtree -T $(DESTDIR)/bin/genwqe_loadtree install: install_gzip_tools install_manpages $(CAPI_INSTALL) @mkdir -p $(DESTDIR)/bin install -D genwqe_update -T $(DESTDIR)/bin/genwqe_update install -D -m 755 genwqe_memcopy -T $(DESTDIR)/bin/genwqe_memcopy install -D -m 755 genwqe_echo -T $(DESTDIR)/bin/genwqe_echo install -D -m 755 genwqe_peek -T $(DESTDIR)/bin/genwqe_peek install -D -m 755 genwqe_poke -T 
$(DESTDIR)/bin/genwqe_poke install -D -m 755 genwqe_cksum -T $(DESTDIR)/bin/genwqe_cksum install -D -m 755 genwqe_vpdconv -T $(DESTDIR)/bin/genwqe_vpdconv install -D -m 755 genwqe_vpdupdate -T $(DESTDIR)/bin/genwqe_vpdupdate install -D -m 755 genwqe_csv2vpd -T $(DESTDIR)/bin/genwqe_csv2vpd install -D -m 755 genwqe_ffdc -T $(DESTDIR)/bin/genwqe_ffdc uninstall: uninstall_gzip_tools uninstall_manpages @for f in $(projs) ; do \ echo "removing $(DESTDIR)/bin/$$f ..."; \ $(RM) $(DESTDIR)/bin/$$f; \ done clean distclean: $(RM) $(objs) $(projs) $(manpages) genwqe-user-4.0.18/tools/force_cpu.c000066400000000000000000000050511303345043000173130ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include "force_cpu.h" /* FIXME Fake this for old RHEL versions e.g. RHEL5.6 */ #ifndef CPU_ALLOC #define CPU_ALLOC(cpus) ({ void *ptr = NULL; ptr; }) #define CPU_ALLOC_SIZE(cpus) ({ int val = 0; val; }) #define CPU_ISSET_S(cpu, size, cpusetp) ({ int val = 0; val; }) #define CPU_FREE(cpusetp) #define CPU_ZERO_S(size, cpusetp) #define CPU_SET_S(run_cpu, size, cpusetp) #define sched_getcpu() ({ int val = 0; val; }) #define sched_setaffinity(x, size, cpusetp) ({ int val = 0; val; }) #endif void print_cpu_mask(void) { cpu_set_t *cpusetp; size_t size; int num_cpus, cpu; num_cpus = CPU_SETSIZE; /* take default, currently 1024 */ cpusetp = CPU_ALLOC(num_cpus); if (cpusetp == NULL) return; size = CPU_ALLOC_SIZE(num_cpus); /* figure out on which cpus we might run now after change */ CPU_ZERO_S(size, cpusetp); if (sched_getaffinity(0, size, cpusetp) < 0) { CPU_FREE(cpusetp); return; } for (cpu = 0; cpu < num_cpus; cpu += 1) { if (!CPU_ISSET_S(cpu, size, cpusetp)) { printf("\n"); break; } printf(" CPU: %4d = %s", cpu, CPU_ISSET_S(cpu, size, cpusetp)?"yes":"no "); if ((cpu & 0x3) == 0x3) printf("\n"); } CPU_FREE(cpusetp); } /** * Try to ping process to a specific CPU. Returns the CPU we are * currently running on. 
*/ int pin_to_cpu(int run_cpu) { cpu_set_t *cpusetp; size_t size; int num_cpus; num_cpus = CPU_SETSIZE; /* take default, currently 1024 */ cpusetp = CPU_ALLOC(num_cpus); if (cpusetp == NULL) { return sched_getcpu(); } size = CPU_ALLOC_SIZE(num_cpus); CPU_ZERO_S(size, cpusetp); CPU_SET_S(run_cpu, size, cpusetp); if (sched_setaffinity(0, size, cpusetp) < 0) { CPU_FREE(cpusetp); return sched_getcpu(); } /* figure out on which cpus we actually run */ CPU_FREE(cpusetp); return run_cpu; } int switch_cpu(int cpu, int verbose) { int new_cpu; /* pin to specific CPU to get more precise performance measurements */ if (cpu < 0) return 0; if (verbose) { printf("Default possible CPUs:\n"); print_cpu_mask(); printf("Running on CPU %d, want to run on CPU %d...\n", sched_getcpu(), cpu); } new_cpu = pin_to_cpu(cpu); if (new_cpu != cpu) { fprintf(stderr, "err: desired CPU %d does not match current " "CPU %d\n", cpu, new_cpu); return -1; } if (verbose) { printf("New possible CPUs:\n"); print_cpu_mask(); printf("Running on CPU %d\n", new_cpu); } return 0; } genwqe-user-4.0.18/tools/force_cpu.h000066400000000000000000000014541303345043000173230ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef __FORCE_CPU_H__ #define __FORCE_CPU_H__ #include void print_cpu_mask(void); int pin_to_cpu(int run_cpu); int switch_cpu(int cpu, int verbose); #endif /* __FORCE_CPU_H__ */ genwqe-user-4.0.18/tools/genwqe_cksum.c000066400000000000000000000237741303345043000200520ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include "genwqe_tools.h" #include "force_cpu.h" #include "libcard.h" #include "memcopy_ddcb.h" int verbose_flag = 0; static int debug_flag = 0; static int DATA_BUF_SIZE = 4096 * 512; static int use_sglist = 0; static int use_adler32 = 0; static int check_result = 0; static const char *version = GENWQE_LIB_VERS_STRING; /** * @brief prints valid command line options * * @param prog current program's name */ static void usage(const char *prog) { printf("Usage: %s [-h] [-v, --verbose] [-C, --card |RED]\n" "\t[-V, --version]\n" "\t[-X, --cpu ]\n" "\t[-D, --debug ]\n" "\t[-G, --use-sglist use the scatter gather list support]\n" "\t[-c, --check-result] check result against the software\n" "\t[-s, --bufsize ]\n" "\t[-a, --adler32] use adler32 instead of crc32\n" "\t[-i, --pgoffs_i ] byte offset for input buffer\n" "\t[FILE]...\n" "\n" "This utility sends memcopy/checksum DDCBs to the application\n" "chip unit. The CRC32 is compatible to zlib. 
The UNIX program\n" "cksum is using a different variation of the algorithm.\n\n", prog); } /** * str_to_num - Convert string into number and cope with endings like * KiB for kilobyte * MiB for megabyte * GiB for gigabyte */ static inline uint64_t str_to_num(char *str) { char *s = str; uint64_t num = strtoull(s, &s, 0); if (*s == '\0') return num; if (strcmp(s, "KiB") == 0) num *= 1024; else if (strcmp(s, "MiB") == 0) num *= 1024 * 1024; else if (strcmp(s, "GiB") == 0) num *= 1024 * 1024 * 1024; return num; } static int genwqe_card_cksum(card_handle_t card, struct genwqe_ddcb_cmd *cmd, void *src, size_t n, uint32_t *crc32, uint32_t *adler32, uint32_t *inp_processed, struct genwqe_debug_data *debug_data) { int rc; struct asiv_memcpy *asiv; struct asv_memcpy *asv; genwqe_ddcb_cmd_init(cmd); cmd->ddata_addr = (unsigned long)debug_data; cmd->acfunc = DDCB_ACFUNC_APP; /* goto accelerator */ cmd->cmd = ZCOMP_CMD_ZEDC_MEMCOPY; cmd->cmdopts = 0x0001; /* discard output for cksum */ cmd->asiv_length= 0x40 - 0x20; cmd->asv_length = 0xC0 - 0x80; /* try to absorb all */ /* setup ASIV part */ asiv = (struct asiv_memcpy *)&cmd->asiv; asiv->inp_buff = __cpu_to_be64((unsigned long)src); asiv->inp_buff_len = __cpu_to_be32((uint32_t)n); asiv->outp_buff = __cpu_to_be64(0); asiv->outp_buff_len = __cpu_to_be32(0); asiv->in_adler32 = __cpu_to_be32(*adler32); asiv->in_crc32 = __cpu_to_be32(*crc32); if (use_sglist) { cmd->ats = __cpu_to_be64( ATS_SET_FLAGS(struct asiv_memcpy, inp_buff, ATS_TYPE_SGL_RD)); } else { cmd->ats = __cpu_to_be64( ATS_SET_FLAGS(struct asiv_memcpy, inp_buff, ATS_TYPE_FLAT_RD)); } rc = genwqe_card_execute_ddcb(card, cmd); asv = (struct asv_memcpy *)&cmd->asv; *crc32 = __be32_to_cpu(asv->out_crc32); *adler32 = __be32_to_cpu(asv->out_adler32); *inp_processed = __be32_to_cpu(asv->inp_processed); if (verbose_flag) fprintf(stderr, " crc32=%u adler32=%u inp_processed=%u\n", *crc32, *adler32, *inp_processed); return rc; } static int process_in_file(card_handle_t card, const 
char *in_f, uint8_t *ibuf, int ibuf_size) { int rc, size_f; struct stat st; FILE *i_fp; uint32_t crc = 0, m_crc32 = 0; /* defined start value of 0 */ uint32_t m_adler32 = 1; /* defined start value of 1 */ uint32_t m_inp_processed; struct genwqe_ddcb_cmd cmd; struct genwqe_debug_data debug_data; int xerrno; if (check_result) crc = crc32(0L, Z_NULL, 0); /* start value */ memset(&debug_data, 0, sizeof(debug_data)); if (stat(in_f, &st) == -1) { fprintf(stderr, "err: stat on input file (%s)\n", strerror(errno)); exit(EX_ERRNO); } size_f = st.st_size; i_fp = fopen(in_f, "r"); if (!i_fp) { pr_err("err: can't open input file %s: %s\n", in_f, strerror(errno)); exit(EX_ERRNO); } while (size_f) { int tocopy = MIN(ibuf_size, size_f); rc = fread(ibuf, tocopy, 1, i_fp); if (rc != 1) { pr_err("err: can't read input file %s: %s\n", in_f, strerror(errno)); exit(EX_ERRNO); } if (check_result) crc = crc32(crc, ibuf, tocopy); /* software */ rc = genwqe_card_cksum(card, &cmd, ibuf, tocopy, /* hardware */ &m_crc32, &m_adler32, &m_inp_processed, debug_flag ? &debug_data : NULL); xerrno = errno; if (debug_flag && verbose_flag) genwqe_print_debug_data(stdout, &debug_data, GENWQE_DD_ALL); /* Did the ioctl succeed? 
*/ if (rc != GENWQE_OK) { struct asv_runtime_dma_error *d; fprintf(stderr, "\nerr: CKSUM DDCB failed, %s (%d)\n" " errno=%d %s\n", card_strerror(rc), rc, xerrno, strerror(xerrno)); if (debug_flag && !verbose_flag) genwqe_print_debug_data(stdout, &debug_data, GENWQE_DD_ALL); fprintf(stderr, " RETC: %03x %s ATTN: %x PROGR: %x\n" " from card CRC32: %08x ADLER: %08x\n" " DEQUEUE=%016llx CMPLT=%016llx DISP=%016llx\n", cmd.retc, retc_strerror(cmd.retc), cmd.attn, cmd.progress, m_crc32, m_adler32, (long long)cmd.deque_ts, (long long)cmd.cmplt_ts, (long long)cmd.disp_ts); if ((cmd.retc == DDCB_RETC_UNEXEC) && (cmd.attn == 0xe007)) { d = (struct asv_runtime_dma_error *)cmd.asv; fprintf(stderr, " raddr: %016llx rfmt/chan/disc: %08x " "rdmae: %04x rsge: %04x\n" " waddr: %016llx wfmt/chan/disc: %08x " "wdmae: %04x wsge: %04x\n", (long long)__be64_to_cpu(d->raddr_be64), __be32_to_cpu(d->rfmt_chan_disccnt_be32), __be16_to_cpu(d->rdmae_be16), __be16_to_cpu(d->rsge_be16), (long long)__be64_to_cpu(d->waddr_be64), __be32_to_cpu(d->wfmt_chan_disccnt_be32), __be16_to_cpu(d->wdmae_be16), __be16_to_cpu(d->wsge_be16)); } genwqe_hexdump(stderr, cmd.asv, sizeof(cmd.asv)); exit(EXIT_FAILURE); } size_f -= tocopy; } if (use_adler32) printf("%u %llu %s\n", m_adler32, (long long)st.st_size, in_f); else printf("%u %llu %s\n", m_crc32, (long long)st.st_size, in_f); if ((check_result) && (m_crc32 != crc)) { fprintf(stderr, "err: CRCs do not match %u != %u\n", m_crc32, crc); } fclose(i_fp); return 0; } int main(int argc, char *argv[]) { int card_no = 0, err_code; card_handle_t card; uint8_t *ibuf, *ibuf4k; unsigned int page_size = sysconf(_SC_PAGESIZE); const char *in_f = NULL; int cpu = -1; int pgoffs_i = 0; while (1) { int ch; int option_index = 0; static struct option long_options[] = { /* functions */ /* options */ { "card", required_argument, NULL, 'C' }, { "cpu", required_argument, NULL, 'X' }, { "use-sglist", no_argument, NULL, 'G' }, { "use-adler32", no_argument, NULL, 'a' }, { 
"check-result", no_argument, NULL, 'c' }, { "bufsize", required_argument, NULL, 's' }, { "pgoffs_i", required_argument, NULL, 'i' }, /* misc/support */ { "version", no_argument, NULL, 'V' }, { "debug", no_argument, NULL, 'D' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, 'h' }, { 0, no_argument, NULL, 0 }, }; ch = getopt_long(argc, argv, "acC:X:Gs:i:vDVh", long_options, &option_index); if (ch == -1) /* all params processed ? */ break; switch (ch) { /* which card to use */ case 'C': if (strcmp(optarg, "RED") == 0) { card_no = GENWQE_CARD_REDUNDANT; break; } card_no = strtol(optarg, (char **)NULL, 0); break; case 'X': cpu = strtoul(optarg, (char **)NULL, 0); break; case 'G': use_sglist++; break; case 'a': use_adler32 = 1; break; case 'c': check_result++; break; case 'i': pgoffs_i = strtol(optarg, (char **)NULL, 0); break; case 's': DATA_BUF_SIZE = str_to_num(optarg); break; case 'h': usage(argv[0]); exit(EXIT_SUCCESS); break; case 'V': printf("%s\n", version); exit(EXIT_SUCCESS); case 'D': debug_flag++; break; case 'v': verbose_flag++; break; default: usage(argv[0]); exit(EXIT_FAILURE); } } switch_cpu(cpu, verbose_flag); genwqe_card_lib_debug(verbose_flag); card = genwqe_card_open(card_no, GENWQE_MODE_RDWR, &err_code, 0x475a4950, GENWQE_APPL_ID_MASK); if (card == NULL) { printf("err: genwqe card: %s/%d; %s\n", card_strerror(err_code), err_code, strerror(errno)); exit(EXIT_FAILURE); } if (use_sglist) { ibuf4k = memalign(page_size, DATA_BUF_SIZE + pgoffs_i); if (use_sglist > 1) { genwqe_pin_memory(card, ibuf4k, DATA_BUF_SIZE + pgoffs_i, 0); } } else { ibuf4k = genwqe_card_malloc(card, DATA_BUF_SIZE + pgoffs_i); } if (DATA_BUF_SIZE != 0 && ibuf4k == NULL) { pr_err("cannot allocate memory\n"); exit(EXIT_FAILURE); } ibuf = ibuf4k + pgoffs_i; while (optind < argc) { /* input file */ in_f = argv[optind++]; process_in_file(card, in_f, ibuf, DATA_BUF_SIZE); } if (use_sglist) { if (use_sglist > 1) { genwqe_unpin_memory(card, ibuf4k, DATA_BUF_SIZE + 
pgoffs_i); } free(ibuf4k); } else { genwqe_card_free(card, ibuf4k, DATA_BUF_SIZE + pgoffs_i); } genwqe_card_close(card); exit(EXIT_SUCCESS); } genwqe-user-4.0.18/tools/genwqe_csv2vpd.c000066400000000000000000000302761303345043000203120ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * Convert from CSV file to binary file used for making the vpd bin * file from a cvs file. */ #include #include #include #include #include #define MAX_LINE 512 #define DEBUG_PRINTF(...) 
printf(__VA_ARGS__) /* Command line arguments */ static int arg_index; static int arg_count; static char *arg_values[100]; /* * Standard CRC-32 Polynomial of: * x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + * x^5 + x^4 + x^2 + x^1+1 */ typedef enum { FALSE = 0, TRUE = 1 } BOOL; /** * Global parameters used by the utilities * */ static char input_fn[MAX_LINE]; static char output_fn[MAX_LINE]; static BOOL verbose_flag = FALSE; /** TRUE if verbose messages output */ static BOOL add_crc = FALSE; typedef enum _SOS_ENDIANNESS { SOS_ENDIANNESS_BIG = 10, SOS_ENDIANNESS_LITTLE } SOS_ENDIANNESS; /** * Return endianness of the system we are running on * */ static SOS_ENDIANNESS sos0_endianness(void) { int16_t x; int16_t y; /* Set up an integer */ x = 1; /* Get the first byte of the integer address range */ y = *((char *) &x); if (y == 1) return (SOS_ENDIANNESS_LITTLE); else return (SOS_ENDIANNESS_BIG); } /** * Forces a 32 bit value to be big endian * */ static uint32_t endian_big_uint32(SOS_ENDIANNESS endi, uint32_t input) { uint32_t temp = 0x0; /* If we are running on a little endian system, convert to big */ if (endi == SOS_ENDIANNESS_LITTLE) { temp |= ((input >> 24) & 0x000000ff); temp |= ((input >> 8) & 0x0000ff00); temp |= ((input << 8) & 0x00ff0000); temp |= ((input << 24) & 0xff000000); } else { temp = input; } return (temp); } /* * Standard CRC-32 Polynomial of: x^32 + x^26 + x^23 + x^22 + x^16 + * x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1+1 */ static const uint32_t crc32_lut[] = { 0x0, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 
0x8664e6e5, 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 
0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0xb1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 }; static unsigned long memcrc(const unsigned char *b, size_t n) { /* Input arguments: * const char* b == byte sequence to checksum * size_t n == length of sequence */ register unsigned i, c, s = 0; for (i = n; i > 0; --i) { c = (unsigned)(*b++); s = (s << 8) ^ crc32_lut[(s >> 24) ^ c]; } /* Extend with the length of the string. */ while (n != 0) { c = n & 0xff; n >>= 8; s = (s << 8) ^ crc32_lut[(s >> 24) ^ c]; } return ~s; } // Search for this in column 6 in order to add crc32 static char crc_token[] = {"crc32"}; /** * * Converts CSV file to binary fiule * * Returns TRUE if no errors * */ static BOOL convert_csv(void) { FILE *ip, *op; char line[MAX_LINE]; char token[MAX_LINE]; unsigned line_nr = 0; int offset; unsigned int crc32_seek = 0; unsigned int value; char desc[MAX_LINE]; uint32_t crc32 = 0; uint8_t data; int i, j; BOOL parse_error; int last_byte = 0; int nrw = 0; SOS_ENDIANNESS endianness; uint8_t *buffer = NULL; ip = fopen(input_fn, "r"); if (ip == NULL ) { printf("Cannot open input file '%s'\n", input_fn); return (FALSE); } op = fopen(output_fn, "w"); if (op == NULL ) { printf("Cannot open output file '%s'\n", output_fn); fclose(ip); return (FALSE); } while (fgets(line, MAX_LINE, ip) != NULL ) { int field_num = 0; int num_fields; int line_len; parse_error = FALSE; ++line_nr; num_fields = 0; j = 0; token[j] = '\0'; field_num = 0; line[strlen(line) - 2] = 
'\0'; /* remove newline character */ line_len = (int)strlen(line); /* only use lines that start with "0x" */ if (line[0] != '0' || tolower(line[1]) != 'x') { if (verbose_flag) DEBUG_PRINTF("\nSkipping Line (#%d) " "len: %d <%s>", line_nr, line_len, line); continue; } if (verbose_flag) DEBUG_PRINTF("\nLine (#%d): <%s>\n", line_nr, line); for (i = 0; i <= line_len; i++) { switch (line[i]) { case ',': case '\0': if (strlen(token) != 0) { switch (field_num) { case 0: /* Offset */ if (verbose_flag) DEBUG_PRINTF("Token " "(Offset): <%s>\n", token); if (token[0] != '0' || tolower(token[1]) != 'x' || sscanf(&token[2], "%x", &offset) != 1) { parse_error = TRUE; printf("ERROR while " "reading Offset-Token " "on line %d! " "skipping line\n", line_nr); continue; } num_fields++; break; case 1: /* Desc */ strcpy(desc, token); num_fields++; break; case 6: /* CRC */ if (0 == strncmp(crc_token, token, 5)) { if (0 == crc32_seek) { if (verbose_flag) DEBUG_PRINTF("Token (crc32): " "at offset %d <%s>\n", offset, token); crc32_seek = offset; /* Save */ } } break; case 9: /* Value */ if (verbose_flag) DEBUG_PRINTF("Token " "(Value): <%s>\n", token); if (token[0] != '0' || tolower(token[1]) != 'x' || sscanf(&token[2], "%x", &value) != 1) { parse_error = TRUE; printf("ERROR while reading " "Value-Token on line %d! 
" "skipping line\n", line_nr); continue; } num_fields++; break; default: if (verbose_flag) DEBUG_PRINTF("Token %d: " "<%s>\n", field_num, token); break; } } j = 0; token[j] = '\0'; field_num++; break; default: token[j++] = line[i]; token[j] = '\0'; break; } } if (!parse_error && ((num_fields == 3) || (num_fields == 2))) { if (verbose_flag) DEBUG_PRINTF("Offset: <0x%04X>, Desc: <%s>, " "Value: <0x%02X>\n", offset, desc, value); fseek(op, offset, SEEK_SET); data = (uint8_t)value;; nrw = fwrite(&data, 1, 1, op); if (1 != nrw) printf("Error: fwrite %d != 1\n", nrw); if (offset > last_byte) last_byte = offset + 1; } } if (verbose_flag) DEBUG_PRINTF("Close In <%s> Out <%s> Size %d\n", input_fn, output_fn, last_byte); fclose(op); fclose(ip); // Add some code to add crc32 to output file if ((TRUE == add_crc) && (0 != crc32_seek)) { op = fopen(output_fn, "r+"); if (op == NULL ) { printf("\nCannot open '%s'", output_fn); return (FALSE); } buffer = malloc(last_byte); if (NULL == buffer) { printf("\nCannot allocate %d Bytes\n", last_byte); fclose(op); return (FALSE); } nrw = fread(buffer, 1, last_byte, op); if (nrw == last_byte) { crc32 = memcrc(buffer, last_byte); DEBUG_PRINTF("%ld %d %s\n", (unsigned long)crc32, last_byte, output_fn); /* Work out endianness */ endianness = sos0_endianness(); crc32 = endian_big_uint32(endianness, crc32); // Seek to crc32 offset fseek(op, crc32_seek, SEEK_SET); nrw = fwrite(&crc32, 1, 4, op); if (4 != nrw) printf("Error: fwrite %d of 4 Bytes\n", nrw); } else { printf("Error: fread %d of %d Bytes\n", nrw, last_byte); } free(buffer); fclose(op); if (verbose_flag) DEBUG_PRINTF("CRC32 Added to <%s>\n", output_fn); } return (TRUE); } static void set_args(int argc, char *argv[]) { int i; arg_count = argc; for (i = 0; i < argc; i++) arg_values[i] = argv[i]; arg_index = 1; } static char *next_arg(void) { if (arg_index < arg_count) return (arg_values[arg_index]); return (NULL); } static char *get_next_arg(void) { if (arg_index < arg_count) return 
(arg_values[arg_index++]); return (NULL); } static void help(void) { printf("csv2bin -i -o \n" "\t-crc Add crc32 to bin file (same as from chksum).\n" "\t-v Verbose mode.\n"); return; } /** * Get command line parameters and create the output file. */ int main(int argc, char *argv[]) { BOOL input_fn_set = FALSE; BOOL output_fn_set = FALSE; set_args(argc, argv); /* Process command line args */ while (arg_index < argc) { if (strcmp(next_arg(), "-h") == 0) { help(); exit(0); } if (strcmp(next_arg(), "-o") == 0) { get_next_arg(); if (sscanf(next_arg(), "%s", (char *) &(output_fn)) == 1) output_fn_set = TRUE; } else if (strcmp(next_arg(), "-i") == 0) { get_next_arg(); if (sscanf(next_arg(), "%s", (char *) &(input_fn)) == 1) input_fn_set = TRUE; } else if (strcmp(next_arg(), "-v") == 0) verbose_flag = TRUE; else if (strcmp(next_arg(), "-crc") == 0) add_crc = TRUE; get_next_arg(); } if (verbose_flag) { printf("\nInput Filename: '%s'", input_fn); printf("\nOutput Filename: '%s'", output_fn); printf("\n"); } if ((TRUE == input_fn_set) && (TRUE == output_fn_set)) convert_csv(); else help(); if (verbose_flag) printf("\n"); exit(0); } genwqe-user-4.0.18/tools/genwqe_echo.c000066400000000000000000000273231303345043000176400ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @file genwqe_echo.c * @brief FPGA accelerator SW utility. 
* * This utility sends ECHO-DDCBs to the Service Layer Unit (SLU) or an * chip application unit (or AFU), waits for completion and checks if * the teststring is correctly returned. */ #include #include #include #include #include #include #include #include #include #include "genwqe_tools.h" #include "force_cpu.h" #include "libddcb.h" #define timediff_usec(t0, t1) \ ((double)(((t0)->tv_sec * 1000000 + (t0)->tv_usec) - \ ((t1)->tv_sec * 1000000 + (t1)->tv_usec))) int verbose_flag = 0; static const char *version = GIT_VERSION; const char *tstring_default = "ABCDEF_echo test [123456789abcde]"; /** * @brief prints valid command line options * * @param prog current program's name */ static void usage(const char *prog) { printf("Usage: %s [OPTIONS]\n" " -h, --help\n" " -v, --verbose\n" " -C, --card=CARDNO|RED Note: RED is for Card Redundant mode\n" " -A, --accelerator-type=GENWQE|CAPI CAPI is only available " "for System p\n" " -q, --quiet quiece output\n" " -V, --version\n" " -H, --hardware-version\n" " -c, --count=COUNT\n" " -X, --cpu=CPU only run on this CPU number\n" " -D, --debug create extended debug data on failure\n" #if defined (CONFIG_BUILD_4TEST) " -u, --unitid=0:service layer|1:APP\n" #endif " -e, --exit-on-err exit program when seeing an error\n" " -f, --flood\n" " -l, --preload=1..N N <= 64\n" " -i, --interval=INTERVAL_USEC\n" " -s, --string=TESTSTRING\n" " -p, --polling use DDCB polling mode.\n" " -q, --quiet only summary output\n" "\n" "This utility sends echo DDCBs either to the service layer\n" "or other chip units. It can be used to check the cards\n" "health and/or to produce stress on the card to verify its\n" "correct function.\n\n", prog); } static void INT_handler(int sig); static int stop_echoing = 0; static void INT_handler(int sig) { signal(sig, SIG_IGN); stop_echoing = 1; /* signal(SIGINT, INT_handler); *//* Try again */ } /** * @brief prepare data to be send via ECHO-DDCB. * clear receive part. * SLU allows 64 bytes to be echoed. 
* * @param tstring test string to be send to SLU * @param acfunc Unit-ID in HW: 0=SLU, 1=APP * @param cmd command definitions and return values * @param count how often shall the command be issued * */ static void preset_echo_cmd(char *tstring, uint8_t acfunc, struct ddcb_cmd *cmd, int count) { int i, j; int len; /* preset tx values */ len = (int)strlen(tstring); len = ((len + 7) / 8) * 8; /* round up to multiples of 8 */ if (len > DDCB_ASV_LENGTH) { pr_info("test string too long (%u)\n", (unsigned int)strlen(tstring)); len = DDCB_ASV_LENGTH; /* limit to 64 chars */ } for (i = 0; i < count; i++) { cmd->acfunc = acfunc; /* to which func is echo going? */ cmd->ddata_addr = 0ull; /* FIXME */ cmd->cmd = DDCB_CMD_ECHO_SYNC; cmd->cmdopts = _DDCB_OPT_ECHO_COPY_ALL; cmd->ats = 0ULL; strncpy((char *)cmd->asiv, tstring, len); cmd->asiv_length = 64; /* clear rx values */ for (j=0; j < DDCB_ASV_LENGTH; j++) cmd->asv[j] = 0; cmd->asv_length = 64; cmd->retc = DDCB_RETC_IDLE; /* still unprocessed */ if (i < (count-1)) /* chaining */ cmd->next_addr = (unsigned long)(cmd + 1); else cmd->next_addr = 0x0; cmd++; } pr_info("%u ECHO DDCBs prepared (%u bytes to send)\n", count, len); } static int do_echo(accel_t card, int preload, uint8_t unit, char *teststring) { int rc, j, xerrno; unsigned int i; struct ddcb_cmd *cmd, *pcmd; /* uint64_t reg64; */ /* FIXME mallocs eat performance. Consider to allocate largest size on stack. 
*/ cmd = (struct ddcb_cmd *)malloc(preload * sizeof(*cmd)); if (cmd == NULL) { fprintf(stderr, "err: failed to alloc cmd memory\n"); return -ENOMEM; } memset(cmd, 0, preload * sizeof(*cmd)); /* preset all cmd structures */ preset_echo_cmd(teststring, unit, cmd, preload); pcmd = cmd; /* issue ECHO commands */ rc = accel_ddcb_execute(card, pcmd, NULL, &xerrno); if (rc != DDCB_OK) { fprintf(stderr, "err: Echo DDCB failed: %s (%d)\n" " errno=%d %s\n" " RETC: %03x %s ATTN: %02x PROGR: %x\n", ddcb_strerror(rc), rc, xerrno, strerror(xerrno), pcmd->retc, ddcb_retc_strerror(pcmd->retc), pcmd->attn, pcmd->progress); goto err; } /* now check all results */ pcmd = cmd; rc = EXIT_SUCCESS; for (j = 0; j < preload; j++) { if (strncmp((char *)pcmd->asv, teststring, strlen(teststring)) != 0) { printf("\nDDCB echo compare failed\n" " retc=%x %s:\n", pcmd->retc, ddcb_retc_strerror(pcmd->retc)); printf(" original: "); for (i = 0; i < strlen(teststring); i++) { printf(" %02x", teststring[i]); if ((i & 0x0f) == 0x0f) printf("\n "); } printf("\n received: "); for (i = 0; i < pcmd->asv_length; i++) { printf(" %02x", pcmd->asv[i]); if ((i & 0x0f) == 0x0f) printf("\n "); } printf("\n"); rc = EX_ERR_DATA; break; } else { pr_info("Echo OK (retc=%x %s)\n", pcmd->retc, ddcb_retc_strerror(pcmd->retc)); } pcmd++; } err: free(cmd); return rc; } /** * @brief the utility itself */ int main(int argc, char *argv[]) { int ch, rc = DDCB_OK; int card_no = 0; int card_type = DDCB_TYPE_GENWQE; int preload = 1; int flood = 0; bool print_hardware_version = false; int quiet = 0; int exit_on_err = 1; unsigned long count = 0; int run_infinite = 1; unsigned long interval = 1000000; /* 1sec is default */ uint8_t unit = DDCB_ACFUNC_APP; /* 0=Servicelayer/1=ZCOMP/GZIP/... 
*/ accel_t card; char *teststring =(char *)tstring_default; unsigned long packets_send = 0, packets_received = 0; int cpu = -1; int err_code = 0; unsigned long long frequency, wtime_usec = 0, wtime_s = 0, wtime_e = 0; unsigned int mode = (DDCB_MODE_RDWR | DDCB_MODE_ASYNC); while (1) { int option_index = 0; static struct option long_options[] = { /* functions */ /* options */ { "card", required_argument, NULL, 'C' }, { "accelerator-type", required_argument, NULL, 'A' }, { "cpu", required_argument, NULL, 'X' }, { "count", required_argument, NULL, 'c' }, { "preload", required_argument, NULL, 'l' }, { "interval", required_argument, NULL, 'i' }, { "string", required_argument, NULL, 's' }, #if defined (CONFIG_BUILD_4TEST) { "unit", required_argument, NULL, 'u' }, #endif { "exit-on-err", required_argument, NULL, 'e' }, { "flood", no_argument, NULL, 'f' }, /* misc/support */ { "version", no_argument, NULL, 'V' }, { "hardware-version", no_argument, NULL, 'H' }, { "debug", no_argument, NULL, 'D' }, { "polling", no_argument, NULL, 'p' }, { "quiet", no_argument, NULL, 'q' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, 'h' }, { 0, no_argument, NULL, 0 }, }; #if defined (CONFIG_BUILD_4TEST) ch = getopt_long(argc, argv, "pDC:A:c:fhl:i:s:qvX:HVu:e:", long_options, &option_index); #else ch = getopt_long(argc, argv, "pDC:A:c:fhl:i:s:qvX:HVe:", long_options, &option_index); #endif if (ch == -1) /* all params processed ? 
*/ break; switch (ch) { case 'C': if (strcmp(optarg, "RED") == 0) { card_no = ACCEL_REDUNDANT; break; } card_no = strtol(optarg, (char **)NULL, 0); break; case 'A': /* set card number */ if (strcmp(optarg, "GENWQE") == 0) { card_type = DDCB_TYPE_GENWQE; break; } if (strcmp(optarg, "CAPI") == 0) { card_type = DDCB_TYPE_CAPI; break; } card_type = strtol(optarg, (char **)NULL, 0); break; case 'X': cpu = strtoul(optarg, NULL, 0); break; case 'c': /* loop count */ count = strtol(optarg, (char **)NULL, 0); run_infinite = 0; break; case 'l': /* preload */ preload = strtol(optarg, (char **)NULL, 0); break; case 'i': /* interval */ interval = strtol(optarg, (char **)NULL, 0); break; case 'f': flood = 1; interval = 0; break; case 's': /* string */ teststring = optarg; if (strlen(teststring) > DDCB_ASV_LENGTH) { printf("WARNING: Limited string to %d bytes\n", DDCB_ASV_LENGTH); teststring[DDCB_ASV_LENGTH] = 0; } break; case 'e': exit_on_err = strtol(optarg, (char **)NULL, 0); break; #if defined (CONFIG_BUILD_4TEST) case 'u': /* unit */ unit = strtol(optarg, (char **)NULL, 0); break; #endif case 'V': printf("%s\n", version); exit(EXIT_SUCCESS); case 'D': /* debug_flag++; *//* FIXME disabled */ break; case 'p': mode |= DDCB_MODE_POLLING; break; case 'q': quiet++; break; case 'v': verbose_flag++; break; case 'h': usage(argv[0]); exit(EXIT_SUCCESS); break; case 'H': print_hardware_version = true; break; default: usage(argv[0]); exit(EXIT_FAILURE); } } if (optind != argc) { usage(argv[0]); exit(EXIT_FAILURE); } switch_cpu(cpu, verbose_flag); ddcb_debug(verbose_flag); /* open card access (for DDCB) */ card = accel_open(card_no, card_type, mode, &err_code, 0, DDCB_APPL_ID_IGNORE); if (card == NULL) { fprintf(stderr, "err: failed to open card %u type %u " "(%d/%s)\n", card_no, card_type, err_code, accel_strerror(card, err_code)); rc = err_code; goto err_out; } if (print_hardware_version) { accel_dump_hardware_version(card, stderr); goto close_card; } /* Note: I want to be able to send 
to an illegal unit as a testcase */ pr_info("Start DDCB Echo '%s' for unit #%x\n", teststring, unit); if (preload < 1) preload = 1; signal(SIGINT, INT_handler); wtime_s = accel_get_queue_work_time(card); while (!stop_echoing) { struct timeval t0, t1; if (!run_infinite && !count) break; gettimeofday(&t0, NULL); rc = do_echo(card, preload, unit, teststring); gettimeofday(&t1, NULL); packets_send++; if (rc == 0) { if (!flood && !quiet) { printf("%d x %u bytes from UNIT #%x: " "echo_req time=%2.1f usec\n", preload, (int)strlen(teststring), unit, timediff_usec(&t1, &t0)); } packets_received++; } count--; if (!run_infinite && !count) break; /* stop without waiting, if count is 0 */ if (interval) usleep(interval); if (exit_on_err && (rc != DDCB_OK)) break; } wtime_e = accel_get_queue_work_time(card); frequency = accel_get_frequency(card); wtime_usec = frequency ? (wtime_e - wtime_s) / (frequency/1000000) : 0; close_card: accel_close(card); if (!flood && !quiet) printf("\n"); err_out: if (!quiet) { printf("--- UNIT #%x echo statistics ---\n" "%ld packets transmitted, %ld received, %ld lost, " "%ld%% packet loss, queue %lld usec\n", unit, packets_send, packets_received, (packets_send - packets_received), !packets_send ? 100 : 100 * (packets_send - packets_received)/packets_send, wtime_usec); } if (rc != DDCB_OK) exit(EXIT_FAILURE); exit(EXIT_SUCCESS); } genwqe-user-4.0.18/tools/genwqe_ffdc.c000066400000000000000000000115611303345043000176210ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include "genwqe_tools.h" static const char *version = GIT_VERSION; int verbose_flag = 0; static inline uint64_t genwqe_readq(card_handle_t c, uint32_t reg) { int rc; uint64_t val; val = genwqe_card_read_reg64(c, reg, &rc); if (rc != GENWQE_OK) fprintf(stderr, "warn: genwqe_readq returned %d\n", rc); return val; } /** * @brief prints valid command line options * * @param prog current program's name */ static void usage(const char *prog) { printf("Utility to do first failure data capture (FFDC).\n" "\n" "Usage: %s [-h] [-v,--verbose]\n" "\t[-C, --card ]\n" "\t[-Q, --dump-queues] Dump DDCB queue registers of all funcs\n" "\t[-V, --version]\n" "\t[-v, --verbose]\n" "\n", prog); } static uint64_t vreadq(card_handle_t c, uint32_t reg, int func) { int rc; uint64_t val; rc = genwqe_card_write_reg64(c, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); if (rc != GENWQE_OK) { fprintf(stderr, "warn: genwqe_card_write_reg64 returned %d errno=%d\n", rc, errno); return (uint64_t)-1; } val = genwqe_card_read_reg64(c, reg, &rc); if (rc != GENWQE_OK) { fprintf(stderr, "warn: genwqe_card_read_reg64 returned %d errno=%d\n", rc, errno); return (uint64_t)-1; } return val; } static void do_dump_queues(card_handle_t c) { int func; pr_info("[%s] Genwqe queue config and debug registers\n", __func__); for (func = 0; func < 16; func++) { printf("PCI FUNCTION %d\n" " 0x%08x %016llx IO_QUEUE_CONFIG\n" " 0x%08x %016llx IO_QUEUE_STATUS\n" " 0x%08x %016llx IO_QUEUE_SEGMENT\n" " 0x%08x %016llx IO_QUEUE_INITSQN\n" " 0x%08x %016llx IO_QUEUE_WRAP\n" " 0x%08x %016llx IO_QUEUE_OFFSET\n" " 0x%08x %016llx IO_QUEUE_WTIME\n" " 0x%08x %016llx IO_QUEUE_ERRCNTS\n" " 0x%08x %016llx IO_QUEUE_LRW\n", func, IO_SLC_QUEUE_CONFIG, (long long)vreadq(c, IO_SLC_VF_QUEUE_CONFIG, func), IO_SLC_QUEUE_STATUS, (long long)vreadq(c, IO_SLC_VF_QUEUE_STATUS, func), IO_SLC_QUEUE_SEGMENT, (long 
long)vreadq(c, IO_SLC_VF_QUEUE_SEGMENT, func), IO_SLC_QUEUE_INITSQN, (long long)vreadq(c, IO_SLC_VF_QUEUE_INITSQN, func), IO_SLC_QUEUE_WRAP, (long long)vreadq(c, IO_SLC_VF_QUEUE_WRAP, func), IO_SLC_QUEUE_OFFSET, (long long)vreadq(c, IO_SLC_VF_QUEUE_OFFSET, func), IO_SLC_QUEUE_WTIME, (long long)vreadq(c, IO_SLC_VF_QUEUE_WTIME, func), IO_SLC_QUEUE_ERRCNTS, (long long)vreadq(c, IO_SLC_VF_QUEUE_ERRCNTS, func), IO_SLC_QUEUE_LRW, (long long)vreadq(c, IO_SLC_VF_QUEUE_LRW, func)); } } int main(int argc, char *argv[]) { int ch; int dump_queues = 0; int card_no = 0; int err_code; int rc; card_handle_t card; while (1) { int option_index = 0; static struct option long_options[] = { /* options */ { "card", required_argument, NULL, 'C' }, { "dump-queues", no_argument, NULL, 'Q' }, /* misc/support */ { "version", no_argument, NULL, 'V' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, 'h' }, { 0, no_argument, NULL, 0 }, }; ch = getopt_long(argc, argv, "C:QVvh", long_options, &option_index); if (ch == -1) /* all params processed ? 
*/ break; switch (ch) { /* which card to use */ case 'C': card_no = strtol(optarg, (char **)NULL, 0); break; case 'Q': dump_queues = 1; break; case 'h': usage(argv[0]); exit(EXIT_SUCCESS); break; case 'V': printf("%s\n", version); exit(EXIT_SUCCESS); case 'v': verbose_flag++; break; default: usage(argv[0]); exit(EXIT_FAILURE); } } if (optind != argc) { usage(argv[0]); exit(EXIT_FAILURE); } /* Open the Card */ card = genwqe_card_open(card_no, GENWQE_MODE_RDWR, &err_code, 0, GENWQE_APPL_ID_IGNORE); if (card == NULL) { pr_err("opening genwqe card (err=%d)\n", err_code); exit(EXIT_FAILURE); } genwqe_card_lib_debug(verbose_flag); rc = EXIT_FAILURE; if (dump_queues) { do_dump_queues(card); rc = EXIT_SUCCESS; } /* Close driver */ genwqe_card_close(card); if (rc == EXIT_FAILURE) usage(argv[0]); exit(rc); } genwqe-user-4.0.18/tools/genwqe_ffdc.sh000077500000000000000000000165401303345043000200160ustar00rootroot00000000000000#!/bin/bash # # Copyright 2015, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # version="https://github.com/ibm-genwqe/genwqe-user" card=0 verbose=0 export PATH=$PATH:./tools dump_debugfs=0 dump_sysfs=0 dump_current=0 dump_config=0 dump_previous=0 dump_queues=0 function usage() { echo "Usage of $PROGRAM:" echo " [-C ] card to be used for the FFDC gathering" echo " Note: only Physical Function cards make sense here!" 
echo " [-a] dump all available information" echo " [-s] dump sysfs entries" echo " [-d] dump debugfs entries" echo " [-c] dump PCIe config space" echo " [-q] dump all DDCB queues" echo " [-t <0:current|1:previous>]" echo " Mostly you might want to see \"previous\" data." echo " [-V] print program version" echo " [-h] help" echo echo "This utility dumps available first failure data capture (FFDC)" echo "information to stdout. It can be used if the card is still" echo "accessible, but not functioning correctly. Some functionality" echo "of this tool requires super-user privileges." echo echo "Note: To get all available information, you need to run this" echo " script as superuser because it wants to write so some" echo " card registers, which is only allowed for privileged users." echo } while getopts "C:asdcqt:Vh" opt; do case $opt in C) card=$OPTARG; ;; a) dump_current=1; dump_previous=1; dump_debugfs=1; dump_sysfs=1; dump_config=1; dump_queues=1; ;; s) dump_sysfs=1; ;; d) dump_debugfs=1; ;; c) dump_config=1; ;; q) dump_queues=1; ;; t) dump_type=$OPTARG; if [ $dump_type -eq 0 ]; then dump_current=1; fi if [ $dump_type -eq 1 ]; then dump_previous=1; fi if [ $dump_type -eq 2 ]; then dump_current=1; dump_previous=1; fi ;; V) echo "${version}" exit 0; ;; h) usage; exit 0; ;; \?) echo "Invalid option: -$OPTARG" >&2 ;; esac done function do_dump_debugfs () { if [ $dump_current -eq 1 ]; then echo "--------------------------------------------------------------------" echo "Current traces" echo "--------------------------------------------------------------------" for id in 0 1 2 ; do echo "curr_dbg_uid$id" cat /sys/kernel/debug/genwqe/genwqe${card}_card/curr_dbg_uid${id} if [ $? -ne 0 ]; then echo "failed!" exit 1 fi done echo echo "--------------------------------------------------------------------" echo "FIRs from current run (to check if recovery state is ok)" echo " See dump below for all FIRs/FECs." 
echo "--------------------------------------------------------------------" cat /sys/kernel/debug/genwqe/genwqe${card}_card/curr_regs if [ $? -ne 0 ]; then echo "failed!" exit 1 fi echo fi if [ $dump_previous -eq 1 ]; then echo "--------------------------------------------------------------------" echo "Previous traces" echo "--------------------------------------------------------------------" for id in 0 1 2 ; do echo "prev_dbg_uid$id" cat /sys/kernel/debug/genwqe/genwqe${card}_card/prev_dbg_uid${id} if [ $? -ne 0 ]; then echo "failed!" exit 1 fi done echo echo "--------------------------------------------------------------------" echo "FIRs from previous run (most likely where the problem occurred)" echo " See dump below for all FIRs/FECs." echo "--------------------------------------------------------------------" cat /sys/kernel/debug/genwqe/genwqe${card}_card/prev_regs if [ $? -ne 0 ]; then echo "failed!" exit 1 fi echo echo "--------------------------------------------------------------------" echo "Current state of DDCB queue" echo "--------------------------------------------------------------------" cat /sys/kernel/debug/genwqe/genwqe${card}_card/ddcb_info if [ $? -ne 0 ]; then echo "failed!" exit 1 fi echo fi echo "--------------------------------------------------------------------" echo "Driver/bitstream version information" echo "--------------------------------------------------------------------" cat /sys/kernel/debug/genwqe/genwqe${card}_card/info if [ $? -ne 0 ]; then echo "failed!" exit 1 fi echo } function do_dump_sysfs () { echo "====================================================================" echo "Genwqe Card FFDC Dump" echo " `basename $0` version ${version}" echo -n " Dump taken: " date echo "====================================================================" echo -n "Type: " cat /sys/class/genwqe/genwqe${card}_card/type if [ $? -ne 0 ]; then echo "failed!" 
exit 1 fi echo -n "AppID: " cat /sys/class/genwqe/genwqe${card}_card/appid if [ $? -ne 0 ]; then echo "failed!" exit 1 fi echo -n "Version: " cat /sys/class/genwqe/genwqe${card}_card/version if [ $? -ne 0 ]; then echo "failed!" exit 1 fi echo -n "Current bitstream: " cat /sys/class/genwqe/genwqe${card}_card/curr_bitstream if [ $? -ne 0 ]; then echo "failed!" exit 1 fi echo -n "Next bitstream: " cat /sys/class/genwqe/genwqe${card}_card/next_bitstream if [ $? -ne 0 ]; then echo "failed!" exit 1 fi echo -n "Temperature: " cat /sys/class/genwqe/genwqe${card}_card/tempsens if [ $? -ne 0 ]; then echo "failed!" exit 1 fi echo "--------------------------------------------------------------------" echo } function do_dump_queues () { echo "--------------------------------------------------------------------" echo "Queue status of all PCI functions" echo "--------------------------------------------------------------------" genwqe_ffdc -C ${card} --dump-queues ; echo } function do_dump_config_space () { s=$( basename `ls -l /sys/class/genwqe/genwqe${card}_card/device | cut -d'>' -f2` ) echo "--------------------------------------------------------------------" echo "PCIe Config Space of card${card}: ${s}" echo "--------------------------------------------------------------------" if [ -x /sbin/lspci ]; then /sbin/lspci -vvvxxxxs ${s} fi if [ -x /usr/bin/lspci ]; then /usr/bin/lspci -vvvxxxxs ${s} fi } # Ensure that root is executing this script. if [ "$(id -u)" != "0" ]; then echo "warning: This script must be executed as root to get all available information!" exit 1; fi # Based on previous experience we dump the traces first which are in # debugfs. Since accessing the card will at least mess up the trace # with ffdc capturing traffic. 
if [ $dump_debugfs -eq 1 ]; then
	do_dump_debugfs;
fi
if [ $dump_sysfs -eq 1 ]; then
	do_dump_sysfs;
fi
if [ $dump_config -eq 1 ]; then
	do_dump_config_space;
fi
if [ $dump_queues -eq 1 ]; then
	do_dump_queues;
fi

exit 0;
genwqe-user-4.0.18/tools/genwqe_file_perf000077500000000000000000000130061303345043000204300ustar00rootroot00000000000000#!/bin/bash
#
# Copyright 2016, International Business Machines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

card=0
verbose=0
PLATFORM=`uname -p`
start_time=`date`
options=""
verbose=0
logging=0
keep=0
test_data=linux-3.17.tar
sizes="1 8 16 64 256 1024 4096 8192 9000 12000 14000 16000 32000 48000 56000 64000 96000 128000 200000 300000 400000 500000"

# Set default accelerator based on platform we are running on.
# Quoted: an empty uname result must not break the test.
if [ "${PLATFORM}" = "ppc64le" ]; then
	accelerator=CAPI
else
	accelerator=GENWQE
fi

function usage() {
	echo "Usage:"
	echo " genwqe_file_perf"
	echo " [-A] GENWQE|CAPI"
	echo " [-C] card to be used for the test"
	echo " [-t] file to use for performance measurement"
	echo " [-1] --fast mode for compression"
	echo " [-9] --best mode for compression"
	echo " [-k] keep output data and write to filesystem"
	echo " [-l] Enable system load logging"
	echo " sadc - System activity data collector and gnuplot"
	echo " must be installed"
	echo " [-v] verbose"
	echo
	echo "To exclude influences from reading and writing to disk,"
	echo "it is recommended to create a ramdisk e.g. by using tmpfs"
	echo "to get more precise measurements about what is caused by"
	echo "the accelerator and what is caused by using faster or smaller"
	echo "disks."
	echo
}

# Remove the test files and what the load logging has left behind.
function erase_files() {
	for s in ${sizes} ; do
		rm -f ${s}KiB.bin ${s}KiB.orig ${s}KiB.gz out.stderr
	done
	rm -f system_load.sar system_load.*.sar system_load.csv system_load.txt \
		system_load.gnuplot system_load.pid
}

# Signal handler: stop the load logging and terminate. Without the
# exit the measurement simply continued after "EXIT ..." was printed.
function cleanup() {
	if [ $logging -eq 1 ]; then
		system_load_logging_stop
	fi
	echo "EXIT ..."
	exit 1
}

# SIGKILL cannot be caught, therefore it is not listed here
trap cleanup SIGINT SIGTERM

###############################################################################
# System Load Logging
###############################################################################

function system_load_logging_start() {
	rm -f system_load.sar system_load.pid
	sync
	/usr/lib/sysstat/sadc 1 system_load.sar &
	echo $! > system_load.pid
}

function system_load_logging_stop() {
	kill -9 `cat system_load.pid`

	# Drop the leading lines of the sar output (tail -n +3 starts
	# with line 3), they contain some header information
	cp system_load.sar system_load.$ZLIB_ACCELERATOR.sar
	sar -u -f system_load.sar | tail -n +3 > system_load.txt
	grep -v Average system_load.txt > system_load.csv

	start=`head -n1 system_load.csv | cut -f1 -d' '`
	end=`tail -n1 system_load.csv | cut -f1 -d' '`

	# NOTE(review): the here-document redirection was damaged in
	# the text this was restored from ("cat < system_load.gnuplot");
	# reconstructed from the EOF terminator and the use of
	# system_load.gnuplot below.
	cat <<EOF > system_load.gnuplot
# Gnuplot Config
#
set terminal pdf size 16,8
set output "system_load.pdf"
set autoscale
set title "System Load using $ZLIB_ACCELERATOR"
set xdata time
set timefmt "%H:%M:%S"
set xlabel "Time"
set xrange ["$start":"$end"]
set ylabel "CPU Utilization"
# Set yrange small, we use only one CPU using 100.00 is not useful here
set yrange ["0.00":"5.00"]
set style data lines
set grid
# set datafile separator " "
plot "system_load.csv" using 1:4 title "%user", '' using 1:6 title "%system", '' using 1:9 title "%idle"
EOF

	# Instructing gnuplot to generate a pdf with our CPU load statistics
	cat system_load.gnuplot | gnuplot

	# Save it under an accelerator unique name
	mv system_load.pdf system_load.${ZLIB_ACCELERATOR}.pdf
}

while getopts "19A:C:t:klvh" opt; do
	case $opt in
	A)
		accelerator=$OPTARG;
		;;
	C)
		card=$OPTARG;
		;;
	t)
		test_data=${OPTARG};
		;;
	1)
		options+=" -1"
		;;
	9)
		options+=" -9"
		;;
	l)
		logging=1;
		;;
	k)
		keep=1;
		;;
	v)
		# was "b)", which is not in the option string, such
		# that -v never had any effect
		verbose=1;
		;;
	h)
		usage;
		exit 0;
		;;
	\?)
		# getopts has already printed what is wrong
		usage >&2
		exit 1;
		;;
	esac
done

if [ "${accelerator}" = SW ]; then
	options+=" -s"
fi

ZLIB_ACCELERATOR=${accelerator}

# Generate core dumps, in case something needs debug
ulimit -c unlimited

echo "ACCELERATOR: ${accelerator}"

if [ ! -f ${test_data} ]; then
	echo "Testdata: ${test_data} missing!"
	echo "Use -t to specify alternate file."
	exit 1
fi

for s in ${sizes} ; do
	dd if=${test_data} of=${s}KiB.bin bs=1024 count=${s} 2> /dev/null
done

if [ $logging -eq 1 ]; then
	system_load_logging_start
fi

echo "KiB ; sec ; sec"
for s in ${sizes} ; do
	if [ ${keep} -eq 1 ]; then
		outfile=${s}KiB.orig
	else
		outfile=/dev/null
	fi

	sync
	(time genwqe_gzip -A ${accelerator} ${options} -B ${card} -c \
		${s}KiB.bin > ${s}KiB.gz) 2> out.stderr
	if [ $? -ne 0 ]; then
		echo "Compression failed ..."
		exit 1
	fi
	c=`grep real out.stderr | cut -f2 -d'm' | cut -d's' -f1`;
	if [ $verbose -eq 1 ]; then
		cat out.stderr
	fi

	sync
	(time genwqe_gunzip -A ${accelerator} -o512KiB ${options} -B ${card} \
		-c ${s}KiB.gz > ${outfile}) 2> out.stderr
	if [ $? -ne 0 ]; then
		echo "Decompression failed ..."
		exit 1
	fi
	d=`grep real out.stderr | cut -f2 -d'm' | cut -d's' -f1`;
	if [ $verbose -eq 1 ]; then
		cat out.stderr
	fi

	echo "${s} ; ${c} ; ${d}"
	rm -f ${s}KiB.orig ${s}KiB.gz out.stderr
done

if [ $logging -eq 1 ]; then
	system_load_logging_stop
fi

erase_files
exit 0
genwqe-user-4.0.18/tools/genwqe_find_card000077500000000000000000000056001303345043000204070ustar00rootroot00000000000000#!/bin/bash
#
# Copyright 2015, 2016, International Business Machines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # Figure out GenWQE cards in the system. We have GENWQE/PCIe GZIP cards # and CAPI GZIP cards. This script should help automatic testing of # any available cards in one system. It should print out a list of # available cards of each type. # export accel=UNKNOWN # Parse any options given on the command line while getopts "A:C:t:PvVhl" opt; do case ${opt} in A) accel=${OPTARG}; ;; V) echo "${version}" exit 0; ;; h) usage; exit 0; ;; \?) echo "ERROR: Invalid option: -$OPTARG" >&2 exit 1; ;; esac done # Print usage message helper function function usage() { echo "Usage of $PROGRAM:" echo " [-A] use either GENWQE for the PCIe " echo " and CAPI for CAPI based solution available " echo " only on System p" } # # We need to take into account that there might be other CAPI cards # in our system. Therefore we check the psl_revision, which identifies # the card hardware and the device id in the configuration record cr0 # which is the unique id the card has. The combination of both, should # be more or less bullet prove. # function detect_capi_cards() { # We have MAX 4 CAPI cards in one system for card in `seq 0 3` ; do if [ ! -d /sys/class/cxl/card${card} ]; then continue fi psl_revision=`cat /sys/class/cxl/card${card}/psl_revision` if [ $psl_revision != 1 ] ; then continue fi device=`cat /sys/class/cxl/card${card}/afu${card}.0/cr0/device` if [ $device = 0x0602 ]; then echo -n "${card} " fi done } # # GenWQE cards are unique, so there is not much complexity here. 
The # only thing to consider is that we can have up to 16 PCI functions, # one physical and 15 virtual ones for one card. Those appear as # virtual cards, and that is what we list here. If one likes to see # only the PFs or the VFs, some more ifs are needed, but it should be # doable too. Here there is currently no need for this. # function detect_genwqe_cards() { # We can have a lot of genwqe cards/functions in one system for card in `seq 0 127` ; do if [ ! -d /sys/class/genwqe/genwqe${card}_card ]; then continue fi echo -n "${card} " done } case ${accel} in # Set one or the other, but not miss to set the accelerator "UNKNOWN") usage exit 1 ;; "CAPI") detect_capi_cards ;; "GENWQE") detect_genwqe_cards ;; esac genwqe-user-4.0.18/tools/genwqe_gzip.c000066400000000000000000000630161303345043000176720ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * zpipe.c: example of proper use of zlib's inflate() and deflate() * Not copyrighted -- provided to the public domain * Version 1.4 11 December 2005 Mark Adler */ /* * EDC version of gzip. Experimental tool to show the advantages of * hardware compression/decompression. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SET_BINARY_MODE(file) /* FIXME Fake this for old RHEL versions e.g. 
RHEL5.6 */ #ifndef CPU_ALLOC #define CPU_ALLOC(cpus) ({ void *ptr = NULL; ptr; }) #define CPU_ALLOC_SIZE(cpus) ({ int val = 0; val; }) #define CPU_ISSET_S(cpu, size, cpusetp) ({ int val = 0; val; }) #define CPU_FREE(cpusetp) #define CPU_ZERO_S(size, cpusetp) #define CPU_SET_S(run_cpu, size, cpusetp) #define sched_getcpu() ({ int val = 0; val; }) #define sched_setaffinity(x, size, cpusetp) ({ int val = 0; val; }) #endif /** common error printf */ #define pr_err(fmt, ...) do { \ fprintf(stderr, "gzip: " fmt, ## __VA_ARGS__); \ } while (0) static const char *version = GIT_VERSION; /** * Common tool return codes * 0: EX_OK/EXIT_SUCCESS * 1: Catchall for general errors/EXIT_FAILURE * 2: Misuse of shell builtins (according to Bash documentation) * 64..78: predefined in sysexits.h * * 79..128: Exit codes for our applications * * 126: Command invoked cannot execute * 127: "command not found" * 128: Invalid argument to exit * 128+n: Fatal error signal "n" * 255: Exit status out of range (exit takes only integer args in the * range 0 - 255) */ #define EX_ERRNO 79 /* libc problem */ #define EX_MEMORY 80 /* mem alloc failed */ #define EX_ERR_DATA 81 /* data not as expected */ #define EX_ERR_CRC 82 /* CRC wrong */ #define EX_ERR_ADLER 83 /* Adler checksum wrong */ #define EX_ERR_CARD 84 /* accelerator problem */ #define EX_COMPRESS 85 /* compression did not work */ #define EX_DECOMPRESS 86 /* decompression failed */ #define EX_ERR_DICT 87 /* dictionary compare failed */ static int verbose = 0; /* Default Buffer Size */ static unsigned int CHUNK_i = 128 * 1024; /* 128 KiB; */ static unsigned int CHUNK_o = 128 * 1024; /* 128 KiB; */ /** * Try to ping process to a specific CPU. Returns the CPU we are * currently running on. 
*/ static int pin_to_cpu(int run_cpu) { cpu_set_t *cpusetp; size_t size; int num_cpus; num_cpus = CPU_SETSIZE; /* take default, currently 1024 */ cpusetp = CPU_ALLOC(num_cpus); if (cpusetp == NULL) return sched_getcpu(); size = CPU_ALLOC_SIZE(num_cpus); CPU_ZERO_S(size, cpusetp); CPU_SET_S(run_cpu, size, cpusetp); if (sched_setaffinity(0, size, cpusetp) < 0) { CPU_FREE(cpusetp); return sched_getcpu(); } /* figure out on which cpus we actually run */ CPU_FREE(cpusetp); return run_cpu; } /* Compress from file source to file dest until EOF on source. def() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_STREAM_ERROR if an invalid compression level is supplied, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. */ static int def(FILE *source, FILE *dest, z_stream *strm, unsigned char *in, unsigned char *out) { int ret, flush; unsigned have; /* compress until end of file */ do { strm->avail_in = fread(in, 1, CHUNK_i, source); if (ferror(source)) { return Z_ERRNO; } flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; strm->next_in = in; /* run deflate() on input until output buffer not full, finish compression if all of source has been read in */ do { if (verbose) fprintf(stderr, "CHUNK_o=%d\n", CHUNK_o); strm->avail_out = CHUNK_o; strm->next_out = out; ret = deflate(strm, flush); /* no bad ret value */ assert(ret != Z_STREAM_ERROR); /* not clobbered */ have = CHUNK_o - strm->avail_out; if (fwrite(out, 1, have, dest) != have || ferror(dest)) { return Z_ERRNO; } } while (strm->avail_out == 0); assert(strm->avail_in == 0); /* all input will be used */ /* done when last data in file processed */ } while (flush != Z_FINISH); assert(ret == Z_STREAM_END); /* stream will be complete */ return Z_OK; } /* Decompress from file source to file dest until stream ends or EOF. 
inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_DATA_ERROR if the deflate data is invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. */ static int inf(FILE *source, FILE *dest, z_stream *strm, unsigned char *in, unsigned char *out) { int ret = Z_OK; int rc; long start_offs; long read_offs = 0; long have; strm->avail_in = 0; strm->next_in = Z_NULL; start_offs = ftell(source); read_offs = 0; /* decompress until deflate stream ends or end of file */ do { strm->avail_in = fread(in, 1, CHUNK_i, source); if (ferror(source)) { fprintf(stderr, "fread error\n"); return Z_ERRNO; } if (0 == strm->avail_in) break; strm->next_in = in; __more_inf: /* run inflate() on input until output buffer not full */ do { strm->avail_out = CHUNK_o; strm->next_out = out; ret = inflate(strm, Z_NO_FLUSH /* Z_SYNC_FLUSH */); assert(ret != Z_STREAM_ERROR); /* not clobbered */ switch (ret) { case Z_OK: /* Need to continue with Read more data */ break; case Z_STREAM_END: read_offs += strm->total_in; break; case Z_NEED_DICT: fprintf(stderr, "NEED Dict........\n"); ret = Z_DATA_ERROR; /* and fall through */ case Z_DATA_ERROR: case Z_MEM_ERROR: fprintf(stderr, "Fault..... %d\n", ret); return ret; } have = CHUNK_o - strm->avail_out; if (fwrite(out, 1, have, dest) != (size_t)have || ferror(dest)) { fprintf(stderr, "fwrite fault\n"); return Z_ERRNO; } } while (strm->avail_out == 0); /* done when inflate() says it's done */ } while (ret != Z_STREAM_END); /* FIXME: this goto and the limit check is not nice. 
*/ if (strm->avail_in > (16 * 1024)) { inflateReset(strm); /* reset and continue */ goto __more_inf; } /* Set the file position right after the absorbed input */ start_offs += read_offs; /* Add to seek offset */ rc = fseek(source, start_offs, SEEK_SET); if (rc == -1) fprintf(stderr, "err: fseek rc=%d\n", rc); inflateReset(strm); return ret; } /* report a zlib or i/o error */ static void zerr(int ret) { switch (ret) { case Z_ERRNO: if (ferror(stdin)) pr_err("error reading stdin\n"); if (ferror(stdout)) pr_err("error writing stdout\n"); break; case Z_STREAM_ERROR: pr_err("invalid compression level\n"); break; case Z_DATA_ERROR: pr_err("invalid or incomplete deflate data\n"); break; case Z_MEM_ERROR: pr_err("out of memory\n"); break; case Z_VERSION_ERROR: pr_err("zlib version mismatch!\n"); break; } } /** * str_to_num() - Convert string into number and cope with endings like * KiB for kilobyte * MiB for megabyte * GiB for gigabyte */ static inline uint64_t str_to_num(char *str) { char *s = str; uint64_t num = strtoull(s, &s, 0); if (*s == '\0') return num; if (strcmp(s, "KiB") == 0) num *= 1024; else if (strcmp(s, "MiB") == 0) num *= 1024 * 1024; else if (strcmp(s, "GiB") == 0) num *= 1024 * 1024 * 1024; return num; } static void userinfo(FILE *fp, char *prog, const char *version) { fprintf(fp, "%s %s\n(c) Copyright IBM Corp. 2015\n", basename(prog), version); } static void print_args(FILE *fp, int argc, char **argv) { int i; fprintf(fp, "Called with:\n"); for (i = 0; i < argc; i++) fprintf(fp, " ARGV[%d]: \"%s\"\n", i, argv[i]); } static void usage(FILE *fp, char *prog, int argc, char *argv[]) { fprintf(fp, "Usage: %s [OPTION]... 
[FILE]...\n" "Compress or uncompress FILEs (by default, compress FILES in-place).\n" "\n" "Mandatory arguments to long options are mandatory for short options too.\n" "\n" " -c, --stdout write on standard output, keep original files unchanged\n" " -d, --decompress decompress\n" " -f, --force force overwrite of output file and compress links\n" " -h, --help give this help\n" " -l, --list list compressed file contents\n" " -L, --license display software license\n" " -N, --name save or restore the original name and time stamp\n" " -q, --quiet suppress all warnings\n" " -S, --suffix=SUF use suffix SUF on compressed files\n" " -v, --verbose verbose mode\n" " -V, --version display version number\n" " -1, --fast compress faster\n" " -9, --best compress better\n" "\n" "Special options for testing and debugging:\n" " -A, --accelerator-type=GENWQE|CAPI CAPI is only available for System p\n" " -B, --card= -1 is for automatic card selection\n" " -X, --cpu force to run on CPU \n" " -s, --software force to use software compression/decompression\n" " -i, --i_bufsize input buffer size (%d KiB)\n" " -o, --o_bufsize output buffer size (%d KiB)\n" " -N, --name=NAME write NAME into gzip header\n" " -C, --comment=CM write CM into gzip header\n" " -E, --extra=EXTRA write EXTRA (file) into gzip header\n" "\n" "With no FILE, or when FILE is -, read standard input.\n" "\n" "NOTE: Not all options are supported in this limited version!\n" "Suggestions or patches are welcome!\n" "\n" "Report bugs via https://github.com/ibm-genwqe/genwqe-user.\n" "\n", prog, CHUNK_i/1024, CHUNK_o/1024); print_args(fp, argc, argv); } static inline void hexdump(FILE *fp, const void *buff, unsigned int size) { unsigned int i; const uint8_t *b = (uint8_t *)buff; char ascii[17]; char str[2] = { 0x0, }; if (size == 0) return; for (i = 0; i < size; i++) { if ((i & 0x0f) == 0x00) { fprintf(fp, " %08x:", i); memset(ascii, 0, sizeof(ascii)); } fprintf(fp, " %02x", b[i]); str[0] = isalnum(b[i]) ? 
b[i] : '.'; str[1] = '\0'; strncat(ascii, str, sizeof(ascii) - 1); if ((i & 0x0f) == 0x0f) fprintf(fp, " | %s\n", ascii); } /* print trailing up to a 16 byte boundary. */ for (; i < ((size + 0xf) & ~0xf); i++) { fprintf(fp, " "); str[0] = ' '; str[1] = '\0'; strncat(ascii, str, sizeof(ascii) - 1); if ((i & 0x0f) == 0x0f) fprintf(fp, " | %s\n", ascii); } fprintf(fp, "\n"); } static void do_print_gzip_hdr(gz_headerp head, FILE *fp) { fprintf(fp, "GZIP Header\n" " Text: %01X\n", head->text); fprintf(fp, " Time: %s", ctime((time_t*) &head->time)); fprintf(fp, " xflags: %08X\n", head->xflags); fprintf(fp, " OS: %01X (0x03 Linux per RFC1952)\n", head->os); fprintf(fp, " Extra Len: %d\n", head->extra_len); fprintf(fp, " Extra Max: %d\n", head->extra_max); hexdump(fp, head->extra, head->extra_len); fprintf(fp, " Name: %s\n", head->name ? (char *)head->name : ""); fprintf(fp, " Name Max: %d\n", head->name_max); fprintf(fp, " Comment: %s\n", head->comment ? (char *)head->comment : ""); fprintf(fp, " Comment Max: %d\n", head->comm_max); fprintf(fp, " Header CRC : %X\n", head->hcrc); fprintf(fp, " Done: %01X\n", head->done); } static inline ssize_t file_size(const char *fname) { int rc; struct stat s; rc = lstat(fname, &s); if (rc != 0) { fprintf(stderr, "err: Cannot find %s!\n", fname); return rc; } return s.st_size; } static inline ssize_t file_read(const char *fname, uint8_t *buff, size_t len) { int rc; FILE *fp; if ((fname == NULL) || (buff == NULL) || (len == 0)) return -EINVAL; fp = fopen(fname, "r"); if (!fp) { fprintf(stderr, "err: Cannot open file %s: %s\n", fname, strerror(errno)); return -ENODEV; } rc = fread(buff, len, 1, fp); if (rc == -1) { fprintf(stderr, "err: Cannot read from %s: %s\n", fname, strerror(errno)); fclose(fp); return -EIO; } fclose(fp); return rc; } static inline ssize_t file_write(const char *fname, const uint8_t *buff, size_t len) { int rc; FILE *fp; if ((fname == NULL) || (buff == NULL) || (len == 0)) return -EINVAL; fp = fopen(fname, "w+"); if 
(!fp) { fprintf(stderr, "err: Cannot open file %s: %s\n", fname, strerror(errno)); return -ENODEV; } rc = fwrite(buff, len, 1, fp); if (rc == -1) { fprintf(stderr, "err: Cannot write to %s: %s\n", fname, strerror(errno)); fclose(fp); return -EIO; } fclose(fp); return rc; } /** * FIXME Verbose mode missing yet. */ static int do_list_contents(FILE *fp, char *out_f, int list_contents) { int rc; struct stat st; uint32_t d, crc32, size, compressed_size; float ratio = 0.0; z_stream strm; uint8_t in[4096]; uint8_t out[4096]; gz_header head; uint8_t extra[64 * 1024]; uint8_t comment[1024]; uint8_t name[1024]; int window_bits = 31; /* GZIP */ const char *mon[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; rc = fstat(fileno(fp), &st); if (rc != 0) return rc; memset(&strm, 0, sizeof(strm)); strm.avail_in = 0; strm.next_in = Z_NULL; rc = inflateInit2(&strm, window_bits); if (Z_OK != rc) return rc; strm.next_out = out; strm.avail_out = sizeof(out); strm.next_in = in; strm.avail_in = fread(in, 1, sizeof(in), fp); if (ferror(fp)) return Z_ERRNO; head.extra = extra; head.extra_len = 0; head.extra_max = sizeof(extra); head.comment = comment; head.comm_max = sizeof(comment); head.name = name; head.name_max = sizeof(name); rc = inflateGetHeader(&strm, &head); if (Z_OK != rc) { fprintf(stderr, "err: Cannot read gz header! rc=%d\n", rc); return rc; } rc = inflate(&strm, Z_BLOCK); if (Z_OK != rc) { fprintf(stderr, "err: inflate(Z_BLOCK) failed rc=%d\n", rc); return rc; } if (head.done == 0) { fprintf(stderr, "err: gzip header not entirely decoded! 
" "total_in=%ld total_out=%ld head.done=%d\n", strm.total_in, strm.total_out, head.done); return Z_DATA_ERROR; } rc = fseek(fp, st.st_size - 2 * sizeof(uint32_t), SEEK_SET); if (rc != 0) return rc; rc = fread(&d, sizeof(d), 1, fp); if (rc != 1) return -1; crc32 = __le32_to_cpu(d); rc = fread(&d, sizeof(d), 1, fp); if (rc != 1) return -1; size = __le32_to_cpu(d); /* Compressed size is total file size reduced by gzip header size and 8 bytes for the gzip trailer. */ compressed_size = st.st_size - strm.total_in - 8; if (size) ratio = 100 - (float)compressed_size * 100 / size; if (!verbose) { fprintf(stderr, " compressed uncompressed ratio " "uncompressed_name\n" "%19lld %19lld %2.2f%% %s\n", (long long)st.st_size, (long long)size, ratio, out_f); } else { time_t t = time(NULL); struct tm *tm = localtime(&t); /* (const time_t *)&head.time */ fprintf(stderr, "method crc date time " "compressed uncompressed ratio " "uncompressed_name\n" "%s %x %s %2d %d:%d %19lld %19lld %2.2f%% %s\n", "defla", crc32, mon[tm->tm_mon], tm->tm_mday, tm->tm_hour, tm->tm_min, (long long)st.st_size, (long long)size, ratio, out_f); } if (list_contents > 1) do_print_gzip_hdr(&head, stderr); return 0; } static int strip_ending(char *oname, const char *iname, size_t n, const char *suffix) { char *ending; snprintf(oname, n, "%s", iname); /* create a copy */ ending = strstr(oname, suffix); /* find suffix ... */ if (ending == NULL) return -1; /* hey, suffix not found! */ ending--; *ending = 0; /* ... 
and strip suffix */ return 0; } /* compress or decompress from stdin to stdout */ int main(int argc, char **argv) { int rc = Z_OK; bool compress = true; int list_contents = 0; bool force = false; bool quiet __attribute__((unused)) = false; int window_bits = 31; /* GZIP */ int level = Z_DEFAULT_COMPRESSION; char *prog = basename(argv[0]); const char *in_f = NULL; char out_f[PATH_MAX]; FILE *i_fp = stdin; FILE *o_fp = NULL; const char *suffix = "gz"; int force_software = 0; int cpu = -1; unsigned char *in = NULL; unsigned char *out = NULL; z_stream strm; const char *name = NULL; char *comment = NULL; const char *extra_fname = NULL; uint8_t *extra = NULL; int extra_len = 0; struct stat s; const char *accel = "GENWQE"; const char *accel_env = getenv("ZLIB_ACCELERATOR"); int card_no = 0; const char *card_no_env = getenv("ZLIB_CARD"); /* Use environment variables as defaults. Command line options can than overrule this. */ if (accel_env != NULL) accel = accel_env; if (card_no_env != NULL) card_no = atoi(card_no_env); /* avoid end-of-line conversions */ SET_BINARY_MODE(stdin); SET_BINARY_MODE(stdout); if (strstr(prog, "gunzip") != 0) { compress = false; CHUNK_o *= 4; /* adjust default output buffer size to avoid memcpy */ } while (1) { int ch; int option_index = 0; static struct option long_options[] = { { "stdout", no_argument, NULL, 'c' }, { "decompress", no_argument, NULL, 'd' }, { "force", no_argument, NULL, 'f' }, { "help", no_argument, NULL, 'h' }, /* list */ { "list", no_argument, NULL, 'l' }, { "license", no_argument, NULL, 'L' }, { "suffix", required_argument, NULL, 'S' }, { "verbose", no_argument, NULL, 'v' }, { "version", no_argument, NULL, 'V' }, { "fast", no_argument, NULL, '1' }, { "best", no_argument, NULL, '9' }, /* our own options */ { "cpu", required_argument, NULL, 'X' }, { "accelerator-type", required_argument, NULL, 'A' }, { "card_no", required_argument, NULL, 'B' }, { "software", no_argument, NULL, 's' }, { "extra", required_argument, NULL, 'E' }, { 
"name", required_argument, NULL, 'N' }, { "comment", required_argument, NULL, 'C' }, { "i_bufsize", required_argument, NULL, 'i' }, { "o_bufsize", required_argument, NULL, 'o' }, { 0, no_argument, NULL, 0 }, }; ch = getopt_long(argc, argv, "E:N:C:cdfqhlLsS:vV123456789?i:o:X:A:B:", long_options, &option_index); if (ch == -1) /* all params processed ? */ break; switch (ch) { case 'X': cpu = strtoul(optarg, NULL, 0); break; case 'A': accel = optarg; break; case 'B': card_no = strtol(optarg, (char **)NULL, 0); break; case 'E': extra_fname = optarg; break; case 'N': name = optarg; break; case 'C': comment = optarg; break; case 'd': compress = false; break; case 'f': force = true; break; case 'q': /* Currently does nothing, zless needs it */ quiet = true; break; case 'c': o_fp = stdout; break; case 'S': suffix = optarg; break; case 's': force_software = true; break; case 'l': list_contents++; break; case '1': level = Z_BEST_SPEED; break; case '2': level = 2; break; case '3': level = 3; break; case '4': level = 4; break; case '5': level = 5; break; case '6': level = 6; break; case '7': level = 7; break; case '8': level = 8; break; case '9': level = Z_BEST_COMPRESSION; break; case 'v': verbose++; break; case 'V': fprintf(stdout, "%s\n", version); exit(EXIT_SUCCESS); break; case 'i': CHUNK_i = str_to_num(optarg); break; case 'o': CHUNK_o = str_to_num(optarg); break; case 'L': userinfo(stdout, prog, version); exit(EXIT_SUCCESS); break; case 'h': case '?': usage(stdout, prog, argc, argv); exit(EXIT_SUCCESS); break; } } if (cpu != -1) pin_to_cpu(cpu); if (force_software) { zlib_set_inflate_impl(ZLIB_SW_IMPL); zlib_set_deflate_impl(ZLIB_SW_IMPL); } else { zlib_set_accelerator(accel, card_no); zlib_set_inflate_impl(ZLIB_HW_IMPL); zlib_set_deflate_impl(ZLIB_HW_IMPL); } /* FIXME loop over this ... 
*/ if (optind < argc) { /* input file */ in_f = argv[optind++]; i_fp = fopen(in_f, "r"); if (!i_fp) { pr_err("%s\n", strerror(errno)); print_args(stderr, argc, argv); exit(EX_ERRNO); } rc = lstat(in_f, &s); if ((rc == 0) && S_ISLNK(s.st_mode)) { pr_err("%s: Too many levels of symbolic links\n", in_f); exit(EXIT_FAILURE); } if (list_contents) { rc = strip_ending(out_f, in_f, PATH_MAX, suffix); if (rc < 0) { pr_err("No .%s file!\n", suffix); print_args(stderr, argc, argv); exit(EXIT_FAILURE); } rc = do_list_contents(i_fp, out_f, list_contents); if (rc != 0) { pr_err("Unable to list contents.\n"); print_args(stderr, argc, argv); exit(EXIT_FAILURE); } fclose(i_fp); exit(EXIT_SUCCESS); } } if (in_f == NULL) o_fp = stdout; /* should not be a terminal! */ if (o_fp == NULL) { if (compress) snprintf(out_f, PATH_MAX, "%s.%s", in_f, suffix); else { rc = strip_ending(out_f, in_f, PATH_MAX, suffix); if (rc < 0) { pr_err("No .%s file!\n", suffix); print_args(stderr, argc, argv); exit(EXIT_FAILURE); } } rc = stat(out_f, &s); if (!force && (rc == 0)) { pr_err("File %s already exists!\n", out_f); print_args(stderr, argc, argv); exit(EX_ERRNO); } o_fp = fopen(out_f, "w+"); if (!o_fp) { pr_err("Cannot open output file %s: %s\n", out_f, strerror(errno)); print_args(stderr, argc, argv); exit(EX_ERRNO); } /* get mode settings for existing file and ... */ rc = fstat(fileno(i_fp), &s); if (rc == 0) { rc = fchmod(fileno(o_fp), s.st_mode); if (rc != 0) { pr_err("Cannot set mode %s: %s\n", out_f, strerror(errno)); exit(EX_ERRNO); } } else /* else ignore ... 
*/ pr_err("Cannot set mode %s: %s\n", out_f, strerror(errno)); /* If output does not go to stdout and a filename is given, set it */ if (name == NULL) name = in_f; } if (isatty(fileno(o_fp))) { pr_err("Output must not be a terminal!\n"); print_args(stderr, argc, argv); exit(EXIT_FAILURE); } if (optind != argc) { /* now it must fit */ usage(stderr, prog, argc, argv); exit(EXIT_FAILURE); } in = malloc(CHUNK_i); /* This is the bigger Buffer by default */ if (NULL == in) { pr_err("%s\n", strerror(errno)); print_args(stderr, argc, argv); exit(EXIT_FAILURE); } out = malloc(CHUNK_o); /* This is the smaller Buffer by default */ if (NULL == out) { pr_err("%s\n", strerror(errno)); print_args(stderr, argc, argv); exit(EXIT_FAILURE); } /* allocate inflate state */ memset(&strm, 0, sizeof(strm)); strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; if (compress) { gz_header head; struct timeval tv; if (extra_fname) { extra_len = file_size(extra_fname); if (extra_len <= 0) { rc = extra_len; goto err_out; } extra = malloc(extra_len); if (extra == NULL) { rc = -ENOMEM; goto err_out; } rc = file_read(extra_fname, extra, extra_len); if (rc != 1) { fprintf(stderr, "err: Unable to read extra " "data rc=%d\n", rc); free(extra); goto err_out; } hexdump(stderr, extra, extra_len); } /* --------------- DEFALTE ----------------- */ rc = deflateInit2(&strm, level, Z_DEFLATED, window_bits, 8, Z_DEFAULT_STRATEGY); if (Z_OK != rc) goto err_out; memset(&head, 0, sizeof(head)); gettimeofday(&tv, NULL); head.time = tv.tv_sec; head.os = 0x03; if (extra != NULL) { head.extra = extra; head.extra_len = extra_len; head.extra_max = extra_len; } if (comment != NULL) { head.comment = (Bytef *)comment; head.comm_max = strlen(comment) + 1; } if (name != NULL) { head.name = (Bytef *)name; head.name_max = strlen(name) + 1; } rc = deflateSetHeader(&strm, &head); if (Z_OK != rc) { fprintf(stderr, "err: Cannot set gz header! 
rc=%d\n", rc); deflateEnd(&strm); goto err_out; } if (verbose) { fprintf(stderr, "deflateBound() %lld bytes for %lld bytes input\n", (long long)deflateBound(&strm, CHUNK_i), (long long)CHUNK_i); fprintf(stderr, "compressBound() %lld bytes for %lld bytes input\n", (long long)compressBound(CHUNK_i), (long long)CHUNK_i); } /* do compression if no arguments */ rc = def(i_fp, o_fp, &strm, in, out); if (Z_OK != rc) zerr(rc); if (extra != NULL) free(extra); deflateEnd(&strm); } else { /* --------------- INFALTE ----------------- */ strm.avail_in = 0; strm.next_in = Z_NULL; rc = inflateInit2(&strm, window_bits); if (Z_OK != rc) goto err_out; do { rc = inf(i_fp, o_fp, &strm, in, out); if (Z_STREAM_END != rc) { zerr(rc); break; } } while (!feof(i_fp) && !ferror(i_fp)); inflateEnd(&strm); } err_out: /* Delete the input file, only if input is not stdin and if output is not stdout */ if ((rc == EXIT_SUCCESS) && (i_fp != stdin) && (o_fp != stdout)) { rc = unlink(in_f); if (rc != 0) { pr_err("%s\n", strerror(errno)); print_args(stderr, argc, argv); exit(EXIT_FAILURE); } } fclose(i_fp); fclose(o_fp); free(in); free(out); exit(rc); } genwqe-user-4.0.18/tools/genwqe_loadtree.c000066400000000000000000000133611303345043000205160ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ /* * Load a Text file as a new tree into the Capi Card */ #include #include #include #include #include #include #include "genwqe_tools.h" #include #define MAX_LINE 512 static const char *version = GIT_VERSION; int verbose_flag = 0; static int do_mmio(accel_t card, uint32_t addr, uint64_t data) { int rc = 0; rc = accel_write_reg64(card, addr, (uint64_t)data); return rc; } static int check_app(accel_t card) { uint64_t data; int rc; /* Check Application Version for Version higher than 0403 */ /* Register 8 does have following layout for the 64 bits */ /* RRRRFFIINNNNNNNN */ /* RRRR == 16 bit Software Release (0404) */ /* FF == 8 bit Software Fix Level on card (01) */ /* II == 8 bit Software Interface ID (03) */ /* NNNNNNNN == 32 Bit Function (475a4950) = (GZIP) */ data = accel_read_reg64(card, 8, &rc); if (0 == rc) { data = data >> 32; if (0x03 == (data & 0xff)) { /* Check 16 bits Release */ data = data >> 16; if (data > 0x0402) return 0; } } return 1; } static void help(char *prog) { printf("Usage: %s [-CvhV] file\n" "\t-C, --card Card to use, default is 0\n" "\t-V, --version Print Version number for this tool\n" "\t-h, --help This help message\n" "\t-q, --quiet No output at all\n" "\t-v, --verbose verbose mode, up to -vvv\n", prog); return; } /** * Get command line parameters and create the output file. 
*/ int main(int argc, char *argv[]) { bool quiet = false; accel_t card; int card_no = 0; int err_code; int ch; uint32_t addr; long long data; FILE *fp; char line[MAX_LINE]; int line_no, line_len, rc, mmio_done = 0; char *filename = NULL; rc = EXIT_SUCCESS; while (1) { int option_index = 0; static struct option long_options[] = { { "card", required_argument, NULL, 'C' }, { "version", no_argument, NULL, 'V' }, { "quiet", no_argument, NULL, 'q' }, { "help", no_argument, NULL, 'h' }, { "verbose", no_argument, NULL, 'v' }, { 0, no_argument, NULL, 0 }, }; ch = getopt_long(argc, argv, "C:Vqhv", long_options, &option_index); if (-1 == ch) break; switch (ch) { case 'C': card_no = strtol(optarg, (char **)NULL, 0); break; case 'V': printf("%s\n", version); exit(EXIT_SUCCESS); break; case 'q': quiet = true; break; case 'h': help(argv[0]); exit(EXIT_SUCCESS); break; case 'v': verbose_flag++; break; default: help(argv[0]); exit(EXIT_FAILURE); } } if (optind < argc) { filename = argv[optind++]; fp = fopen(filename, "r"); if (NULL == fp) { printf("Err: Can not open: %s\n", filename); exit(EXIT_FAILURE); } } else { help(argv[0]); exit(EXIT_FAILURE); } if (!quiet) printf("Using Input Tree File: '%s'\n", filename); if (verbose_flag > 1) ddcb_debug(verbose_flag); if (!quiet && verbose_flag) printf("Open CAPI Card: %d\n", card_no); card = accel_open(card_no, DDCB_TYPE_CAPI, DDCB_MODE_WR, &err_code, 0, DDCB_APPL_ID_IGNORE); if (NULL == card) { printf("Err: failed to open CAPI Card: %u " "(%d / %s)\n", card_no, err_code, accel_strerror(card, err_code)); printf("\tcheck Permissions in /dev/cxl or kernel log\n"); fclose(fp); exit(EXIT_FAILURE); } if (check_app(card)) { printf("Err: Wrong Card Appl ID. 
Need to have > 0403\n"); rc = EXIT_FAILURE; goto exit_close; } line_no = 0; mmio_done = 0; while (fgets(line, MAX_LINE, fp) != NULL ) { line_no++; line_len = (int)strlen(line); if (30 != line_len) { if (!quiet && verbose_flag) printf("Skip Line [%d] Invalid Len: %d\n", line_no, line_len); continue; } line[line_len - 1] = '\0'; /* remove newline character */ if (!quiet && verbose_flag) printf("Read Line [%d] <%s>\n", line_no, line); /* only use lines that start with "0x" */ if ( tolower(line[0]) != '0') { if (!quiet && verbose_flag) printf("Skip Line [#%d] <%s>\n", line_no, line); continue; } /* e.g. 0x00002100 0x0E0000000008000000 */ rc = sscanf(&line[0], "0x%x", &addr); if (1 != rc) { printf("Err: Wrong Addr in Line [#%d]\n", line_no); continue; } if (0x00002100 != (addr & 0xff00)) { printf("Err: %08x Wrong MMIO Addr in Line [%d]\n", addr, line_no); continue; } rc = sscanf(&line[11], "0x%llx", &data); if (1 != rc) { printf("Err: Wrong Data in Line [#%d]\n", line_no); continue; } if (!quiet && verbose_flag) printf("MMIO Write Addr: %08x Data: %016llx\n", addr, data); rc = do_mmio(card, addr, (uint64_t)data); if (0 != rc) { printf("Err: MMIO Write Error Addr: %08x Data: %016llx " "at line [%d]\n", addr, data, line_no); break; } mmio_done++; } exit_close: if (!quiet && verbose_flag) printf("Close Capi Card: %d\n", card_no); accel_close(card); if (!quiet && verbose_flag) printf("Close File: %s\n", filename); fclose(fp); if (!quiet) printf("%s Exit wth Rc: %d (%d MMIO Writes done)\n", argv[0], rc, mmio_done); exit(rc); } genwqe-user-4.0.18/tools/genwqe_maint.c000066400000000000000000000432051303345043000200270ustar00rootroot00000000000000/* * Copyright 2015, 2016, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Genwqe Capi Card Master Maintenance tool. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "afu_regs.h" /* * This needs to be in sync with the max number of cards supported in * ddcb_capi.c. */ #define NUM_CARDS 4 /* max number of CAPI cards in system */ static const char *version = GIT_VERSION; static int verbose = 0; static FILE *fd_out; #define VERBOSE0(fmt, ...) do { \ fprintf(fd_out, fmt, ## __VA_ARGS__); \ } while (0) #define VERBOSE1(fmt, ...) do { \ if (verbose > 0) \ fprintf(fd_out, fmt, ## __VA_ARGS__); \ } while (0) #define VERBOSE2(fmt, ...) do { \ if (verbose > 1) \ fprintf(fd_out, fmt, ## __VA_ARGS__); \ } while (0) #define VERBOSE3(fmt, ...) 
do { \ if (verbose > 2) \ fprintf(fd_out, fmt, ## __VA_ARGS__); \ } while (0) struct mdev_ctx { int loop; /* Loop Counter */ int card; /* Card no (0,1,2,3 */ struct cxl_afu_h *afu_h;/* The AFU handle */ int dt; /* Delay time in sec (1 sec default) */ int count; /* Number of loops to do, (-1) = forever */ bool daemon; /* TRUE if forked */ uint64_t wed; /* This is a dummy only for attach */ bool quiet; /* False or true -q option */ pid_t pid; pid_t my_sid; /* for sid */ int mode; /* See below */ size_t errinfo_size; char *errinfo; uint64_t fir[MMIO_FIR_REGS_NUM]; }; struct cgzip_afu_fir { __be32 fir_val; __be32 fir_addr; }; /* Expect min this Release or higher */ #define MIN_REL_VERSION 0x0601 /* Mode Bits for Master Loop */ #define CHECK_FIRS_MODE 0x0001 /* Mode 1 */ #define CHECK_TIME_MODE 0x0002 /* Mode 2 */ static struct mdev_ctx master_ctx; #if 0 static int mmio_write(struct cxl_afu_h *afu_h, int ctx, uint32_t offset, uint64_t data) { int rc = -1; uint32_t offs = (ctx * MMIO_CTX_OFFSET) + offset; VERBOSE3("[%s] Enter, Offset: 0x%x data: 0x%016llx\n", __func__, offs, (long long)data); rc = cxl_mmio_write64(afu_h, offs, data); VERBOSE3("[%s] Exit, rc = %d\n", __func__, rc); return rc; } #endif static int mmio_read(struct cxl_afu_h *afu_h, int ctx, uint32_t offset, uint64_t *data) { int rc = -1; uint32_t offs = (ctx * MMIO_CTX_OFFSET) + offset; VERBOSE3("[%s] Enter, CTX: %d Offset: 0x%x\n", __func__, ctx, offs); rc = cxl_mmio_read64(afu_h, offs, data); VERBOSE3("[%s] Exit, rc = %d data: 0x%016llx\n", __func__, rc, (long long)*data); return rc; } /* * Open AFU Master Device */ static int afu_m_open(struct mdev_ctx *mctx) { int rc = 0; char device[64]; long api_version, cr_device, cr_vendor; sprintf(device, "/dev/cxl/afu%d.0m", mctx->card); VERBOSE3("[%s] Enter, Open Device: %s\n", __func__, device); mctx->afu_h = cxl_afu_open_dev(device); if (NULL == mctx->afu_h) { VERBOSE0("[%s] Exit, Card Open error rc: %d\n", __func__, rc); return -1; } /* Check if the compiled 
in API version is compatible with the one reported by the kernel driver */ rc = cxl_get_api_version_compatible(mctx->afu_h, &api_version); if ((rc != 0) || (api_version != CXL_KERNEL_API_VERSION)) { VERBOSE0(" [%s] ERR: incompatible API version: %ld/%d rc=%d\n", __func__, api_version, CXL_KERNEL_API_VERSION, rc); rc = -2; goto err_afu_free; } /* Check vendor id */ rc = cxl_get_cr_vendor(mctx->afu_h, 0, &cr_vendor); if ((rc != 0) || (cr_vendor != CGZIP_CR_VENDOR)) { VERBOSE0(" [%s] ERR: vendor_id: %ld/%d rc=%d\n", __func__, (unsigned long)cr_vendor, CGZIP_CR_VENDOR, rc); rc = -3; goto err_afu_free; } /* Check device id */ rc = cxl_get_cr_device(mctx->afu_h, 0, &cr_device); if ((rc != 0) || (cr_device != CGZIP_CR_DEVICE)) { VERBOSE0(" [%s] ERR: device_id: %ld/%d rc=%d\n", __func__, (unsigned long)cr_device, CGZIP_CR_VENDOR, rc); rc = -4; goto err_afu_free; } /* If we cannot get it, continue with warning ... */ mctx->errinfo = NULL; rc = cxl_errinfo_size(mctx->afu_h, &mctx->errinfo_size); if (0 == rc) { mctx->errinfo = malloc(mctx->errinfo_size); if (mctx->errinfo == NULL) { rc = -5; goto err_afu_free; } } else VERBOSE0(" [%s] WARN: Cannot retrieve errinfo size rc=%d\n", __func__, rc); rc = cxl_afu_attach(mctx->afu_h, (__u64)(unsigned long) (void *)mctx->wed); if (0 != rc) { rc = -6; goto err_free_errinfo; } rc = cxl_mmio_map(mctx->afu_h, CXL_MMIO_BIG_ENDIAN); if (rc != 0) { rc = -7; goto err_free_errinfo; } return 0; err_free_errinfo: if (mctx->errinfo) free(mctx->errinfo); mctx->errinfo = NULL; err_afu_free: cxl_afu_free(mctx->afu_h); mctx->afu_h = NULL; VERBOSE3("[%s] Exit rc=%d\n", __func__, rc); return rc; } static int afu_m_close(struct mdev_ctx *mctx) { VERBOSE3("[%s] Enter\n", __func__); if (NULL == mctx->afu_h) return -1; cxl_mmio_unmap(mctx->afu_h); cxl_afu_free(mctx->afu_h); mctx->afu_h = NULL; if (mctx->errinfo) free(mctx->errinfo); mctx->errinfo = NULL; VERBOSE3("[%s] Exit\n", __func__); return 0; } static int afu_check_stime(struct mdev_ctx *mctx) { int 
gsel, bsel = 0, ctx = 0; uint64_t gmask = 0, qstat_reg, err_reg, mstat_reg; uint64_t wtime; uint64_t cid_reg; int n_act = 0; uint64_t s_time = 0; char s[32]; for (gsel = 0; gsel < MMIO_CASV_REG_NUM; gsel++) { mmio_read(mctx->afu_h, MMIO_MASTER_CTX_NUMBER, MMIO_CASV_REG + (gsel*8), &gmask); if (0 == gmask) continue; /* No bit set, Skip */ for (bsel = 0; bsel < MMIO_CASV_REG_CTX; bsel++) { if (0 == (gmask & (1ull << bsel))) continue; /* Skip */ ctx = (gsel * MMIO_CASV_REG_CTX) + bsel; /* Active */ mmio_read(mctx->afu_h, ctx+1, MMIO_DDCBQ_STATUS_REG, &qstat_reg); if (0 == (qstat_reg & 0xffffffff00000000ull)) { VERBOSE3("AFU[%d:%03d] master skip\n", mctx->card, ctx); continue; /* Skip Master */ } mmio_read(mctx->afu_h, ctx+1, MMIO_DDCBQ_WT_REG, &wtime); wtime = wtime / 250; /* makes time in usec */ mmio_read(mctx->afu_h, ctx+1, MMIO_DDCBQ_CID_REG, &cid_reg); uint16_t cur_cid = (uint16_t)(cid_reg >> 16); /* Currect Context id */ uint16_t my_cid = (uint16_t)(cid_reg & 0xffff); /* My Context id */ mmio_read(mctx->afu_h, ctx+1, MMIO_DDCBQ_DMAE_REG, &err_reg); uint16_t cseq = (uint16_t)(qstat_reg >> 48ull); /* Currect sequence */ uint16_t lseq = (uint16_t)(qstat_reg >> 32ull); /* Last sequence */ uint8_t qidx = (uint8_t)(qstat_reg >> 24); /* Q Index */ uint16_t qnfe = (uint16_t)(qstat_reg >> 8); /* Context Non Fatal Error Bits */ uint8_t qstat = (uint8_t)(qstat_reg & 0xff); /* Context Status */ /* Generate W for Waiting, I for Idle and R for Running */ char flag = 'W'; /* Default Context is Waiting to get executed */ if ((lseq + 1 ) == cseq) flag = 'I'; /* Context is Idle, nothing to do */ else if (0x30 == qstat) /* if Bits 4 + 5 on ? 
*/ flag = 'R'; /* Context is Running */ if (qnfe) { VERBOSE0("AFU[%d:%03d] ERR: CurrentCtx: %03d MyCtx: %03d CS: %04X LS: %04X ", mctx->card, ctx, cur_cid, my_cid, cseq, lseq); VERBOSE0("[%c] IDX: %02d QNFE: %04x QSTAT: %02x Time: %lld usec", flag, qidx, qnfe, qstat, (long long)wtime); if (0 != err_reg) VERBOSE0("DMA Err: 0x%016llx", (long long)err_reg); VERBOSE0("\n"); } else { VERBOSE0("AFU[%d:%03d] CurrentCtx: %03d MyCtx: %03d CS: %04X LS: %04X ", mctx->card, ctx, cur_cid, my_cid, cseq, lseq); VERBOSE0("[%c] IDX: %02d QNFE: %04x QSTAT: %02x Time: %lld usec", flag, qidx, qnfe, qstat, (long long)wtime); if (0 != err_reg) VERBOSE0("DMA Err: 0x%016llx", (long long)err_reg); VERBOSE0("\n"); } n_act++; s_time += wtime; } } if (n_act) { time_t result = time(NULL); struct tm * p = localtime(&result); strftime(s, 32, "%T", p); VERBOSE0("AFU[%d:XXX] at %s Running %d Active Contexts total %lld msec", mctx->card, s, n_act, (long long)s_time/1000); mmio_read(mctx->afu_h, MMIO_MASTER_CTX_NUMBER, MMIO_AFU_STATUS_REG, &mstat_reg); if (0 != mstat_reg) VERBOSE0(" Status: 0x%016llx", (long long)mstat_reg); VERBOSE0("\n"); } return mctx->dt; } static void afu_dump_mfirs(struct mdev_ctx *mctx) { unsigned int i; struct cgzip_afu_fir *fir; if (verbose > 3) { ddcb_hexdump(fd_out, mctx->errinfo, mctx->errinfo_size); return; } for (i = 0, fir = (struct cgzip_afu_fir *)mctx->errinfo; i < MMIO_FIR_REGS_NUM; i++) { VERBOSE0(" AFU[%d] FIR: %d: 0x%08x addr: 0x%08x " "mmio: 0x%016llx\n", mctx->card, i, be32toh(fir[i].fir_val), be32toh(fir[i].fir_addr), (long long)mctx->fir[i]); } } /* * Print FIRs only if they have changed. Always collect them. 
*/ static int afu_check_mfirs(struct mdev_ctx *mctx) { int i; uint64_t data; uint32_t offs; bool changed = false; bool dead = false; long cr_device = 0; time_t t; int rc; for (i = 0; i < MMIO_FIR_REGS_NUM; i++) { offs = MMIO_FIR_REGS_BASE + i * 8; mmio_read(mctx->afu_h, MMIO_MASTER_CTX_NUMBER, offs, &data); if (data != mctx->fir[i]) changed = true; if (data == -1ull) dead = true; mctx->fir[i] = data; } if (changed) { t = time(NULL); VERBOSE0("%s", ctime(&t)); /* Always print this ... */ cxl_get_cr_device(mctx->afu_h, 0, &cr_device); VERBOSE0(" cr_device: 0x%04lx\n", (unsigned long)cr_device); if (mctx->errinfo) { rc = cxl_errinfo_read(mctx->afu_h, mctx->errinfo, 0, mctx->errinfo_size); if (rc != (int)mctx->errinfo_size) { VERBOSE0(" cxl_err_info_read returned %d!\n", rc); } afu_dump_mfirs(mctx); } if (dead) { t = time(NULL); VERBOSE0("%s AFU[%d] card is dead.\n", ctime(&t), mctx->card); } } return mctx->dt; } /* Return true if card Software Release is OK */ static bool check_app(struct mdev_ctx *mctx, uint16_t min_rel) { int rc; uint64_t data; /* Get MMIO_APP_VERSION_REG */ rc = mmio_read(mctx->afu_h, MMIO_MASTER_CTX_NUMBER, MMIO_APP_VERSION_REG, &data); if (0 != rc) return false; /* Check Application Version for Version higher than 0403 */ /* Register 8 does have following layout for the 64 bits */ /* RRRRFFIINNNNNNNN */ /* RRRR == 16 bit Software Release (0404) */ /* FF == 8 bit Software Fix Level on card (01) */ /* II == 8 bit Software Interface ID (03) */ /* NNNNNNNN == 32 Bit Function (475a4950) = (GZIP) */ if (0x475a4950 != (data & 0xffffffff)) return false; data = data >> 32; /* RRRRFFII */ if (0x03 == (data & 0xff)) { /* Check II */ data = data >> 16; /* Check RRRR */ if ((uint16_t)data >= min_rel) /* need >= min_rel */ return true; } return false; } static int do_master(struct mdev_ctx *mctx) { int dt = mctx->dt; mctx->loop++; VERBOSE2("AFU[%d:XXX] Loop: %d Delay: %d sec mode: 0x%x left: %d\n", mctx->card, mctx->loop, mctx->dt, mctx->mode, mctx->count); if 
(CHECK_FIRS_MODE == (CHECK_FIRS_MODE & mctx->mode)) dt = afu_check_mfirs(mctx); if (CHECK_TIME_MODE == (CHECK_TIME_MODE & mctx->mode)) dt = afu_check_stime(mctx); return dt; } static void sig_handler(int sig) { struct mdev_ctx *mctx = &master_ctx; VERBOSE0("Sig Handler Signal: %d SID: %d\n", sig, mctx->my_sid); afu_m_close(mctx); fflush(fd_out); fclose(fd_out); exit(EXIT_SUCCESS); } static void help(char *prog) { printf("Usage: %s [-CvhVd] [-f file] [-c count] [-i delay]\n" "\t-C, --card Card to use (default 0)\n" "\t-V, --version \tPrint Version number\n" "\t-h, --help This help message\n" "\t-q, --quiet No output at all\n" "\t-v, --verbose \tverbose mode, up to -vvv\n" "\t-c, --count Loops to run (-1 = forever)\n" "\t-i, --interval Interval time in sec (default 1 sec)\n" "\t-d, --daemon Start in Daemon process (background)\n" "\t-m, --mode Mode:\n" "\t 1 = Check Master Firs\n" "\t 2 = Report Context Details\n" "\t-f, --log-file Log File name when running in -d " "(daemon)\n" "\n" "Figure out how many card resets are allowed within an hour:\n" " sudo cat /sys/kernel/debug/powerpc/eeh_max_freezes\n" "\n" "Set this to a higher value with:\n" " sudo sh -c 'echo 10000 > /sys/kernel/debug/powerpc/eeh_max_freezes'\n" "\n" "Manually resetting a card:\n" " sudo sh -c 'echo 1 > /sys/class/cxl/card0/reset'\n" "\n", prog); } /** * Get command line parameters and create the output file. 
*/ int main(int argc, char *argv[]) { int rc = EXIT_SUCCESS; int ch; unsigned int i; char *log_file = NULL; struct mdev_ctx *mctx = &master_ctx; int dt; int mode; fd_out = stdout; /* Default */ mctx->afu_h = NULL; /* No handle */ mctx->loop = 0; /* Start Loop Counter */ mctx->quiet = false; /* Default */ mctx->dt = 1; /* Default, 1 sec delay time */ mctx->count = -1; /* Default, run forever */ mctx->card = 0; /* Default, Card 0 */ mctx->mode = 0; /* Default, nothing to watch */ mctx->daemon = false; /* Not in Daemon mode */ for (i = 0; i < MMIO_FIR_REGS_NUM; i++) mctx->fir[i] = -1; rc = EXIT_SUCCESS; while (1) { int option_index = 0; static struct option long_options[] = { { "card", required_argument, NULL, 'C' }, { "version", no_argument, NULL, 'V' }, { "quiet", no_argument, NULL, 'q' }, { "help", no_argument, NULL, 'h' }, { "verbose", no_argument, NULL, 'v' }, { "count", required_argument, NULL, 'c' }, { "interval", required_argument, NULL, 'i' }, { "daemon", no_argument, NULL, 'd' }, { "log-file", required_argument, NULL, 'f' }, { "mode", required_argument, NULL, 'm' }, { 0, 0, NULL, 0 } }; ch = getopt_long(argc, argv, "C:f:c:i:m:Vqhvd", long_options, &option_index); if (-1 == ch) break; switch (ch) { case 'C': /* --card */ mctx->card = strtol(optarg, (char **)NULL, 0); break; case 'V': /* --version */ printf("%s\n", version); exit(EXIT_SUCCESS); break; case 'q': /* --quiet */ mctx->quiet = true; break; case 'h': /* --help */ help(argv[0]); exit(EXIT_SUCCESS); break; case 'v': /* --verbose */ verbose++; break; case 'c': /* --count */ mctx->count = strtoul(optarg, NULL, 0); if (0 == mctx->count) mctx->count = 1; break; case 'i': /* --interval */ mctx->dt = strtoul(optarg, NULL, 0); break; case 'd': /* --daemon */ mctx->daemon = true; break; case 'm': /* --mode */ mode = strtoul(optarg, NULL, 0); switch (mode) { case 1: mctx->mode |= CHECK_FIRS_MODE; break; case 2: mctx->mode |= CHECK_TIME_MODE; break; default: fprintf(stderr, "Please provide correct " "Mode 
Option (1..2)\n"); exit(EXIT_FAILURE); } break; case 'f': /* --log-file */ log_file = optarg; break; default: help(argv[0]); exit(EXIT_FAILURE); } } if ((mctx->card < 0) || (mctx->card >= NUM_CARDS)) { fprintf(stderr, "Err: %d for option -C is invalid, please provide " "0..%d!\n", mctx->card, NUM_CARDS-1); exit(EXIT_FAILURE); } if (mctx->daemon) { if (NULL == log_file) { fprintf(stderr, "Please Provide log file name (-f) " "if running in daemon mode !\n"); exit(EXIT_FAILURE); } } if (log_file) { fd_out = fopen(log_file, "w+"); if (NULL == fd_out) { fprintf(stderr, "Can not create/append to file %s\n", log_file); exit(EXIT_FAILURE); } } signal(SIGCHLD,SIG_IGN); /* ignore child */ signal(SIGTSTP,SIG_IGN); /* ignore tty signals */ signal(SIGTTOU,SIG_IGN); signal(SIGTTIN,SIG_IGN); signal(SIGHUP,sig_handler); /* catch -1 hangup signal */ signal(SIGINT, sig_handler); /* Catch -2 */ signal(SIGTERM,sig_handler); /* catch -15 kill signal */ if (mctx->daemon) { mctx->pid = fork(); if (mctx->pid < 0) { printf("Fork() failed\n"); exit(EXIT_FAILURE); } if (mctx->pid > 0) { printf("Child Pid is %d Parent exit here\n", mctx->pid); exit(EXIT_SUCCESS); } if (chdir("/")) { fprintf(stderr, "Can not chdir to / !!!\n"); exit(EXIT_FAILURE); } umask(0); /* set new session */ mctx->my_sid = setsid(); printf("Child sid: %d from pid: %d\n", mctx->my_sid, mctx->pid); if(mctx->my_sid < 0) exit(EXIT_FAILURE); close(STDIN_FILENO); close(STDOUT_FILENO); close(STDERR_FILENO); } rc = cxl_mmio_install_sigbus_handler(); if (rc != 0) { VERBOSE0("Err: Install cxl sigbus_handler rc=%d\n", rc); exit(EXIT_FAILURE); } if (0 != afu_m_open(mctx)) { VERBOSE0("Err: failed to open Master Context for " "CAPI Card: %u\n" "\tCheck existence/permissions of /dev/cxl/* or see " "kernel logfile.\n" "\terrno=%d %s\n", mctx->card, errno, strerror(errno)); exit(EXIT_FAILURE); } if (false == check_app(mctx, MIN_REL_VERSION)) { VERBOSE0("Err: Wrong Card Release. 
Need >= 0x%02x\n", MIN_REL_VERSION); afu_m_close(mctx); exit(EXIT_FAILURE); } while (1) { dt = do_master(mctx); /* Process */ if (dt) sleep(dt); /* Sleep Remaining time */ if (-1 == mctx->count) continue; /* Run Forever */ mctx->count--; /* Decrement Runs */ if (0 == mctx->count) break; /* Exit */ } if (!mctx->quiet && verbose) VERBOSE0("[%s] AFU[%d] after %d loops\n", __func__, mctx->card, mctx->loop); afu_m_close(mctx); fflush(fd_out); fclose(fd_out); exit(rc); } genwqe-user-4.0.18/tools/genwqe_memcopy.c000066400000000000000000000700411303345043000203660ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libddcb.h" #include "genwqe_tools.h" #include "force_cpu.h" #include "memcopy_ddcb.h" /* Error injection bitmask */ #define ERR_INJ_NONE 0x0 #define ERR_INJ_INPUT 0x1 #define ERR_INJ_OUTPUT 0x2 #define ERR_INJ_SIZE 0x4 #define ERR_INJ_DDCB 0x8 static const char *version = GIT_VERSION; int verbose_flag = 0; #define VERBOSE0(...) do { \ fprintf(stderr, __VA_ARGS__); \ } while (0) #define VERBOSE1(...) do { \ if (verbose_flag > 0) \ fprintf(stderr, __VA_ARGS__); \ } while (0) #define VERBOSE2(...) do { \ if (verbose_flag > 1) \ fprintf(stderr, __VA_ARGS__); \ } while (0) #define VERBOSE3(...) 
do { \ if (verbose_flag > 3) \ fprintf(stderr, __VA_ARGS__); \ } while (0) #define EVERBOSE(...) do { \ fprintf(stderr, __VA_ARGS__); \ } while (0) struct memcpy_in_parms { int card_no; /* Card 0 default, changed with -C option */ int card_type; /* card type 0 def, changed with -A option */ int mode; /* Change with -n option */ bool quiet; /* quiet=false default, changed with -q opt */ int cpu; /* -1 default, changed with - -C option */ int count; /* 1 default, change with -c option */ bool force_cmp; /* default false, Change with -F option */ int use_sglist; /* 0 default, change with -g option */ int preload; /* 1 default, chane with -l option */ int threads; /* 1 default, change with -t option */ FILE *o_fp; /* Output File pointer */ FILE *fpattern; /* pattern input file pointer */ uint64_t in_ats_type; /* ATS_TYPE_FLAT_RDWR or ATS_TYPE_SGL_RDWR */ unsigned int page_size; int data_buf_size; /* 4k default, changed with -s option */ unsigned int pgoffs_i; /* offset in the 4k Aligned input buffer */ unsigned int pgoffs_o; /* offset in the 4k Aligned output buffer */ uint32_t mcpy_crc32; /* my value to compare */ uint32_t mcpy_adler32; /* my value to compare */ int have_threads; struct timespec stime; /* Start time */ struct timespec etime; /* End time */ unsigned int err_inj; /* error injection while running DDCBs */ }; struct memcpy_thread_data { int thread; pthread_t tid; accel_t accel; uint8_t *ibuf4k; /* 4 K aligned buffer */ uint8_t *ibuf; /* the 4k aligned buffer + pgoffs_i */ struct memcpy_in_parms *ip; uint64_t out_ats_type; /* ATS_TYPE_FLAT_RDWR or ATS_TYPE_SGL_RDWR */ int err; /* Return code from Thread */ int errors; /* Return data */ int memcopies; long long bytes_copied; /* Return data */ uint64_t total_usec; /* Return time in usec */ struct timespec stime; /* Thread Start time */ struct timespec etime; /* Thread End time */ }; static void *__memcpy_thread(void *data); /** * @brief prints valid command line options * * @param prog current program's name 
*/ static void usage(const char *prog) { printf("Usage: %s\n" " -h, --help print usage information\n" " -v, --verbose verbose mode\n" " -C, --card use this card for operation\n" " -A, --accelerator-type=GENWQE|CAPI CAPI is only available " "for System p\n" " -V, --version\n" " -q, --quiet quiece output\n" " -c, --count do multiple memcopies\n" " -l, --preload preload multiple ddcb's. " "(default 1, only for CAPI Card)\n" " -X, --cpu only run on this CPU\n" " -D, --debug create debug data on failure\n" " -G, --use-sglist use the scatter gather list\n" " -n, --nonblocking use nonblcoking behavior\n" " -p, --patternfile ]\n" " -s, --bufsize default is 4KiB\n" " -i, --pgoffs_i byte offset for input buffer\n" " -o, --pgoffs_o byte offset for output buffer\n" " -F, --force-compare \n" " -t, --threads run threads, default is 1\n" " -Y, --inject-error IN:0x1, OUT:0x2, SIZE:0x4, DDCB:0x8\n" "\n" "This utility sends memcopy DDCBs to the application\n" "chip unit. It can be used to check the cards health and/or\n" "to produce stress on the card to verify its correct\n" "function.\n" "\n" "Example:\n" " dd if=/dev/urandom bs=4096 count=1024 of=input_data.bin\n" " %s -C0 -F -D --patternfile input_data.bin output_data.bin\n" " echo $?\n" " diff input_data.bin output_data.bin\n" " echo $?\n" "\n", prog, prog); } /** * str_to_num - Convert string into number and cope with endings like * KiB for kilobyte * MiB for megabyte * GiB for gigabyte */ static inline uint64_t str_to_num(char *str) { char *s = str; uint64_t num = strtoull(s, &s, 0); if (*s == '\0') return num; if (strcmp(s, "KiB") == 0) num *= 1024; else if (strcmp(s, "MiB") == 0) num *= 1024 * 1024; else if (strcmp(s, "GiB") == 0) num *= 1024 * 1024 * 1024; else { pr_err("--size or -s out of range, use KiB/MiB or GiB only\n"); num = ULLONG_MAX; errno = ERANGE; exit(EXIT_FAILURE); } return num; } static void INT_handler(int sig); static bool stop_memcopying = false; static void INT_handler(int sig) { signal(sig, SIG_IGN); 
stop_memcopying = true; /* signal(SIGINT, INT_handler); *//* Try again */ } static void __hexdump(uint8_t *buff, unsigned int size, unsigned int offs) { unsigned int i; const uint8_t *b = (uint8_t *)buff; for (i = 0; i < size; i++) { if ((i & 0x0f) == 0x00) EVERBOSE(" %08x: ", offs + i); EVERBOSE(" %02x", b[i]); if ((i & 0x0f) == 0x0f) EVERBOSE("\n"); } EVERBOSE("\n"); } static uint64_t tdiff_us(struct timespec *et, struct timespec *st) { uint64_t td; if (st->tv_nsec > et->tv_nsec) { td = (uint64_t) (1000000000 + et->tv_nsec); et->tv_sec--; } else td = (uint64_t)et->tv_nsec; td -= (uint64_t)st->tv_nsec; td = td / 1000; td += (uint64_t)(et->tv_sec - st->tv_sec) * 1000000; return td; } /* update tl if t is less than tl */ static void time_low(struct timespec *tl, struct timespec *t) { if ((uint32_t)t->tv_sec < (uint32_t)tl->tv_sec) { tl->tv_sec = t->tv_sec; tl->tv_nsec = t->tv_nsec; return; } if ((uint32_t)t->tv_nsec < (uint32_t)tl->tv_nsec) tl->tv_nsec = t->tv_nsec; return; } /* update th if t is greater than th */ static void time_high(struct timespec *th, struct timespec *t) { if ((uint32_t)t->tv_sec > (uint32_t)th->tv_sec) { th->tv_sec = t->tv_sec; th->tv_nsec = t->tv_nsec; return; } if ((uint32_t)t->tv_nsec > (uint32_t)th->tv_nsec) th->tv_nsec = t->tv_nsec; return; } /** * zEDC has a different cmd code for memcopy and support * CRC32/ADLER32. 
*/ static inline int accel_is_zedc(accel_t card) { return (accel_get_app_id(card) & DDCB_APPL_ID_MASK) == GENWQE_APPL_ID_GZIP; } static int accel_memcpy(accel_t card, struct ddcb_cmd *cmd_list, int preload, void *dest, size_t dest_n, uint64_t out_ats_type, void *src, size_t src_n, uint64_t in_ats_type, uint32_t *crc32, uint32_t *adler32, uint32_t *inp_processed, uint32_t *outp_returned, unsigned int err_inj) { int rc, i; struct ddcb_cmd *cmd = cmd_list; struct asiv_memcpy *asiv; struct asv_memcpy *asv; for (i = 0; i < preload; i++) { ddcb_cmd_init(cmd); /* setup ASIV part */ asiv = (struct asiv_memcpy *)&cmd->asiv; cmd->ddata_addr = 0ull; /* FIXME */ cmd->acfunc = DDCB_ACFUNC_APP; /* goto accelerator */ cmd->cmd = ZCOMP_CMD_ZEDC_MEMCOPY; cmd->cmdopts = 0x0000; /* pass addresses not lists */ cmd->asiv_length= 0x40 - 0x20; cmd->asv_length = 0xC0 - 0x80; /* try to absorb all */ cmd->ats = 0x0; asiv->inp_buff = __cpu_to_be64((unsigned long)src); asiv->inp_buff_len = __cpu_to_be32((unsigned long)src_n); cmd->ats |= ATS_SET_FLAGS(struct asiv_memcpy, inp_buff, in_ats_type); asiv->outp_buff = __cpu_to_be64((unsigned long)dest); asiv->outp_buff_len = __cpu_to_be32((uint32_t)dest_n); cmd->ats |= ATS_SET_FLAGS(struct asiv_memcpy, outp_buff, out_ats_type); /* Only relevant for the ZEDC variant. */ asiv->in_adler32 = __cpu_to_be32(1); asiv->in_crc32 = __cpu_to_be32(0); /* This will surely crash the application ... 
*/ if (err_inj & ERR_INJ_INPUT) { asiv->inp_buff ^= 0xffffffffffffffffull; fprintf(stderr, "ERR_INJ_INPUT: %016llx\n", (long long)asiv->inp_buff); } if (err_inj & ERR_INJ_OUTPUT) { asiv->outp_buff ^= 0xffffffffffffffffull; fprintf(stderr, "ERR_INJ_OUTPUT: %016llx\n", (long long)asiv->outp_buff); } if (err_inj & ERR_INJ_SIZE) { asiv->inp_buff_len ^= 0xfffffffffull; asiv->outp_buff_len ^= 0xffffffffull; fprintf(stderr, "ERR_INJ_SIZE: %08lx/%08lx\n", (long)asiv->inp_buff_len, (long)asiv->outp_buff_len); } if (i < (preload -1)) cmd->next_addr = (unsigned long)(cmd + 1); else cmd->next_addr = 0x0; cmd++; } rc = accel_ddcb_execute(card, cmd_list, NULL, NULL); cmd = &cmd_list[0]; asv = (struct asv_memcpy *)&cmd->asv; *crc32 = __be32_to_cpu(asv->out_crc32); *adler32 = __be32_to_cpu(asv->out_adler32); *inp_processed = __be32_to_cpu(asv->inp_processed); *outp_returned = __be32_to_cpu(asv->outp_returned); return rc; } static void ddcb_print_dma_err(struct _asv_runtime_dma_error *d) { fprintf(stderr, " raddr: %016llx rfmt/chan/disc: %08x " "rdmae: %04x rsge: %04x\n" " waddr: %016llx wfmt/chan/disc: %08x " "wdmae: %04x wsge: %04x\n", (long long)__be64_to_cpu(d->raddr_be64), __be32_to_cpu(d->rfmt_chan_disccnt_be32), __be16_to_cpu(d->rdmae_be16), __be16_to_cpu(d->rsge_be16), (long long)__be64_to_cpu(d->waddr_be64), __be32_to_cpu(d->wfmt_chan_disccnt_be32), __be16_to_cpu(d->wdmae_be16), __be16_to_cpu(d->wsge_be16)); } static void *__memcpy_thread(void *data) { struct memcpy_thread_data *pt = (struct memcpy_thread_data *)data; struct memcpy_in_parms *ip = pt->ip; int err = 0; int errors = 0; int rc, i; uint8_t *obuf, *obuf4k; /* Output buffer */ struct ddcb_cmd *ddcb_list; struct ddcb_cmd *ddcb0 = NULL; struct timespec stime = { .tv_sec = 0, .tv_nsec = 0 }; struct timespec etime = { .tv_sec = 0, .tv_nsec = 0 }; uint32_t mcpy_inp_processed, mcpy_outp_returned; uint32_t mcpy_crc32, mcpy_adler32; uint64_t total_usec = 0; int memcopies = 0; int count = ip->count; long long bytes_copied 
= 0; /* Allocate output buffer */ if (ip->use_sglist) { pt->out_ats_type = ATS_TYPE_SGL_RDWR; obuf4k = memalign(ip->page_size, ip->data_buf_size + ip->pgoffs_o); if (ip->use_sglist > 1) accel_pin_memory(pt->accel, obuf4k, ip->data_buf_size + ip->pgoffs_o, 1); } else { pt->out_ats_type = ATS_TYPE_FLAT_RDWR; obuf4k = accel_malloc(pt->accel, ip->data_buf_size + ip->pgoffs_o); } if ((ip->data_buf_size != 0) && (obuf4k == NULL)) { pr_err("Can not allocate Output Buffer\n"); err = EX_MEMORY; goto __memcpy_exit_1; } memset(obuf4k, 0xff, ip->data_buf_size + ip->pgoffs_o); obuf = obuf4k + ip->pgoffs_o; /* Allocate ddcb list */ ddcb_list = (struct ddcb_cmd *) malloc(ip->preload * sizeof(struct ddcb_cmd)); if (NULL == ddcb_list) { pr_err("Can not allocate %d DDCB List\n", ip->preload); err = EX_MEMORY; goto __memcpy_exit_2; } VERBOSE1("Thread: %d memcopy: %p (in) to %p (out), pageoffs %d (in) " "%d (out), %d bytes Preload: %d\n", pt->thread, pt->ibuf, obuf, ip->pgoffs_i, ip->pgoffs_o, ip->data_buf_size, ip->preload); clock_gettime(CLOCK_MONOTONIC_RAW, &stime); pt->stime.tv_sec = stime.tv_sec; /* Save Start Time */ pt->stime.tv_nsec = stime.tv_nsec; /* Save Start Time */ for (count = 0; count < ip->count; count++) { if (stop_memcopying) break; int xerrno; /* preset output buffer when we check results */ if (ip->force_cmp) memset(obuf, 0x55, ip->data_buf_size); clock_gettime(CLOCK_MONOTONIC_RAW, &stime); rc = accel_memcpy(pt->accel, ddcb_list, ip->preload, obuf, ip->data_buf_size, pt->out_ats_type, pt->ibuf, ip->data_buf_size, ip->in_ats_type, &mcpy_crc32, &mcpy_adler32, &mcpy_inp_processed, &mcpy_outp_returned, ip->err_inj); xerrno = errno; clock_gettime(CLOCK_MONOTONIC_RAW, &etime); pt->etime.tv_sec = etime.tv_sec; /* Save End Time */ pt->etime.tv_nsec = etime.tv_nsec; /* Save End Time */ total_usec += tdiff_us(&etime, &stime); ddcb0 = ddcb_list; /* i only use the 1st ddcb */ if (rc != DDCB_OK) { struct _asv_runtime_dma_error *d; fprintf(stderr, "\nERR: Thread: %d MEMCOPY 
DDCB[%d] failed, " "%s (%d)\n" " errno=%d %s\n", pt->thread, pt->memcopies, ddcb_strerror(rc), rc, xerrno, strerror(xerrno)); fprintf(stderr, " RETC: %03x %s ATTN: %x PROGR: %x\n" " from card CRC32: %08x ADLER: %08x\n" " original CRC32: %08x ADLER: %08x\n", ddcb0->retc, ddcb_retc_strerror(ddcb0->retc), ddcb0->attn, ddcb0->progress, mcpy_crc32, mcpy_adler32, ip->mcpy_crc32, ip->mcpy_adler32); fprintf(stderr, " DEQUEUE=%016llx CMPLT=%016llx " "DISP=%016llx\n", (long long)ddcb0->deque_ts, (long long)ddcb0->cmplt_ts, (long long)ddcb0->disp_ts); if ((ddcb0->retc == DDCB_RETC_UNEXEC) && (ddcb0->attn == 0xe007)) { d = (struct _asv_runtime_dma_error *) ddcb0->asv; ddcb_print_dma_err(d); } ddcb_hexdump(stderr, ddcb0->asv, sizeof(ddcb0->asv)); err = EX_ERR_CARD; goto __memcpy_exit_3; } /* Check CRC and Adler */ if ((mcpy_crc32 != ip->mcpy_crc32) || (mcpy_adler32 != ip->mcpy_adler32)) { fprintf(stderr, "ERR: Thread: %d CRC/ADLER does not " "match!\n" " from card CRC32: %08x ADLER: %08x\n" " original CRC32: %08x ADLER: %08x " "at %d of %d loops\n", pt->thread, mcpy_crc32, mcpy_adler32, ip->mcpy_crc32, ip->mcpy_adler32, count, ip->count); errors++; } /* Was all data processed? */ if ((ip->data_buf_size != (int)mcpy_inp_processed) || (ip->data_buf_size != (int)mcpy_outp_returned)) { fprintf(stderr, "ERR: Thread: %d IN/OUT sizes do " "not match!\n" " from card IN: %08x OUT: %08x\n" " original IN: %08x OUT: %08x at %d of %d " "loops\n", pt->thread, mcpy_inp_processed, mcpy_outp_returned, ip->data_buf_size, ip->data_buf_size, count, ip->count); errors++; } if (ip->force_cmp || errors) { /* Check if data is correct ... 
*/ for (i = 0; i < ip->data_buf_size; i++) { if (obuf[i] != pt->ibuf[i]) { EVERBOSE("\nERR: Thread: %d @ " "offs %08x\n" " RETC: %03x %s ATTN: %x " "PROGR: %x\n" " INP_PROCESSED: %08x " "OUTP_RETURNED: %08x\n", pt->thread, i, ddcb0->retc, ddcb_retc_strerror(ddcb0->retc), ddcb0->attn, ddcb0->progress, mcpy_inp_processed, mcpy_outp_returned); errors++; break; } } if (i < ip->data_buf_size) { int offs; unsigned int len; offs = i - 32; if (offs < 0) offs = 0; len = MIN(64, ip->data_buf_size - offs); EVERBOSE("memcopy src buffer (%p):\n", pt->ibuf); __hexdump(&pt->ibuf[offs], len, offs); EVERBOSE("memcopy dst buffer (%p):\n", obuf); __hexdump(&obuf[offs], len, offs); errors++; } } if (errors) break; memcopies += ip->preload; bytes_copied += (long long)ip->preload * ip->data_buf_size; } /* write output data if requested to do so only for 1st thread (0) */ if (0 == pt->thread) { if (NULL != ip->o_fp) { rc = fwrite(obuf, 1, ip->data_buf_size, ip->o_fp); if (rc != ip->data_buf_size) { pr_err("can not write output file !\n"); err = EX_ERRNO; } fclose(ip->o_fp); ip->o_fp = NULL; } } /* Return data to main */ pt->errors = errors; pt->memcopies = memcopies; pt->bytes_copied = bytes_copied; pt->total_usec = total_usec; __memcpy_exit_3: /* free my ddcb list */ free(ddcb_list); __memcpy_exit_2: /* Free output buffer */ if (ip->use_sglist) { if (ip->use_sglist > 1) accel_unpin_memory(pt->accel, obuf4k, ip->data_buf_size + ip->pgoffs_o); free(obuf4k); } else accel_free(pt->accel, obuf4k, ip->data_buf_size + ip->pgoffs_o); obuf4k = NULL; __memcpy_exit_1: pt->err = err; return NULL; } /* Free input buffer for each Thread */ static int __memcpy_free_ibuf(struct memcpy_in_parms *ip, struct memcpy_thread_data *pt) { /* the last one must free ibuf */ if (ip->use_sglist) { if (ip->use_sglist > 1) accel_unpin_memory(pt->accel, pt->ibuf4k, ip->data_buf_size + ip->pgoffs_i); free(pt->ibuf4k); } else accel_free(pt->accel, pt->ibuf4k, ip->data_buf_size + ip->pgoffs_i); pt->ibuf4k = NULL; return 
0; } /* Allocate input buffer per Thread */ static int __memcpy_alloc_ibuf(struct memcpy_in_parms *ip, struct memcpy_thread_data *pt) { int i; size_t fread_size = 0; if (ip->use_sglist) { ip->in_ats_type = ATS_TYPE_SGL_RDWR; pt->ibuf4k = memalign(ip->page_size, ip->data_buf_size + ip->pgoffs_i); if (ip->use_sglist > 1) accel_pin_memory(pt->accel, pt->ibuf4k, ip->data_buf_size + ip->pgoffs_i, 0); } else { ip->in_ats_type = ATS_TYPE_FLAT_RD; pt->ibuf4k = accel_malloc(pt->accel, ip->data_buf_size + ip->pgoffs_i); } if ((ip->data_buf_size != 0) && (pt->ibuf4k == NULL)) { pr_err("Can not allocate Input memory\n"); return EX_MEMORY; } /* preset full input buffer */ memset(pt->ibuf4k, 0xee, ip->data_buf_size + ip->pgoffs_i); pt->ibuf = pt->ibuf4k + ip->pgoffs_i; /* preset partial input buffer in case pgoffs_i is set */ if (ip->fpattern) { fread_size = fread(pt->ibuf, 1, ip->data_buf_size, ip->fpattern); if ((int)fread_size != ip->data_buf_size) { pr_err("Can not read pattern file!\n"); return EX_ERRNO; } fclose(ip->fpattern); } else { for (i = 0; i < ip->data_buf_size; i++) /* preset inp buffer */ pt->ibuf[i] = (uint8_t)i; } if (0 == pt->thread) { /* Create Adler and CRC from Input buffer, which is thea same for each thread */ ip->mcpy_adler32 = adler32(0L, Z_NULL, 0); /* start value */ ip->mcpy_adler32 = adler32(ip->mcpy_adler32, pt->ibuf, ip->data_buf_size); ip->mcpy_crc32 = crc32(0L, Z_NULL, 0); /* start value */ ip->mcpy_crc32 = crc32(ip->mcpy_crc32 , pt->ibuf, ip->data_buf_size); } return 0; } int main(int argc, char *argv[]) { int cmd; char *endptr = NULL; pthread_t tid; int thread; char *out_f; /* Output File name used */ int err_code; unsigned long long frequency, wtime_usec = 0, wtime_e = 0; /* Summ for all threads */ long long bytes_copied = 0; uint64_t total_usec = 0; uint64_t total_msec = 0; int memcopies = 0; int errors = 0; int mib, kib; unsigned long kibs, mibs; struct memcpy_thread_data *tdata; struct memcpy_thread_data *pt; struct memcpy_in_parms ip; 
ip.card_no = 0; ip.card_type = DDCB_TYPE_GENWQE; ip.mode = DDCB_MODE_RDWR | DDCB_MODE_ASYNC; ip.quiet = false; /* not quiet */ ip.cpu = -1; ip.count = 1; ip.force_cmp = false; ip.use_sglist = 0; ip.preload = 1; ip.threads = 1; ip.o_fp = NULL; ip.fpattern = NULL; ip.in_ats_type = ATS_TYPE_FLAT_RD; /* default, no SGL */ ip.page_size = sysconf(_SC_PAGESIZE); ip.data_buf_size = 4096; /* for inbuff and outbuff */ ip.pgoffs_i = 0; ip.pgoffs_o = 0; ip.mcpy_crc32 = 0; ip.mcpy_adler32 = 0; ip.have_threads = 0; ip.err_inj = ERR_INJ_NONE; while (1) { int option_index = 0; static struct option long_options[] = { /* functions */ /* options */ { "card", required_argument, NULL, 'C' }, { "accelerator-type", required_argument, NULL, 'A' }, { "cpu", required_argument, NULL, 'X' }, { "use-sglist", no_argument, NULL, 'G' }, { "nonblocking", no_argument, NULL, 'n' }, { "bufsize", required_argument, NULL, 's' }, { "patternfile", required_argument, NULL, 'p' }, { "count", required_argument, NULL, 'c' }, { "preload", required_argument, NULL, 'l' }, { "pgoffs_i", required_argument, NULL, 'i' }, { "pgoffs_o", required_argument, NULL, 'o' }, { "force-compare", required_argument, NULL, 'F' }, { "threads", required_argument, NULL, 't' }, { "err-inject", required_argument, NULL, 'Y' }, /* misc/support */ { "version", no_argument, NULL, 'V' }, { "debug", no_argument, NULL, 'D' }, { "quiet", no_argument, NULL, 'q' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, 'h' }, { 0, no_argument, NULL, 0 }, }; cmd = getopt_long(argc, argv, "nqGDFi:o:p:s:c:C:A:X:vVhl:t:Y:", long_options, &option_index); if (cmd == -1) /* all params processed ? 
*/ break; switch (cmd) { case 'C': if (strcmp(optarg, "RED") == 0) { ip.card_no = ACCEL_REDUNDANT; break; } ip.card_no = strtol(optarg, (char **)NULL, 0); break; case 'A': /* set card number */ if (strcmp(optarg, "GENWQE") == 0) { ip.card_type = DDCB_TYPE_GENWQE; break; } if (strcmp(optarg, "CAPI") == 0) { ip.card_type = DDCB_TYPE_CAPI; break; } ip.card_type = strtol(optarg, (char **)NULL, 0); if ((DDCB_TYPE_GENWQE != ip.card_type) || (DDCB_TYPE_CAPI != ip.card_type)) { usage(argv[0]); exit(EXIT_FAILURE); } break; case 'X': ip.cpu = strtoul(optarg, (char **)NULL, 0); break; case 'G': ip.use_sglist++; break; case 'c': ip.count = strtol(optarg, (char **)NULL, 0); break; case 'i': ip.pgoffs_i = strtoul(optarg, &endptr, 0); if ((optarg && (((char *)optarg)[0] == '-')) || (*endptr != '\0')) { pr_err("illegal input offset!\n"); usage(argv[0]); exit(EXIT_FAILURE); } break; case 'o': ip.pgoffs_o = strtoul(optarg, &endptr, 0); if ((optarg && (((char *)optarg)[0] == '-')) || (*endptr != '\0')) { pr_err("illegal output offset!\n"); usage(argv[0]); exit(EXIT_FAILURE); } break; case 's': ip.data_buf_size = str_to_num(optarg); break; case 'p': ip.fpattern = fopen(optarg, "rb"); if (ip.fpattern == NULL) { pr_err("Pattern file %s not found!\n", optarg); } else { fseek(ip.fpattern, 0L, SEEK_END); ip.data_buf_size = ftell(ip.fpattern); fseek(ip.fpattern, 0L, SEEK_SET); } break; case 'l': /* preload */ ip.preload = strtol(optarg, (char **)NULL, 0); break; case 't': /* threads */ ip.threads = strtol(optarg, (char **)NULL, 0); break; case 'F': ip.force_cmp = true; break; case 'n': ip.mode |= DDCB_MODE_NONBLOCK; break; case 'h': usage(argv[0]); exit(EXIT_SUCCESS); break; case 'V': printf("%s\n", version); exit(EXIT_SUCCESS); case 'D': /* debug_flag++; *//*FIXME */ break; case 'q': ip.quiet = true; break; case 'Y': ip.err_inj = strtol(optarg, (char **)NULL, 0); break; case 'v': verbose_flag++; break; default: usage(argv[0]); exit(EXIT_FAILURE); } } if (ACCEL_REDUNDANT == ip.card_no) { if 
(1 != ip.use_sglist) { pr_info("Option -G set when in redundant card " "mode!\n"); ip.use_sglist = 1; } } if (optind < argc) { /* output file */ out_f = argv[optind++]; ip.o_fp = fopen(out_f, "w+"); if (NULL == ip.o_fp) { pr_err("can not open output file '%s': %s\n", out_f, strerror(errno)); exit(EX_ERRNO); } } if (optind != argc) { /* now it must fit */ usage(argv[0]); exit(EXIT_FAILURE); } if ((ip.card_type != DDCB_TYPE_CAPI) && (1 != ip.preload)) { printf("Note: Use Preload option only on CAPI Card !\n"); exit(EXIT_FAILURE); } switch_cpu(ip.cpu, verbose_flag); if (verbose_flag > 1) ddcb_debug(verbose_flag - 1); /* Allocate Thread data */ tdata = (struct memcpy_thread_data*) malloc(ip.threads * sizeof(struct memcpy_thread_data)); if (NULL == tdata) { pr_err("Can not allocate memory Thread Data\n"); exit(EX_MEMORY); } ip.stime.tv_sec = -1;; ip.stime.tv_nsec = -1;; ip.etime.tv_sec = 0; ip.etime.tv_nsec = 0; signal(SIGINT, INT_handler); pt = &tdata[0]; for (thread = 0; thread < ip.threads; thread++, pt++) { pt->thread = thread; pt->ip = &ip; /* Set input parms */ pt->err = 0; pt->errors = 0; pt->bytes_copied = 0; pt->memcopies = 0; pt->total_usec = 0; pt->tid = 0; pt->accel = accel_open(ip.card_no, ip.card_type, ip.mode, &err_code, 0, DDCB_APPL_ID_IGNORE); if (NULL == pt->accel) { pr_err("Failed to open card %u type %u (%d/%s)\n", ip.card_no, ip.card_type, err_code, accel_strerror(pt->accel, err_code)); pt->err = EX_ERR_CARD; continue; } /* Alloc ibuf */ pt->err = __memcpy_alloc_ibuf(&ip, pt); } pt = &tdata[0]; for (thread = 0; thread < ip.threads; thread++, pt++) { if (0 == pt->err) { if (0 == pthread_create(&tid, NULL, &__memcpy_thread, pt)) { pt->tid = tid; ip.have_threads++; } } } pt = &tdata[0]; for (thread = 0; thread < ip.threads; thread++) { if (0 == pt->tid) { /* Skip if tid is not set */ errors++; VERBOSE0("Thread: %d, tid: 0 err: %d\n", thread, pt->err); continue; } pthread_join(pt->tid, NULL); /* wait for good tid */ ip.have_threads--; if (pt->err) { 
errors++; VERBOSE0("Thread: %d, err: %d\n", thread, pt->err); } else { if (false == ip.quiet) { kib = (int)(pt->bytes_copied / 1024); mib = kib / 1024; VERBOSE1("Thread: %d, memcopies: %d, done, " "%lld bytes, %lld usec, ", thread, pt->memcopies, (long long)pt->bytes_copied, (long long)pt->total_usec); /* FIXME: this is not 100 % good code, i know the format_flag is bad */ if (pt->total_usec < 100000) { kibs = ((pt->bytes_copied * 1000000) / 1024) / pt->total_usec; VERBOSE1("%d KiB, in %lld usec, " "%ld KiB/sec", kib, (long long)pt->total_usec, kibs); } else { total_msec = pt->total_usec / 1000; /* now msec */ mibs = (pt->bytes_copied * 1000) / (1024 * 1024) / total_msec; VERBOSE1("%d MiB, in %lld msec, " "%ld MiB/sec", mib, (long long)total_msec, mibs); } VERBOSE1(" %d errors.\n", pt->errors); } } bytes_copied += pt->bytes_copied; memcopies += pt->memcopies; errors += pt->errors; __memcpy_free_ibuf(&ip, pt); if (thread == ip.threads - 1) { wtime_e = accel_get_queue_work_time(pt->accel); frequency = accel_get_frequency(pt->accel); wtime_usec = frequency ? 
wtime_e / (frequency/1000000) : 0; } accel_close(pt->accel); VERBOSE1("Thread %02d Start: %08lld - %08lld " "End: %08lld - %08lld\n", thread, (long long)pt->stime.tv_sec, (long long)pt->stime.tv_nsec, (long long)pt->etime.tv_sec, (long long)pt->etime.tv_nsec); /* Update lowest start time */ time_low(&ip.stime, &pt->stime); /* Update highest end time */ time_high(&ip.etime, &pt->etime); pt->accel = NULL; pt++; } if (false == ip.quiet) { kib = (int)(bytes_copied / 1024); mib = kib / 1024; VERBOSE0("--- MEMCOPY statistics ---\n" "%d memcopies done, %lld bytes, ", memcopies, bytes_copied); total_usec = tdiff_us(&ip.etime, &ip.stime); /* Avoid div fault */ if (total_usec) { if (total_usec < 100000) { kibs = ((bytes_copied * 1000000) / 1024) / total_usec; VERBOSE0("%d KiB, in %lld/%lld usec, " "%ld KiB/sec,", kib, (long long)total_usec, wtime_usec, kibs); } else { total_msec = total_usec / 1000; /* now msec */ mibs = (bytes_copied * 1000) / (1024 * 1024) / total_msec; VERBOSE0("%d MiB, in %lld/%lld msec, " "%ld MiB/sec,", mib, (long long)total_msec, wtime_usec/1000, mibs); } } VERBOSE0(" %d errors.\n", errors); } free(tdata); if (errors != 0) exit(EX_ERR_DATA); exit(EXIT_SUCCESS); } genwqe-user-4.0.18/tools/genwqe_mt_perf000077500000000000000000000216421303345043000201360ustar00rootroot00000000000000#!/bin/bash # # Copyright 2015, 2016, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # # Test-script to measure and tune performance of zlib soft- or hardware # implementation. Use the data to figure out the #threads required to # get best throughput and when adding more threads does not help. # # For the hardware implementation, it will show how many threads in parallel # are needed to saturate the hardware. # # The buffersize test shows the influence of buffering and small buffers # on throughput. Hardware implementation will normally work best with large # buffers. # export ZLIB_ACCELERATOR=GENWQE export ZLIB_CARD=0 export ZLIB_DEFLATE_IMPL=0x01 # Use hardware by default export ZLIB_INFLATE_IMPL=0x01 threads=160 version="https://github.com/ibm-genwqe/genwqe-user" verbose="" test_data="/tmp/test_data.bin" logging=0 sadc=/usr/lib/sysstat/sadc # sadc is unfortunately at different location for different distributions # We try to figure out the location in the code below, if your sadc is at # a different spot, you might need to adopt the script. # Print usage message helper function function usage() { echo "Usage of $PROGRAM:" echo " [-A] use either GENWQE for the PCIe and CAPI for" echo " CAPI based solution available only on System p" echo " Use SW to use software compress/decompression" echo " [-C] set the compression card to use (0, 1, ... )." echo " RED (or -1) drive work to all available cards." echo " [-P] Use polling to detect work-request completion/only CAPI." echo " [-t] " echo " [-l] Enable system load logging" echo " sadc - System activity data collector and gnuplot" echo " must be installed" echo " [-M] Maximum number of threads to be used." echo " [-v] Print status and informational output." echo " [-V] Print program version (${version})" echo " [-h] Print this help message." echo echo "Input data is to be placed in ${test_data}." echo "If it does not exist, the script will generate random example data." echo "Using random data will cause performance to suffer, since it" echo "will not compress nicely. 
So using something more realistic is" echo "certainly a good idea." echo echo "Note that the path needs to be setup to find the zlib_mt_perf tool." echo echo "E.g. run as follows:" echo " Use GenWQE accelerator card 0:" echo " PATH=tools:\$PATH tools/zlib_mt_perf.sh -A GENWQE -C0" echo echo " Use CAPI accelerator card 0:" echo " PATH=tools:\$PATH tools/zlib_mt_perf.sh -A CAPI -C0" echo echo " Use software zlib:" echo " PATH=tools:\$PATH tools/zlib_mt_perf.sh -A SW" echo } ############################################################################### # System Load Logging ############################################################################### function system_load_find_sadc() { if [ -x /usr/lib64/sa/sadc ]; then sadc=/usr/lib64/sa/sadc elif [ -x /usr/lib/sysstat/sadc ]; then sadc=/usr/lib/sysstat/sadc else echo "Cannot find sadc tool for CPU load measurement!" exit 1 fi } function system_load_logging_start() { rm -f system_load.sar system_load.pid ${sadc} 1 system_load.sar & echo $! 
> system_load.pid } function system_load_logging_stop() { kill -9 `cat system_load.pid` # Skip the 1st 4 lines, since they container some header information cp system_load.sar system_load.$ZLIB_ACCELERATOR.sar LC_TIME=posix sar -u -f system_load.sar | tail -n +4 > system_load.txt grep -v Average system_load.txt > system_load.csv LC_TIME=posix sar -u -f system_load.sar > system_load.$ZLIB_ACCELERATOR.csv start=`head -n1 system_load.csv | cut -f1 -d' '` end=`tail -n1 system_load.csv | cut -f1 -d' '` cat < system_load.gnuplot # Gnuplot Config # set terminal pdf size 16,8 set output "system_load.pdf" set autoscale set title "System Load using $ZLIB_ACCELERATOR" set xdata time set timefmt "%H:%M:%S" set xlabel "Time" set xrange ["$start":"$end"] set ylabel "CPU Utilization" set yrange ["0.00":"100.00"] set style data lines set grid # set datafile separator " " plot "system_load.csv" using 1:3 title "%user" with lines lw 4, '' using 1:5 title "%system" with lines lw 4 EOF # Instructing gnuplot to generate a png with out CPU load statistics cat system_load.gnuplot | gnuplot # Safe it under an accelerator unique name mv system_load.pdf system_load.${ZLIB_ACCELERATOR}.pdf } # Parse any options given on the command line while getopts "M:A:C:t:PvVhl" opt; do case ${opt} in A) ZLIB_ACCELERATOR=${OPTARG}; ;; C) ZLIB_CARD=${OPTARG}; ;; P) export ZLIB_DEFLATE_IMPL=0x81; export ZLIB_INFLATE_IMPL=0x81; ;; M) threads=${OPTARG}; ;; t) test_data=${OPTARG}; ;; l) logging=1; ;; v) verbose="-v"; ;; V) echo "${version}" exit 0; ;; h) usage; exit 0; ;; \?) echo "ERROR: Invalid option: -$OPTARG" >&2 exit 1; ;; esac done if [ $ZLIB_ACCELERATOR = "SW" ]; then export ZLIB_DEFLATE_IMPL=0x00; export ZLIB_INFLATE_IMPL=0x00; fi # Random data cannot being compressed. Performance values might be poor. # Text data e.g. logfiles work pretty well. Use those if available. # Download linux.tar.gz which is mainly text. That should perform well. # echo -n "Checking if example data is available ... " if [ ! 
-f ${test_data} ]; then echo "no" if [ ! -f cantrbry.tar.gz ]; then wget http://corpus.canterbury.ac.nz/resources/cantrbry.tar.gz if [ $? -ne 0 ]; then echo "cantrbry.tar.gz is missing. Please download it first."; echo echo "E.g.:"; echo " wget http://corpus.canterbury.ac.nz/resources/cantrbry.tar.gz"; echo exit -1; fi fi echo -n "Duplicating test_data " touch ${test_data} for ((i=0; i<16; i++)); do gzip -f -d -c cantrbry.tar.gz >> ${test_data} echo -n "." done echo " ok" # dd if=/dev/urandom of=${test_data} count=1024 bs=4096 else echo "yes, ${test_data} is there" fi echo -n "Compressing ${test_data} if needed ... " if [ ! -f ${test_data}.gz ]; then gzip -f -c ${test_data} > ${test_data}.gz echo "ok" else echo "no" fi export PATH=./tools:./genwqe-user/tools:/sbin:/usr/sbin:$PATH cpus=`cat /proc/cpuinfo | grep processor | wc -l` bufsize=1MiB count=1 # Generate core dumps, in case something needs debug ulimit -c unlimited echo uname -a echo "Accelerator: ${ZLIB_ACCELERATOR}" echo "Processors: $cpus" echo -n "Raw data: " du -h ${test_data} echo -n "Compressed data: " du -h ${test_data}.gz echo "IBM Processing accelerators:" lspci | grep "Processing accelerators: IBM" if [ $logging -eq 1 ]; then system_load_find_sadc system_load_logging_start fi echo echo "DEFLATE Figure out maximum throughput and #threads which work best" print_hdr="" for (( t=1; t<=$threads; t*=2 )); do zlib_mt_perf $verbose -i$bufsize -o$bufsize -D -f ${test_data} \ -c$count -t$t $print_hdr; if [ $? 
-ne 0 ]; then echo "ERROR Failed with $t Threads" echo -n "Version: " zlib_mt_perf --version echo " Called with:" echo " export ZLIB_ACCELERATOR=${ZLIB_ACCELERATOR}" echo " export ZLIB_CARD=${ZLIB_CARD}" echo " export ZLIB_DEFLATE_IMPL=${ZLIB_DEFLATE_IMPL}" echo " export ZLIB_INFLATE_IMPL=${ZLIB_INFLATE_IMPL}" echo " zlib_mt_perf $verbose -i$bufsize -o$bufsize -D -f ${test_data} -c$count -t$t $print_hdr" exit 1 fi # sleep 1 ; print_hdr="-N"; done echo echo "DEFLATE Use optimal #threads, guessing $cpus, influence of buffer size" print_hdr="" t=$cpus # FIXME ;-) for b in 1KiB 4KiB 64KiB 128KiB 1MiB 4MiB 8MiB ; do zlib_mt_perf $verbose -i$b -o$b -D -f ${test_data} -c$count -t$t \ $print_hdr; # sleep 1 ; print_hdr="-N"; done echo echo "INFLATE Figure out maximum throughput and #threads which work best" print_hdr="" for (( t=1; t<=$threads; t*=2 )); do zlib_mt_perf $verbose -i$bufsize -o$bufsize -f ${test_data}.gz \ -c$count -t$t $print_hdr; # sleep 1 ; print_hdr="-N"; done echo echo "INFLATE Use optimal #threads, guessing $cpus, influence of buffer size" t=$cpus # FIXME ;-) print_hdr="" for b in 1KiB 4KiB 64KiB 128KiB 1MiB 4MiB 8MiB ; do zlib_mt_perf $verbose -i$b -o$b -f ${test_data}.gz -c$count -t$t \ $print_hdr; # sleep 1 ; print_hdr="-N"; done if [ $logging -eq 1 ]; then system_load_logging_stop fi # Cleanup rm -f ${test_data} ${test_data}.gz exit 0 genwqe-user-4.0.18/tools/genwqe_parallel_echo000077500000000000000000000145721303345043000213000ustar00rootroot00000000000000#!/bin/bash # # Copyright 2015, 2016, International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # This testcase processes significant interrupt stress. Run that for a day # and you will know if your device driver and software can surive sudden # abborts while running a lot of interrupt stress. # # Start N echos and kill them after a couple of seconds. # card=0 tools_dir=tools verbose=0 iterations=100000 processes=160 killtimeout=2 preload=1 runpids="" polling="" count="" tracing=0 trace_file="genwqe_parallel_echo.log" PLATFORM=`uname -p` start_time=`date` do_build=0 # Set default accelerator based on platform we are running on if [ ${PLATFORM} == "ppc64le" ]; then accelerator=CAPI else accelerator=GENWQE fi function usage() { echo "Usage:" echo " genwqe_parallel_echo" echo " -A GENWQE|CAPI" echo " -C card to be used for the test" echo " -c send echos and than stop" echo " -i repeat multiple times for more testing" echo " -p how many processed in parallel" echo " -k kill timeout" echo " -l send echos in one shot N <= 64" echo " -P run echo in experimental polling mode (CAPI only)" echo " -T start traces (CAPI only)" echo " -t directory where the tools are located" echo " -b build the code before running the test" echo echo "Example:" echo " Repro the CAPI bitstream interrupt loss problem:" echo " ./scripts/parallel_echo.sh -ACAPI -C0 -i1000 -p160 -k3" echo } function start_job { # echo "Starting: $*" echo "$*" > echo_$s.cmd exec $* $parms & newpid=$! 
# echo "NewPID: $newpid" runpids=$runpids" "$newpid # echo "RunPIDs: $runpids" } function stop_jobs { echo "Running: "`jobs -rp` echo "Expected: ${runpids}" kill -SIGKILL `jobs -rp` wait echo "Still running: "`jobs -rp` runpids="" } function cleanup { echo "Stopping all jobs ..." stop_jobs sleep 1 echo "done" stop_cxl_traces exit 0 } function start_cxl_traces { if [ ${accelerator} == "CAPI" -a ${tracing} -eq 1 ]; then echo "Starting CXL tracing ..."; sudo sh -c 'echo 1 > /sys/kernel/debug/tracing/events/cxl/enable'; fi } function stop_cxl_traces { if [ ${accelerator} == "CAPI" -a ${tracing} -eq 1 ]; then echo "Stopping CXL tracing ..."; sudo sh -c 'echo 0 > /sys/kernel/debug/tracing/events/cxl/enable'; fi } function collect_cxl_traces { if [ ${accelerator} == "CAPI" -a ${tracing} -eq 1 ]; then echo "Collect CXL traces ..."; sudo sh -c 'cat /sys/kernel/debug/tracing/trace_pipe > $trace_file'; fi } trap cleanup SIGINT trap cleanup SIGKILL trap cleanup SIGTERM while getopts "TPA:C:c:p:i:k:l:t:bh" opt; do case $opt in A) accelerator=$OPTARG; ;; C) card=$OPTARG; ;; c) count="-c $OPTARG"; ;; i) iterations=$OPTARG; ;; p) processes=$OPTARG; ;; k) killtimeout=$OPTARG; ;; l) preload=$OPTARG; ;; T) tracing=1; ;; P) polling="-p" ;; h) usage; exit 0; ;; b) do_build=1; ;; t) tools_dir=$OPTARG; ;; \?) echo "Invalid option: -$OPTARG" >&2 ;; esac done function test_echo () { ### Start in background ... echo "Starting genwqe_echo in the background ... " for s in `seq 1 $processes` ; do start_job $tools_dir/genwqe_echo -A ${accelerator} -C ${card} \ -l ${preload} ${count} -f ${polling} \ > echo_$s.stdout.log 2> echo_$s.stderr.log done echo "ok" if [ ${killtimeout} -ne -1 ]; then echo "Waiting ${killtimeout} seconds ..." for s in `seq 0 ${killtimeout}` ; do sleep 1; echo -n "." done echo " ok" echo "Sending SIGKILL to all ... " stop_jobs echo "ok" else echo "Skipp killing processes but wait until they terminate ..." 
fi } # Check if we have to do a build if [ ${do_build} -eq 1 ]; then echo "Build code ..." make -s -j32 || exit 1 fi echo "********************************************************************" echo "Parallel echo TEST for ${accelerator} card ${card} starting ${processes}" echo "********************************************************************" echo echo "********************************************************************" echo "Hardware Version" echo "********************************************************************" echo $tools_dir/genwqe_echo -A ${accelerator} -C ${card} --hardware-version echo "********************************************************************" echo "Remove old logfiles ..." echo "********************************************************************" echo rm -f echo_*.cmd echo_*.stdout.log echo_*.stderr.log start_cxl_traces for i in `seq 1 ${iterations}` ; do echo -n "(1) Check if card is replying to an echo request ($i) ... " date $tools_dir/genwqe_echo -A ${accelerator} -C ${card} -i0 -c5 if [ $? -ne 0 ]; then echo "Single echo took to long, please review results!" collect_cxl_traces stop_cxl_traces exit 1 fi echo "(2) Perform massive interrupt stress and killing applications ..." test_echo; echo "(3) Check logfiles for string \"err\" ..." grep err echo_*.stderr.log if [ $? -ne 1 ]; then echo "Found potential errors ... please check logfiles" collect_cxl_traces stop_cxl_traces exit 2 fi echo "(4) Check if card is still replying to an echo request ..." $tools_dir/genwqe_echo -A ${accelerator} -C ${card} -i0 -c5 if [ $? -ne 0 ]; then echo "Single echo took to long, please review results!" collect_cxl_traces stop_cxl_traces exit 3 fi echo "(5) Remove old logfiles ..." 
rm -f echo_*.cmd echo_*.stdout.log echo_*.stderr.log echo "Running since ${start_time} until now `date` ($i)" echo done stop_cxl_traces exit 0 genwqe-user-4.0.18/tools/genwqe_peek.c000066400000000000000000000212521303345043000176410ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include "genwqe_tools.h" #include "force_cpu.h" #include int verbose_flag = 0; static const char *version = GIT_VERSION; /** * @brief prints valid command line options * * @param prog current program's name */ static void usage(const char *prog) { printf("Usage: %s [-h] [-v,--verbose]\n" " -C,--card can be (0...3)\n" " -A, --accelerator-type=GENWQE|CAPI CAPI is only available " "for System p\n" " -V, --version print version.\n" " -q, --quiet quiece output.\n" " -w, --width <32|64> access width, 64: default\n" " -X, --cpu only run on this CPU.\n" " -i, --interval interval in usec, 0: default.\n" " -c, --count number of peeks do be done, 1: default.\n" " -e, --must-be compare and exit if not equal.\n" " -n, --must-not-be compare and exit if equal.\n" " -a, --and-mask mask read value before compare.\n" " -p, --psl-bar access PSL bar (CAPI only)\n" " \n" "\n" "Example:\n" " genwqe_peek 0x0000\n" " [00000000] 000000021032a178\n\n" " for CAPI card (-A CAPI)\n" " Reg 0x0000 CAPI Card Version Reg 1 (RO)\n" " Reg 0x0008 CAPI Card Version Reg 2 (RO)\n" " Reg 0x0080 CAPI 
Card Free Run Timer in 4 nsec (RO)\n" " Reg 0x0180 Queue Work Time in 4 nsec (RO)\n" " Reg 0x1000 ... 0x1028 6 Fir Registers (RW)\n" "\n" " Only CAPI (debugging):\n" " genwqe_peek -ACAPI -C0 --psl-bar=2 --width=64 0x150\n" "\n", prog); } /** * Writing PSL BARs only works in CAPI mode. It directly opens the * PCIe device and bypasses therefore the CXL driver. Handle this with * care, since it can cause unexpected effects if wrong data is * written or accessed. * * We actually need this to setup a circumvention for MMIOs which can * timeout. This is required since the Linux driver could not be * changed as quickly as desired. * * MSB LSB * 11.1111.1111.2222.2222.2233_3333.3333.4444.4444.4455.5555.5555.6666 * 0123.4567.8901.2345.6789.0123.4567.8901_2345.6789.0123.4567.8901.2345.6789.0123 */ static int capi_read_psl_bar(unsigned int card_no, unsigned int res_no, int width, off_t offset, uint64_t *val) { int fd, rc = 0; struct stat sb; void *memblk, *addr; char res[128]; sprintf(res, "/sys/class/cxl/card%u/device/resource%u", card_no, res_no); fd = open(res, O_RDWR); if (fd < 0) { fprintf(stderr, "err: Can not open %s %s\n", res, strerror(errno)); exit(EXIT_FAILURE); } fstat(fd, &sb); memblk = mmap(NULL, sb.st_size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); if (memblk == MAP_FAILED) { fprintf(stderr, "err: Can not mmap %s\n", res); exit(EXIT_FAILURE); } addr = memblk + (offset & (sb.st_size - 1)); switch (width) { case 32: /* Write word */ if (val) *val = __be32_to_cpu(*((uint32_t *)addr)); break; case 64: /* Write double */ if (val) *val = __be64_to_cpu(*((uint64_t *)addr)); break; default: fprintf(stderr, "err: Illegal width %d\n", width); rc = -1; } munmap(memblk,sb.st_size); close(fd); return rc; } /** * Read accelerator specific registers. Must be called as root! 
*/
int main(int argc, char *argv[])
{
	int ch, rc = 0;
	int card_no = 0;
	int card_type = DDCB_TYPE_GENWQE;
	accel_t card;
	int err_code = 0;
	int cpu = -1;
	int width = 64;
	uint32_t offs;
	uint64_t val = 0xffffffffffffffffull;
	uint64_t and_mask = 0xffffffffffffffffull;
	uint64_t equal_val = val;
	uint64_t not_equal_val = val;
	int equal = 0, not_equal = 0;
	int quiet = 0;
	unsigned long i, count = 1;
	unsigned long interval = 0;
	int mode = DDCB_MODE_WR;
	int psl_bar = -1;	/* -1 disabled */

	while (1) {
		int option_index = 0;
		static struct option long_options[] = {
			/* functions */

			/* options */
			{ "card",	required_argument, NULL, 'C' },
			{ "accelerator-type", required_argument, NULL, 'A' },
			{ "cpu",	required_argument, NULL, 'X' },

			{ "width",	required_argument, NULL, 'w' },
			{ "interval",	required_argument, NULL, 'i' },
			{ "count",	required_argument, NULL, 'c' },
			{ "must-be",	required_argument, NULL, 'e' },
			{ "must-not-be", required_argument, NULL, 'n' },
			{ "and-mask",	required_argument, NULL, 'a' },

			/* CAPI specific tweakings */
			{ "psl-bar",	required_argument, NULL, 'p' },

			/* misc/support */
			{ "version",	no_argument,	   NULL, 'V' },
			{ "quiet",	no_argument,	   NULL, 'q' },
			{ "verbose",	no_argument,	   NULL, 'v' },
			{ "help",	no_argument,	   NULL, 'h' },

			{ 0,		no_argument,	   NULL, 0   },
		};

		ch = getopt_long(argc, argv, "p:C:A:X:w:i:c:e:n:a:Vqvh",
				 long_options, &option_index);
		if (ch == -1)	/* all params processed ? */
			break;

		switch (ch) {
		/* which card to use */
		case 'C':
			card_no = strtol(optarg, (char **)NULL, 0);
			break;
		case 'A':	/* accelerator type, by name or by number */
			if (strcmp(optarg, "GENWQE") == 0) {
				card_type = DDCB_TYPE_GENWQE;
				break;
			}
			if (strcmp(optarg, "CAPI") == 0) {
				card_type = DDCB_TYPE_CAPI;
				break;
			}
			card_type = strtol(optarg, (char **)NULL, 0);
			break;
		case 'X':
			cpu = strtoul(optarg, NULL, 0);
			break;
		case 'w':
			width = strtoul(optarg, NULL, 0);
			break;
		case 'p':	/* psl-bar */
			psl_bar = strtol(optarg, (char **)NULL, 0);
			break;
		case 'i':	/* interval */
			interval = strtol(optarg, (char **)NULL, 0);
			break;
		case 'c':	/* loop count */
			count = strtol(optarg, (char **)NULL, 0);
			break;
		case 'e':
			equal = 1;
			equal_val = strtoull(optarg, NULL, 0);
			break;
		case 'n':
			not_equal = 1;
			not_equal_val = strtoull(optarg, NULL, 0);
			break;
		case 'a':
			and_mask = strtoull(optarg, NULL, 0);
			break;
		case 'V':
			printf("%s\n", version);
			exit(EXIT_SUCCESS);
		case 'q':
			quiet++;
			break;
		case 'v':
			verbose_flag = 1;
			break;
		case 'h':
			usage(argv[0]);
			exit(EXIT_SUCCESS);
			break;
		default:
			usage(argv[0]);
			exit(EXIT_FAILURE);
		}
	}

	/* exactly one positional argument: the register offset */
	if (optind + 1 != argc) {
		usage(argv[0]);
		exit(EXIT_FAILURE);
	}
	offs = strtoull(argv[optind], NULL, 0);

	/* --must-be and --must-not-be exclude each other */
	if (equal && not_equal) {
		usage(argv[0]);
		exit(EXIT_FAILURE);
	}

	switch_cpu(cpu, verbose_flag);

	if ((DDCB_TYPE_CAPI == card_type) && (psl_bar != -1)) {
		/*
		 * Do not report the 0xff.. initial value as if it had
		 * been read; the helper already printed the reason.
		 */
		rc = capi_read_psl_bar(card_no, psl_bar, width, offs, &val);
		if (rc != 0)
			exit(EXIT_FAILURE);
		goto print_result;
	}

	ddcb_debug(verbose_flag);

	/* CAPI needs the master flag for register access */
	if (DDCB_TYPE_CAPI == card_type)
		mode |= DDCB_MODE_MASTER;

	if ((card_no < 0) || (card_no > 4)) {
		printf("(%d) is a invalid Card number !\n", card_no);
		usage(argv[0]);
		exit(EXIT_FAILURE);
	}

	card = accel_open(card_no, card_type, mode, &err_code,
			  0, DDCB_APPL_ID_IGNORE);
	if (card == NULL) {
		fprintf(stderr, "err: failed to open card %u type %u "
			"(%d/%s)\n", card_no, card_type, err_code,
			accel_strerror(card, err_code));
		exit(EXIT_FAILURE);
	}

	for (i = 0; i < count; i++) {
		switch (width) {
		case 32:
			val = accel_read_reg32(card, offs, &rc);
			break;
		default:
		case 64:
			val = accel_read_reg64(card, offs, &rc);
			break;
		}

		if (rc != DDCB_OK) {
			fprintf(stderr, "err: could not read [%08x] rc=%d\n",
				offs, rc);
			accel_close(card);
			exit(EXIT_FAILURE);
		}
		if ((equal) && (equal_val != (val & and_mask))) {
			fprintf(stderr, "err: [%08x] %016llx != %016llx\n",
				offs, (unsigned long long)val,
				(unsigned long long)equal_val);
			accel_close(card);
			exit(EX_ERR_DATA);
		}
		if ((not_equal) && (not_equal_val == (val & and_mask))) {
			fprintf(stderr, "err: [%08x] %016llx == %016llx\n",
				offs, (unsigned long long)val,
				(unsigned long long)not_equal_val);
			accel_close(card);
			exit(EX_ERR_DATA);
		}
		if (interval)
			usleep(interval);
	}

	accel_close(card);

 print_result:
	if (!quiet)
		printf("[%08x] %016llx\n", offs, (unsigned long long)val);

	exit(EXIT_SUCCESS);
}
genwqe-user-4.0.18/tools/genwqe_poke.c000066400000000000000000000222231303345043000176520ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License.
*/ #include #include #include #include #include #include #include #include "genwqe_tools.h" #include "force_cpu.h" #include int verbose_flag = 0; static int quiet = 0; static const char *version = GIT_VERSION; /** * @brief Prints valid command line options * * @param prog current program name * * Example for CAPI specific MMIO timeout circumvention: * * Read out old value: * sudo ./tools/genwqe_peek -ACAPI -C1 --psl-bar=2 --width=64 0x150 * * Set some bits under mask only bits 36..38: * sudo ./tools/genwqe_poke -ACAPI -C1 --psl-bar=2 --width=64 --mask \ * 0xe000000 0x150 0x4000000 */ static void usage(const char *prog) { printf("Usage: %s [-h] [-v,--verbose]\n" " -C,--card can be (0...3)\n" " -A, --accelerator-type=GENWQE|CAPI CAPI is only available " "for System p\n" " -V, --version print version.\n" " -q, --quiet quiece output.\n" " -w, --width <32|64> access width, 64: default\n" " -X, --cpu only run on this CPU.\n" " -i, --interval interval in usec, 0: default.\n" " -c, --count number of pokes, 1: default\n" " -r, --read-back read back and verify.\n" " -p, --psl-bar access PSL bar (CAPI only)\n" " -m, --mask x = (x & ~mask) | (val & mask)\n" " \n" "\n" "Example:\n" " genwqe_poke 0x0000000 0xdeadbeef\n" "\n" "Testcase to trigger error recovery code on genwqe card:\n" " Fatal GFIR:\n" " genwqe_poke -C0 0x00000008 0x001\n" " Info GFIR by writing to VF:\n" " genwqe_poke -C2 0x00020020 0x800\n" "\n" " Registers for Capi card (-A CAPI)\n" " FIR Reg: 0x1000 ... 0x1028, 6 Regs\n" " Err. Inj Reg: 0x1800 and 0x1808\n" " Agr. Regs: 0x2000 and 0x2078\n" " Gzip Regs: 0x2100 and 0x2178\n" "\n" " Only CAPI (debugging):\n" " genwqe_poke -ACAPI -C0 --psl-bar=2 --width=64 --mask \n" "\n", prog); } /** * Writing PSL BARs only works in CAPI mode. It directly opens the * PCIe device and bypasses therefore the CXL driver. Handle this with * care, since it can cause unexpected effects if wrong data is * written or accessed. 
* * We actually need this to setup a circumvention for MMIOs which can * timeout. This is required since the Linux driver could not be * changed as quickly as desired. */ static int capi_write_psl_bar(unsigned int card_no, unsigned int res_no, int width, off_t offset, uint64_t val, uint64_t mask) { int fd, rc = 0; struct stat sb; void *memblk, *addr; uint64_t val64; char res[128]; sprintf(res, "/sys/class/cxl/card%u/device/resource%u", card_no, res_no); fd = open(res, O_RDWR); if (fd < 0) { fprintf(stderr, "err: Can not open %s %s\n", res, strerror(errno)); exit(EXIT_FAILURE); } fstat(fd, &sb); memblk = mmap(NULL, sb.st_size, PROT_WRITE|PROT_READ, MAP_SHARED, fd, 0); if (memblk == MAP_FAILED) { fprintf(stderr, "err: Can not mmap %s\n", res); exit(EXIT_FAILURE); } addr = memblk + (offset & (sb.st_size - 1)); switch (width) { case 32: /* Write word */ if (mask == 0) *((uint32_t *)addr) = __cpu_to_be32(val); else { val64 = __be32_to_cpu(*((uint32_t *)addr)); /* old */ val64 = (val64 & ~mask) | (val & mask); /* new */ *((uint32_t *)addr) = __cpu_to_be32(val64); } break; case 64: /* Write double */ if (mask == 0) *((uint64_t *)addr) = __cpu_to_be64(val); else { val64 = __be64_to_cpu(*((uint32_t *)addr)); /* old */ val64 = (val64 & ~mask) | (val & mask); /* new */ *((uint64_t *)addr) = __cpu_to_be64(val64); } break; default: fprintf(stderr, "err: Illegal width %d\n", width); rc = -1; } munmap(memblk,sb.st_size); close(fd); return rc; } /** * @brief Tool to write to zEDC registers. Must be called as root! 
*/
int main(int argc, char *argv[])
{
	int ch, rc, rbrc = DDCB_OK;
	int card_no = 0;
	int card_type = DDCB_TYPE_GENWQE;
	accel_t card;
	int err_code = 0;
	int cpu = -1;
	int width = 64;
	int rd_back = 0;
	uint32_t offs;
	uint64_t val, rbval, expected;
	uint64_t mask = 0x0ull;	/* default is using no mask */
	unsigned long i, count = 1;
	unsigned long interval = 0;
	int mode = DDCB_MODE_WR; /* Default mode for CAPI and GENWQE Card's */
	int psl_bar = -1;	/* -1 disabled */
	int xerrno;

	while (1) {
		int option_index = 0;
		static struct option long_options[] = {
			/* functions */

			/* options */
			{ "card",	required_argument, NULL, 'C' },
			{ "accelerator-type", required_argument, NULL, 'A' },
			{ "cpu",	required_argument, NULL, 'X' },

			{ "width",	required_argument, NULL, 'w' },
			{ "interval",	required_argument, NULL, 'i' },
			{ "count",	required_argument, NULL, 'c' },
			{ "rd-back",	no_argument,       NULL, 'r' },

			/* CAPI specific tweakings */
			{ "psl-bar",	required_argument, NULL, 'p' },
			{ "mask",	required_argument, NULL, 'm' },

			/* misc/support */
			{ "version",	no_argument,	   NULL, 'V' },
			{ "quiet",	no_argument,	   NULL, 'q' },
			{ "verbose",	no_argument,	   NULL, 'v' },
			{ "help",	no_argument,	   NULL, 'h' },

			{ 0,		no_argument,	   NULL, 0   },
		};

		ch = getopt_long(argc, argv, "m:p:C:A:X:w:i:c:Vqrvh",
				 long_options, &option_index);
		if (ch == -1)	/* all params processed ? */
			break;

		switch (ch) {
		/* which card to use */
		case 'C':
			card_no = strtol(optarg, (char **)NULL, 0);
			break;
		case 'A':	/* accelerator type, by name or by number */
			if (strcmp(optarg, "GENWQE") == 0) {
				card_type = DDCB_TYPE_GENWQE;
				break;
			}
			if (strcmp(optarg, "CAPI") == 0) {
				card_type = DDCB_TYPE_CAPI;
				break;
			}
			card_type = strtol(optarg, (char **)NULL, 0);
			break;
		case 'X':
			cpu = strtoul(optarg, NULL, 0);
			break;
		case 'w':
			width = strtoul(optarg, NULL, 0);
			break;
		case 'p':	/* psl-bar */
			psl_bar = strtol(optarg, (char **)NULL, 0);
			break;
		case 'm':
			/*
			 * Unsigned conversion: a mask with bit 63 set
			 * would be clamped by the signed variant.
			 */
			mask = strtoull(optarg, (char **)NULL, 0);
			break;
		case 'i':	/* interval */
			interval = strtol(optarg, (char **)NULL, 0);
			break;
		case 'c':	/* loop count */
			count = strtol(optarg, (char **)NULL, 0);
			break;
		case 'V':
			printf("%s\n", version);
			exit(EXIT_SUCCESS);
		case 'q':
			quiet++;
			break;
		case 'r':
			rd_back++;
			break;
		case 'v':
			verbose_flag++;
			break;
		case 'h':
			usage(argv[0]);
			exit(EXIT_SUCCESS);
			break;
		default:
			usage(argv[0]);
			exit(EXIT_FAILURE);
		}
	}

	/* two positional arguments: register offset and value */
	if (optind + 2 != argc) {
		usage(argv[0]);
		exit(EXIT_FAILURE);
	}

	offs = strtoull(argv[optind++], NULL, 0);
	val = strtoull(argv[optind++], NULL, 0);
	rbval = ~val;

	/* a 32-bit access only stores the lower half of val */
	expected = (width == 32) ? (uint64_t)(uint32_t)val : val;

	switch_cpu(cpu, verbose_flag);

	if ((DDCB_TYPE_CAPI == card_type) && (psl_bar != -1)) {
		/* the helper prints the reason when it refuses the access */
		rc = capi_write_psl_bar(card_no, psl_bar, width, offs,
					val, mask);
		if (rc != 0)
			exit(EXIT_FAILURE);
		goto print_result;
	}

	ddcb_debug(verbose_flag);

	/* CAPI need's master flag for Poke */
	if (DDCB_TYPE_CAPI == card_type)
		mode |= DDCB_MODE_MASTER;

	if ((card_no < 0) || (card_no > 4)) {
		fprintf(stderr, "err: (%d) is a invalid card number!\n",
			card_no);
		usage(argv[0]);
		exit(EXIT_FAILURE);
	}

	card = accel_open(card_no, card_type, mode, &err_code,
			  0, DDCB_APPL_ID_IGNORE);
	if (card == NULL) {
		fprintf(stderr, "err: failed to open card %u type %u "
			"(%d/%s)\n", card_no, card_type, err_code,
			accel_strerror(card, err_code));
		exit(EXIT_FAILURE);
	}

	for (i = 0; i < count; i++) {
		switch (width) {
		case 32:
			rc = accel_write_reg32(card, offs, (uint32_t)val);
			xerrno = errno;
			if (rd_back)
				rbval = accel_read_reg32(card, offs, &rbrc);
			break;
		default:
		case 64:
			rc = accel_write_reg64(card, offs, val);
			xerrno = errno;
			if (rd_back)
				rbval = accel_read_reg64(card, offs, &rbrc);
			break;
		}

		if (rc != DDCB_OK) {
			fprintf(stderr, "err: could not write "
				"%016llx to [%08x]\n"
				" %s: %s\n", (unsigned long long)val, offs,
				accel_strerror(card, rc), strerror(xerrno));
			accel_close(card);
			exit(EXIT_FAILURE);
		}
		if (rd_back) {
			if (rbrc != DDCB_OK) {
				fprintf(stderr, "err: read back failed\n");
				accel_close(card);
				exit(EXIT_FAILURE);
			}
			if (expected != rbval) {
				fprintf(stderr, "err: post verify failed\n");
				accel_close(card);
				exit(EXIT_FAILURE);
			}
		}
		if (interval)
			usleep(interval);
	}

	accel_close(card);

 print_result:
	if (!quiet)
		printf("[%08x] %016llx\n", offs, (unsigned long long)val);

	exit(EXIT_SUCCESS);
}
genwqe-user-4.0.18/tools/genwqe_test_gz000077500000000000000000000262061303345043000201620ustar00rootroot00000000000000#!/bin/bash # # Copyright 2015, 2016 International Business Machines # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # Purpose: Test hardware-accelerated compression/decompression for # identical results after 1 or more compression/decompression # iteration(s) of a given set of test files. # # Environment variables for the hardware compression zlib by default # distribute workload over all plugged cards.
# export ZLIB_ACCELERATOR=GENWQE export ZLIB_CARD=-1 # Directories used INSTALL_DIR="/usr" # Tool RPM install directory TEST_DATA=${INSTALL_DIR}/share/testdata/testdata.tar.gz TMP_DIR="/tmp" # Temporary directory to use DATA_DIR="${TMP_DIR}/$$_testdata" # directory for testdata ORIG_DATA="${TMP_DIR}/orig" # directory for original data GZIPTOOL="genwqe_gzip" GUNZIPTOOL="genwqe_gunzip" # GenWQE Global values DEVICE_ID=":044b" SUPPORTED_APPID="GZIP" DRIVER_PREFIX=genwqe DRIVER=${DRIVER_PREFIX}_card PROGRAM=`basename $0` CWD=`pwd` ERRLOG=${TMP_DIR}/${PROGRAM}_$$_err.log CLASS_PATH="/sys/class/${DRIVER_PREFIX}/" # GenWQE add supported bit stream for version strings to this array: BIT_STREAMS=( 00000b0330342260.00000002475a4950 \ 0000000330353090.00000002475a4950 ) # Helper variables driver_check=1 # flag for GenWQE card driver iterations=100 # holds the number of repeats nr_instances=10 # number of parallel tests version="https://github.com/ibm-genwqe/genwqe-user" runpids= verbose=0 bitstream_warning=1 # Print helper function to output test depending on verbose flag function printv() { if [ $verbose -gt 0 ]; then echo $* fi } # Helper function to stop any running workload jobs function stop_jobs() { JOBS=`jobs -rp` printv "Running: "$JOBS printv "Expected: ${runpids}" printv "Wait while terminating jobs..." kill -n 15 $JOBS sleep 2 wait JOBS=`jobs -rp` printv "Still running: "$JOBS runpids="" cleanup } # Cleanup helper function called when CTRL-C is pressed or process is killed function cleanup() { rm -rf ${DATA_DIR}_* rm -f ${ORIG_DATA}/.lock_$$ # remove own lockfile which frees the # orig data for own process LOCKFILES=`ls ${ORIG_DATA}/.lock* 2>/dev/null` if [ "$LOCKFILES" == "" ]; then # all lock files removed? printv "Cleaning up source data..." rm -rf ${ORIG_DATA} unset ZLIB_CARD printv "Done" fi } trap stop_jobs SIGINT trap stop_jobs SIGKILL # Print usage message helper function function usage() { echo "Usage of $PROGRAM:" echo " [-h] Print this help message." 
echo " [-s] Run on software zlib if ${DRIVER} is not loaded." echo " [-S] Run on software zlib even if ${DRIVER} is loaded." echo " [-i] repeat multiple times for more testing." echo " Default: 100" echo " [-p] of this test to run in parallel." echo " Default: 10" echo " [-A] use either GENWQE for the PCIe and CAPI for" echo " CAPI based solution available only on System p" echo " Use SW to use software compress/decompression" echo " [-C] set the compression card to use (0, 1, ... )." echo " RED (or -1) drive work to all available cards." echo " [-t] Define alternate for test-data.tar.gz." echo " [-v] Print status and informational output." echo " -vv for more verbosity" echo " [-V] Print program version (${version})" echo } # Check for tools availability function check_tools { for t in ${GZIPTOOL} ${GUNZIPTOOL} ; do echo -n "Checking if ${t} is there ... " if [ ! -x `which ${t}` ]; then echo "failure" exit -3; fi echo "ok" done } # Check for and create directory holding the original data. # Pass location of original data to this function. function prep_orig_data() { local d=$1 local data=`basename $d` printv "Preparing data..." if [ ! -f ${d} ]; then echo "Testdata \"${d}\" is missing. Please install it first."; echo echo "E.g.:"; echo " wget http://corpus.canterbury.ac.nz/resources/cantrbry.tar.gz"; echo " sudo mkdir -p ${INSTALL_DIR}/share/testdata/"; echo " sudo cp cantrbry.tar.gz ${d}"; echo exit -1; fi if [ ! -d ${ORIG_DATA} ]; then mkdir -p $ORIG_DATA fi cp ${d} ${ORIG_DATA} cd ${ORIG_DATA} tar zxf ${data} rm -f ${ORIG_DATA}/${data} } function ffdc_msg_and_cleanup() { ls -alR ${ORIG_DATA} ${WORK_DIR} >> $ERRLOG; echo "---------------- dmesg output -------------" >> $ERRLOG; dmesg >> $ERRLOG; echo " ========= END OF LOG `date`==========" >> $ERRLOG; echo "$ERRLOG has been written." echo "Open an IBM-internal defect against $DRIVER development," echo "copy all console messages related to the error" echo "and attach $ERRLOG. Thanks." 
rm -rf ${WORK_DIR} } # Check for and create working directories. # These are deleted upon script termination. function prep_work_and_run_load() { WORK_DIR="${DATA_DIR}_$1" if [ ! -d ${WORK_DIR} ]; then mkdir -p $WORK_DIR fi touch ${ORIG_DATA}/.lock_$$ # mark source data as 'in use' cp -r * ${WORK_DIR}/ cd ${WORK_DIR} # Compress/decompress and compare in each iteration for i in `seq 1 $iterations` ; do if [ $verbose -gt 1 ]; then echo " Run #$i/Instance_$1 : unpacking, packing, comparing... "; fi for file in `ls`; do ${GZIPTOOL} $file; rc=$? if [ $rc -ne 0 ]; then echo "$PROGRAM: ERROR - ${GZIPTOOL} returned RC=$rc" \ | tee $ERRLOG ffdc_msg_and_cleanup exit -4 fi done for file in `ls`; do ${GUNZIPTOOL} $file; rc=$? if [ $rc -ne 0 ]; then echo "$PROGRAM: ERROR - ${GUNZIPTOOL} returned RC=$rc" \ |tee $ERRLOG ffdc_msg_and_cleanup exit -5 fi done for file in `ls`; do diff -q $file ${ORIG_DATA}/$file; rc=$? if [ $rc -ne 0 ]; then echo "$PROGRAM: ERROR - miscompare in run # $i/Instance $1:" \ "File ${WORK_DIR}/$file after successful compression" \ "decompression" | tee $ERRLOG; ffdc_msg_and_cleanup exit -6 fi done done } # Checks an input bitstream value against a fixed list of bit stream versions # in the way that global value bitstream_warning is set as follows # 0 if the bit stream is among the versions $DRIVER supports # 1 if an unsupported bit stream version is on the PCIe card # Parameters: 1. bit stream value of card to verify # 2. Application ID (String) of card to verify, 'GZIP' for comp. 
function is_supported() { if [ "$2" != "${SUPPORTED_APPID}" ]; then # ignore an unsupported appid, prevent bitstream warning bitstream_warning=0; else for i in `seq 0 $((${#BIT_STREAMS[@]}-1))`; do if [ "$1" == "${BIT_STREAMS[$i]}" ]; then bitstream_warning=0; fi done fi } # GenWQE Prerequisite: The genwqe driver needs to be loaded for this # function to be able to check the bit stream version # function genwqe_check_supported_bitstream_versions() { if [ $ZLIB_CARD -ge 0 ]; then # check APPID appid=`cat ${CLASS_PATH}${DRIVER_PREFIX}${ZLIB_CARD}_card/appid` # check bit stream version for a specific virtual or physical Function ver=`cat ${CLASS_PATH}${DRIVER_PREFIX}${ZLIB_CARD}_card/version` is_supported $ver $appid else # ZLIB_CARD is set to -1. Thus run against all cards, to # check the bit streams. # Determine the number of cards via lspci NR_CARDS=`lspci -d$DEVICE_ID|wc -l` for i in `seq 0 $((NR_CARDS - 1))`; do appid=`cat ${CLASS_PATH}${DRIVER_PREFIX}${i}_card/appid` ver=`cat ${CLASS_PATH}${DRIVER_PREFIX}${i}_card/version` is_supported $ver $appid done fi } # Parse any options given on the command line while getopts "t:A:vVhsSi:C:p:" opt; do case ${opt} in s) driver_check=0; ;; S) driver_check=0; # use software zlib rather than hardware. GZIPTOOL=gzip; GUNZIPTOOL=gunzip; ;; i) iterations=${OPTARG}; ;; p) nr_instances=${OPTARG}; ;; t) TEST_DATA=${OPTARG}; ;; A) export ZLIB_ACCELERATOR=${OPTARG}; ;; C) ZLIB_CARD=${OPTARG}; if [ "${OPTARG}" == "-1" -o "${OPTARG}" == "RED" ]; then ZLIB_CARD=-1; fi ;; v) verbose+=1; ;; V) echo "${version}" exit 0; ;; h) usage; exit 0; ;; \?) echo "ERROR: Invalid option: -$OPTARG" >&2 exit 1; ;; esac done # Start of main program if [ $ZLIB_ACCELERATOR == "GENWQE" ]; then # warn user if the hardware acceleration for compression/decompression # is not ready driver_loaded=`lsmod|grep $DRIVER` if [ ${driver_check} -eq 1 ]; then if [ "${driver_loaded}" = "" ]; then echo "WARNING: ${DRIVER} is not loaded." 
\ "No Hardware compression available!" exit -2; fi fi if [ "${driver_loaded}" != "" -a ${driver_check} -eq 1 ]; then genwqe_check_supported_bitstream_versions $ZLIB_CARD if [ $bitstream_warning -eq 1 ]; then 2>&1 echo "ERROR: Unsupported FPGA image (bitstream) detected on one" \ "or more cards." 2>&1 echo " Check the bitstream versions on all of your cards and" 2>&1 echo " update to a supported version for the compression" \ "solution." exit -3; fi fi fi # Check if tools are available check_tools # provide source data prep_orig_data ${TEST_DATA} TStart=`date +%s` # Main part of test driving workload to card or software zlib for i in `seq 1 $nr_instances`; do prep_work_and_run_load $i & newpid=$! if [ $verbose -ge 2 ]; then echo "NewPID: $newpid"; fi runpids=$runpids" "$newpid if [ $verbose -ge 2 ]; then printv "RunPIDs: $runpids"; fi done # Workload is kicked off, wait for it to finish printv "Waiting for jobs to terminate ..." wait # Calculate duration TEnd=`date +%s` T=`expr ${TEnd} - ${TStart}` printv "Runtime: $((T/3600%24)):$((T/60%60)):$((T%60)) [H:M:S]" # Cleanup the test data and the copy of original data cleanup exit 0 genwqe-user-4.0.18/tools/genwqe_tools.h000066400000000000000000000100711303345043000200570ustar00rootroot00000000000000/* * Copyright 2015 International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/
#ifndef __GENWQE_TOOLS_H__
#define __GENWQE_TOOLS_H__

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include /* clock_gettime and friends */
#include
#include
#include
#include /* standard application exit codes */

#define GENWQE_TOOL_VERS_STRING "3.0.25"

/* Number of elements of a real array (not of a pointer) */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) ((sizeof((a))/sizeof((a)[0])))
#endif

/* ANSI sequences for terminal IO */
#define PR_STD "\x1b[0m"
#define PR_STD_BOLD "\x1b[1m"
#define PR_STD_BLINK "\x1b[5m"
#define PR_INVERS "\x1b[7m"
#define PR_RED "\x1b[0;31m"
#define PR_RED_BOLD "\x1b[1;31m"
#define PR_CYAN "\x1b[0;36m"
#define PR_CYAN_BOLD "\x1b[1;36m"
#define PR_BLUE "\x1b[0;34m"
#define PR_BLUE_BOLD "\x1b[1;34m"
#define PR_GREEN "\x1b[0;32m"
#define PR_GREEN_BOLD "\x1b[1;32m"
#define PR_MAGENTA "\x1b[0;35m"
#define PR_MAGENTA_BOLD "\x1b[1;35m"
#define ANSI_ERASE "\x1b[2J"
#define ANSI_HOME "\x1b[1;1H"
#define ANSI_INIT ANSI_ERASE ANSI_HOME

/*****************************************************************************/
/** Useful macros in case they are not defined somewhere else */
/*****************************************************************************/

/* ARRAY_SIZE is already defined above, so this guard does not take effect */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof((a)) / sizeof((a)[0]))
#endif

#ifndef ABS
# define ABS(a) (((a) < 0) ? -(a) : (a))
#endif

/*
 * MAX/MIN copy both arguments into typed temporaries inside a
 * statement expression, so each argument is evaluated only once.
 */
#ifndef MAX
# define MAX(a,b) ({ __typeof__ (a) _a = (a); \
	__typeof__ (b) _b = (b); \
	_a > _b ? _a : _b; })
#endif

#ifndef MIN
# define MIN(a,b) ({ __typeof__ (a) _a = (a); \
	__typeof__ (b) _b = (b); \
	_a < _b ? _a : _b; })
#endif

/**
 * Common tool return codes
 * 0: EX_OK/EXIT_SUCCESS
 * 1: Catchall for general errors/EXIT_FAILURE
 * 2: Misuse of shell builtins (according to Bash documentation)
 * 64..78: predefined in sysexits.h
 *
 * 79..128: Exit codes for our applications
 *
 * 126: Command invoked cannot execute
 * 127: "command not found"
 * 128: Invalid argument to exit
 * 128+n: Fatal error signal "n"
 * 255: Exit status out of range (exit takes only integer args in the
 * range 0 - 255)
 */
#define EX_ERRNO 79 /* libc problem */
#define EX_MEMORY 80 /* mem alloc failed */
#define EX_ERR_DATA 81 /* data not as expected */
#define EX_ERR_CRC 82 /* CRC wrong */
#define EX_ERR_ADLER 83 /* Adler checksum wrong */
#define EX_ERR_CARD 84 /* accelerator problem */
#define EX_COMPRESS 85 /* compression did not work */
#define EX_DECOMPRESS 86 /* decompression failed */
#define EX_ERR_DICT 87 /* dictionary compare failed */

/** common error printf; prefixes the message with file name and line */
#define pr_err(fmt, ...) do { \
		fprintf(stderr, "%s:%u: Error: " fmt, \
			__FILE__, __LINE__, ## __VA_ARGS__); \
	} while (0)

/** _dbg_flag must be defined elsewhere; pr_dbg prints only if it is set */
extern int _dbg_flag;
#define pr_dbg(fmt, ...) do { \
		if (_dbg_flag) \
			fprintf(stdout, fmt, ## __VA_ARGS__); \
	} while(0)

/** verbose_flag must be defined elsewhere; pr_info prints only if it is set */
extern int verbose_flag;
#define pr_info(fmt, ...) do { \
		if (verbose_flag) \
			fprintf(stdout, fmt, ## __VA_ARGS__); \
	} while (0)

/*
 * FIXME Fake this for old RHEL versions e.g. RHEL5.6
 * The replacement evaluates to 0 and does not touch its arguments.
 */
#ifndef CLOCK_MONOTONIC_RAW
#define clock_gettime(clk_id, tp) ({ int val = 0; val; })
#endif

/* NOTE(review): implementation is not part of this header -- presumably
 * maps a register address to its name; confirm at the definition. */
const char *genwqe_regname(uint32_t addr);

#endif /* __GENWQE_TOOLS_H__ */
genwqe-user-4.0.18/tools/genwqe_update.c000066400000000000000000000250161303345043000202010ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @file genwqe_update.c * @brief Genwqe SW utility. * This utility updates the Genwqes Flash with an new image from a *.rbf file * */ #include #include #include #include #include #include #include #include #include "genwqe_tools.h" #include "libcard.h" /* Sysfs entry to figure out the card type */ #define SYSFS_GENWQE_TYPE \ "/sys/class/" GENWQE_DEVNAME "/" GENWQE_DEVNAME "%d_card/type" int verbose_flag = 0; static const char *version = GIT_VERSION; static char sysfs_type[128] = "unknown"; struct genwqe_type { const char *card_id; size_t rbf_size; }; /** * The size of the update file *.rbf is an architected value depending * on the chip used. If we should ever support a different chip we * need to enhance this list or use the --force option. */ static const struct genwqe_type card_types[] = { { .card_id = "GenWQE5-A7", .rbf_size = 33747356 }, /* standard card */ { .card_id = "GenWQE5-A4", .rbf_size = 26724840 }, /* small card */ { .card_id = "GenWQE5-530", .rbf_size = 21465258 }, /* big old */ { .card_id = "GenWQE4-230", .rbf_size = 11819684 }, /* small old */ { .card_id = NULL, .rbf_size = 0 }, /* termination */ }; static size_t get_rbf_size(char *card_type) { const struct genwqe_type *t; for (t = &card_types[0]; t->card_id != NULL; t++) { if (strcmp(card_type, t->card_id) == 0) return t->rbf_size; } return 0; } /** * Find out the card type. The card type indicates the size of the * update file *.rbf. This we need to know to do some sanity checking * to prevent folks from shooting into their food. 
*/ static int read_card_type(int card_no) { int rc; char sysfs[128]; FILE *fp; snprintf(sysfs, sizeof(sysfs), SYSFS_GENWQE_TYPE, card_no); sysfs_type[sizeof(sysfs)-1] = 0; fp = fopen(sysfs, "r"); if (fp == NULL) return -1; rc = fread(sysfs_type, 1, sizeof(sysfs_type), fp); sysfs_type[sizeof(sysfs_type)-1] = 0; if (rc <= 0) { fclose(fp); return -2; } sysfs_type[strlen(sysfs_type)-1] = 0; /* remove trailing '\n' */ fclose(fp); return 0; } /** * str_to_num - Convert string into number and cope with endings like * KiB for kilobyte * MiB for megabyte * GiB for gigabyte */ static inline uint64_t str_to_num(char *str) { char *s = str; uint64_t num = strtoull(s, &s, 0); if (*s == '\0') return num; if (strcmp(s, "KiB") == 0) num *= 1024; else if (strcmp(s, "MiB") == 0) num *= 1024 * 1024; else if (strcmp(s, "GiB") == 0) num *= 1024 * 1024 * 1024; return num; } /** * @brief prints valid command line options * * @param prog current program's name */ static void usage(const char *prog) { printf("Usage: %s [-h, --help] [-v,--verbose] [-C,--card ]\n" "\t[-V, --version]\n" "\t[-v, --verbose]\n" "\t[-f, --file ]\n" "\t[-p, --partition ] Default: 1\n" "\t[-x, --verify <0:no|1:yes>]\n" "\n" "This utility updates the Genwqes FLASH with an new image\n" "from an *.rbf file. Do not disconnect the card from power\n" "while updating. Ensure you have the correct update\n" "image. Use of incorrect images or interrupting the update\n" "will make the card unusable." 
"\nExample flashing a Genwqe default Partition (Partition 1):\n" " %s -C0 -f chip_a5_latest.rbf\n" "\nExample flashing a Genwqe backup Partition (Partition 0):\n" " %s -C0 -p 0 -f chip_a5_latest.rbf\n" "\n" "Please note that updating the card can take some time.\n" "So please be patient and do not try to abort this process,\n" "because this might corrupt the card image, and the card\n" "won't work as expected afterwards.\n" "\n", prog, prog, prog); } static void print_move_flash_results(int retc, int attn, int progress) { printf(" RETC: %x\n", retc); printf(" ATTN: %x ", attn); switch (attn) { case 0x0000: printf("OK\n"); break; case 0x0001: printf("Parse Error (length wrong, addr bad, ...)\n"); break; case 0x0002: printf("CRC Error (data)\n"); break; case 0x0003: printf("Flash programmer timeout/sequence err.\n"); break; case 0x0004: printf("DMA Timeout\n"); break; case 0x0005: printf("Out of Bound (Addr. collision with images)\n"); break; case 0xe001: printf("Allication logicIssued a RC not equal to " "0x102, 0x104, or 0x108\n"); break; case 0xe002: printf("Allication violated SQB protocol\n"); break; case 0xe003: printf("LEM Attention\n"); break; case 0xe004: printf("Timeout (recoverable). 
Application quieced " "successfully.\n"); break; case 0xe005: printf("Application times out, Quiece unsuccessful.\n"); break; case 0xe006: printf("Queue Access Error\n"); break; case 0xe007: printf("DMA engine override\n"); break; case 0xf000: printf("Bad ICRC"); break; case 0xf001: printf("Out of Sequence\n"); break; case 0xf002: printf("Unsupported Preamble\n"); break; case 0xf003: printf("Unsupported ACFUNC\n"); break; case 0xf004: printf("SHI mis-sequenced\n"); break; case 0xf005: printf("Illegal VF access\n"); break; default: printf("unknown\n"); break; } printf(" PROGRESS: %x ", progress); switch (progress) { case 0x0000: printf("Command Retrieved.\n"); break; case 0x0100: printf("Sector Number N erased\n"); break; case 0x0200: printf("All Secors Erased.\n"); break; case 0x0201: printf("1st Block flashed.\n"); break; case 0x0203: printf("Half Programmed.\n"); break; } }

/**
 * @brief main function for update Genwqe's Image Flash
 *
 * Parses the command line, determines the expected bitstream size from
 * the card type, validates file name/partition, opens the card and
 * calls genwqe_flash_update() (or genwqe_flash_read() when read_back
 * is set).
 *
 * Options handled by the switch below:
 *   -C/--card, -f/--file, -p/--partition, -x/--verify,
 *   -v/--verbose, -V/--version, -h/--help.
 * A single remaining positional argument is taken as the file name.
 *
 * The process always terminates through exit(); the status is
 * EXIT_SUCCESS, EXIT_FAILURE, EX_NOINPUT or EX_USAGE.
 */
int main(int argc, char *argv[])
{
	int ch, rc, err_code;
	int card_no = 0;
	/*
	 * read_back and force are never modified by the option parsing
	 * in this function: the read-back branch below is therefore not
	 * taken and the *.rbf / file-size checks are always enforced.
	 */
	int read_back = 0;
	int verify = 1;
	int force = 0;
	card_handle_t card;
	struct card_upd_params upd;
	char *env;
	char *pext;
	size_t rbf_size;

	memset(&upd, 0, sizeof(upd));
	upd.partition = '1'; /* Set Default Partition */

	while (1) {
		int option_index = 0;
		static struct option long_options[] = {
			{ "file", required_argument, NULL, 'f' },
			{ "card", required_argument, NULL, 'C' },
			{ "verify", required_argument, NULL, 'x' },
			/* options */
			{ "partition", required_argument, NULL, 'p' },
			/* misc/support */
			{ "version", no_argument, NULL, 'V' },
			{ "verbose", no_argument, NULL, 'v' },
			{ "help", no_argument, NULL, 'h' },
			{ 0, no_argument, NULL, 0 },
		};

		ch = getopt_long(argc, argv, "C:f:vVhp:x:",
				 long_options, &option_index);
		if (ch == -1) /* all params processed ? */
			break;

		switch (ch) {
		/* which card to use */
		case 'C':
			card_no = strtol(optarg, (char **)NULL, 0);
			break;
		case 'f':
			upd.fname = optarg;
			break;
		case 'p':
			/* only the first character selects the partition */
			upd.partition = *optarg;
			break;
		case 'x':
			verify = strtol(optarg, (char **)NULL, 0);
			break;
		case 'h':
			usage(argv[0]);
			exit(EXIT_SUCCESS);
			break;
		case 'V':
			printf("%s\n", version);
			exit(EXIT_SUCCESS);
		case 'v':
			verbose_flag++;
			break;
		default:
			usage(argv[0]);
			exit(EXIT_FAILURE);
		}
	}

	if (optind < argc) /* input file */
		upd.fname = argv[optind++];

	if (optind != argc) { /* now it must fit */
		usage(argv[0]);
		exit(EXIT_FAILURE);
	}

	genwqe_card_lib_debug(verbose_flag);

	/* simulation is not supported with this tool */
	env = getenv("GENWQE_SIM");
	if ((env) && (atoi(env) > 0)) {
		pr_err("driver / HW simulation active !\n");
		exit(EXIT_FAILURE);
	}

	/* the card type selects the bitstream size expected further down */
	read_card_type(card_no);
	printf("Card Type: %s\n", sysfs_type);
	rbf_size = get_rbf_size(sysfs_type);
	printf("RBF Size: %d bytes\n", (int)rbf_size);
	if (upd.flength == 0)
		upd.flength = rbf_size; /* take default if nothing is set */

	/* check consistency of parameters */
	if (upd.fname == NULL) {
		pr_err("no input/output file!\n");
		exit(EX_NOINPUT);
	}

	/* n = 5 includes the terminating NUL: the suffix must be exactly ".rbf" */
	pext = strrchr(upd.fname, '.');
	if (!force && ((pext == NULL) || (strncmp(pext, ".rbf", 5) != 0))) {
		pr_err("'%s' is not an *.rbf file!\n", upd.fname);
		exit(EX_NOINPUT);
	}

	/* Check for 0 and 1 only. Partition v is used for VPD */
	if (upd.partition != '0' && upd.partition != '1') {
		pr_err("partition '%c' invalid\n",
		       isprint(upd.partition) ? upd.partition : '?');
		exit(EX_USAGE);
	}

	/* Open The Card */
	card = genwqe_card_open(card_no, GENWQE_MODE_RDWR, &err_code,
				0, GENWQE_APPL_ID_IGNORE);
	if (card == NULL) {
		pr_err("cannot open card %d! (err=%d)\n", card_no, err_code);
		exit(EXIT_FAILURE);
	}

	/* now do the flash update */
	if (read_back) {
		if (upd.flength == 0) {
			pr_err("don't forget to specify a size!\n");
			rc = EXIT_FAILURE;
			goto __exit;
		}
		rc = genwqe_flash_read(card, &upd);
		if (rc < 0) {
			/* save errno before further library calls can change it */
			int xerrno = errno;

			pr_err("reading bitstream failed!\n"
			       " %s (errno=%d/%s)\n",
			       card_strerror(rc), xerrno, strerror(xerrno));
			print_move_flash_results(upd.retc, upd.attn,
						 upd.progress);
			rc = EXIT_FAILURE;
			goto __exit;
		}
	} else {
		struct stat s;

		/*
		 * NOTE(review): lstat() does not follow symbolic links, so
		 * for a symlinked bitstream st_size is presumably the size
		 * of the link itself - confirm whether stat() is intended.
		 */
		rc = lstat(upd.fname, &s);
		if (rc != 0) {
			pr_err("cannot find %s!\n", upd.fname);
			rc = EXIT_FAILURE;
			goto __exit;
		}
		if (!force && (s.st_size != (ssize_t)upd.flength)) {
			pr_err("file size %d bytes does not match required "
			       "size of bitstream %d bytes!\n",
			       (int)s.st_size, upd.flength);
			rc = EXIT_FAILURE;
			goto __exit;
		}
		rc = genwqe_flash_update(card, &upd, verify);
		if (rc < 0) {
			int xerrno = errno;

			/* ENOSPC is reported as success, see message below */
			if (xerrno == ENOSPC) {
				pr_info("old bitstream with broken readback. "
					"Skipping verification.\n");
				rc = EXIT_SUCCESS;
				goto __exit;
			}
			pr_err("update process failed!\n"
			       " %s (errno=%d/%s)\n"
			       " Please ensure that you do not see "
			       "HW222218 where we had problems reading "
			       "flash.\n",
			       card_strerror(rc), xerrno, strerror(xerrno));
			print_move_flash_results(upd.retc, upd.attn,
						 upd.progress);
			rc = EXIT_FAILURE;
		}
	}

 __exit:
	genwqe_card_close(card);
	if (rc == EXIT_SUCCESS)
		printf("update process succeeded\n");
	exit(rc);
}
genwqe-user-4.0.18/tools/genwqe_vpd.csv000066400000000000000000000007601303345043000200600ustar00rootroot00000000000000// Example VPD for GenWQE PCIe card "RV",2,"X",102," VPD Revision Fied",,, "PN",7,"A","0ABCDE"," Card Part Number",,, "EC",7,"A","D77TBDX"," Card EC",,, "FN",7,"A","00ABCDE"," FRU Part Number",,, "SN",13,"A","0000Coxxx1111"," Serial Number",,, "FC",5,"A","FCTBD"," Feature Code",,, "CC",4,"A","CCIN"," IBM CCIN",,, "M0",6,"X","D1B2C3E4F5F6"," MAC 0",,, "M1",6,"X","A1A2A3A4A5A6"," This is MAC 1 Address",,, "CS",4,"X",7abc8b0e," Check Sum","more "," more "," more 
test" "// END of VPD",101,,,,,, genwqe-user-4.0.18/tools/genwqe_vpd_common.c000066400000000000000000000220051303345043000210530ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * * VPD utility for GENWQE Project * * VPD Tools from CSV file to binary file * used for making the VPD bin file from a CSV file. * The input file format is defined and fixed. * This tool can also convert a binary file back to * the original CSV file. 
*/ #include #include #include #include #include #include #include #include #include #include "genwqe_tools.h" #include "genwqe_vpd.h" #define MAX_LINE 512 #define GENWQE_VPD_BUFFER_SIZE (64*1024) extern int _dbg_flag; static uint32_t genwqe_crc32_lut[256]; // Search for this in collum 6 in order to add crc32 static char crc_token[]={"CS"}; void genwqe_crc32_setup_lut(void) { int i, j; uint32_t crc; for (i = 0; i < 256; i++) { crc = i << 24; for ( j = 0; j < 8; j++ ) { if (crc & 0x80000000) crc = (crc << 1) ^ CRC32_POLYNOMIAL; else crc = (crc << 1); } genwqe_crc32_lut[i] = crc; } } uint32_t genwqe_crc32_gen(uint8_t *buff, size_t len, uint32_t init) { int i; uint32_t crc; crc = init; while(len--) { i = ((crc >> 24) ^ *buff++) & 0xFF; crc = (crc << 8) ^ genwqe_crc32_lut[i]; } return crc; } static uint8_t a2h(char c) { if (c >= 'A') c -= 7; c = c & 0x0f; return (uint8_t)c; } /** * Converts BIN to CSV file * * op: Output File Pointer * fs: Size of input data in buffer * buffer: a buffer to read the data */ bool bin_2_csv(FILE *op, int fs, uint8_t *buffer) { char mode; char label[4]; int length; int n = 0; int vpd_check_line = 0; union swap_me swap; swap.ui32 = 0; while (fs) { label[0] = *buffer++; label[1] = *buffer++; label[2] = 0; length = *buffer++; mode = *buffer++; if (0 != strcmp(label, vpd_ref_102[vpd_check_line].label)) { pr_err("Binary File Got: %s Expect: %s\n", label, vpd_ref_102[vpd_check_line].label); return false; } if (length != vpd_ref_102[vpd_check_line].length) { pr_err("Binary File Got: %d Expect: %d\n", length, vpd_ref_102[vpd_check_line].length); return false; } if (mode != *vpd_ref_102[vpd_check_line].mode) { pr_err("Binary File Got: %c Expect: %c\n", mode, *vpd_ref_102[vpd_check_line].mode); return false; } fprintf(op, "\"%s\",%d,\"%c\",", label, length, mode); if ('A' == mode) { fprintf(op, "\""); for(n = 0; n < length; n++) { if (isprint(*buffer)) // Check if valid, ignore 0 fprintf(op, "%c", *buffer); buffer++; } fprintf(op, "\""); } if ('X' == 
mode) { if(0 == vpd_check_line) { /* the first Line must have the correct Version */ swap.BYTE.ub8[2] = *buffer; swap.BYTE.ub8[3] = *(buffer+1); swap.ui32 = __be32_to_cpu(swap.ui32); if (swap.ui32 != VPD_VERSION) { pr_err("Wrong Version: %x Expect: %x\n", swap.ui32, VPD_VERSION); return false; } } for(n = 0; n < length; n++) fprintf(op, "%2.2x", *buffer++); } fprintf(op, ",\n"); // Terminate line with , fs -= (4 + length); vpd_check_line++; } /* Check if i did match all tokens */ pr_info("Check for %d of %d tokens in bin file.\n", vpd_check_line, (int)LINES_IN_VPD); if (LINES_IN_VPD != vpd_check_line) { pr_err("%d of %d tokens fond in input.\n", vpd_check_line, (int)LINES_IN_VPD); return false; } return true; } /** * * Converts CSV file to binary file * * Returns true if no errors * * ip: Binary Input file * buffer: Ptr. to Buffer for input data * size: Ptr. to size of data in buffer * crc32_result: Ptr. to CRC of data from input stream (is 0 if match) * crc32_from_csv: Ptr. to the CRC value i found in the CVS file */ bool csv_2_bin(FILE *ip, uint8_t *buffer, int *size, uint32_t *crc32_result, uint32_t *crc32_from_csv) { char line[MAX_LINE]; char token[MAX_LINE]; uint8_t data[MAX_LINE]; unsigned line_nr = 0; uint32_t crc32; uint8_t mode = 0; int i, j; bool parse_error; bool get_crc_value = false; uint8_t ln, hn; // Low nibble and high nibble for converter int data_len, write_size, seek_offset, vpd_check_line; union swap_me csv_crc; union swap_me swap; int good_lines = 0; write_size = 0; seek_offset = 0; crc32 = CRC32_INIT_SEED; vpd_check_line = 0; swap.ui32 = 0; while (NULL != fgets(line, MAX_LINE, ip)) { int field_num = 0; int num_fields; int line_len; int n; ++line_nr; num_fields = 0; memset(token, 0, MAX_LINE); memset(data, 0, MAX_LINE); j = 0; line[strlen(line) - 2] = '\0'; /* remove newline character */ line_len = (int)strlen(line); pr_dbg("Line (#%d) %d: <%s>\n", line_nr, line_len, line); field_num = 0; parse_error = false; for (i = 0; i <= line_len; i++) { 
switch (line[i]) { case ',': case '\0': // End of Line switch (field_num) { case 0: /* 2 Bytes KEY */ n = strlen(&token[0]); if (2 == n) { data[0] = token[0]; data[1] = token[1]; if (0 == strncmp(crc_token, token, 2)) get_crc_value = true; // Set flag to read in crc value if (0 != strcmp(&token[0], vpd_ref_102[vpd_check_line].label)) parse_error = true; } else { i = line_len; // Exit this line only, no Error, just ignore it break; } break; case 1: /* 1 Byte LEN */ parse_error = true; n = sscanf(&token[0], "%d", &data_len); if (1 == n) { if (data_len == vpd_ref_102[vpd_check_line].length) { data[2] = data_len; write_size = 2 + 1 + 1 + data_len; // how many bytes to write parse_error = false; } } break; case 2: /* 1 Byte Mode can be A or X */ parse_error = true; if (1 == strlen(&token[0])) { mode = token[0]; if (0 == strcmp(&token[0], vpd_ref_102[vpd_check_line].mode)) parse_error = false; } break; case 3: /* Data */ parse_error = true; data[3] = mode; // Save mode in Output data n = strlen(&token[0]); if ('A' == mode) { // ASCII mode if (n <= data_len) { memcpy(&data[4], token, n); parse_error = false; } } else if ('X' == mode) { // HEX Mode if (n <= (2* data_len)) { /* start to convert from the end of the line */ while (n) { n--; ln = a2h(token[n]); if (n) { n--; hn = a2h(token[n]); ln |= hn << 4; // combine } data[3+data_len] = ln; data_len--; } parse_error = false; if (0 == vpd_check_line) { /* the first Line must have the correct Version */ swap.BYTE.ub8[2] = data[4]; swap.BYTE.ub8[3] = data[5]; swap.ui32 = __be32_to_cpu(swap.ui32); if (swap.ui32 != VPD_VERSION) { pr_err("Wrong VPD Version found %x\n", swap.ui32); parse_error = true; } } if (get_crc_value) { /* Get CRC from source data */ get_crc_value = false; csv_crc.BYTE.ub8[0] = data[4]; csv_crc.BYTE.ub8[1] = data[5]; csv_crc.BYTE.ub8[2] = data[6]; csv_crc.BYTE.ub8[3] = data[7]; csv_crc.ui32 = __be32_to_cpu(csv_crc.ui32); } } } if (!parse_error) good_lines++; break; default: break; } if (!parse_error) { j = 
0; token[j] = '\0'; field_num++; num_fields++; } break; default: if (0x22 == line[i]) // Skip " continue; token[j++] = line[i]; token[j] = '\0'; break; } if (parse_error) break; } if (parse_error) { pr_err("Line# %d Field: %d Syndrom: <%s>\n", line_nr, field_num, token); return (false); } if (num_fields < 4) { pr_dbg("Skip Line# %d\n", line_nr); continue; } else { /* Add Data to Output buffer */ pr_dbg("Line# %d OK Num Fields %d Offset: %d Size: %d\n", line_nr, num_fields, seek_offset, write_size); memcpy(&buffer[seek_offset], data, write_size); seek_offset += write_size; crc32 = genwqe_crc32_gen(data, write_size, crc32); if (seek_offset > GENWQE_VPD_BUFFER_SIZE) { pr_err("Exit due to out of buffer size %d\n", seek_offset); parse_error = true; break; } } vpd_check_line++; } *size = seek_offset; *crc32_result = crc32; *crc32_from_csv = csv_crc.ui32; // Check if all is ok (FIXME: do i need to check the size also ?? // if (VPD_SIZE != seek_offset) // return (false); if ((parse_error) || (LINES_IN_VPD != good_lines)) return (false); return (true); } genwqe-user-4.0.18/tools/genwqe_vpdconv.c000066400000000000000000000137501303345043000204000ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * * Converter utility for GenWQE Project * * convert from CSV file to binary file * used for making the vpd bin file from a cvs file. * The input file format is defined and fix. 
* This tool can also convert a binary file back to
 * the original CSV file.
 */
#include #include #include #include #include #include #include #include #include #include #include "genwqe_tools.h" #include "genwqe_vpd.h"

static const char *version = GIT_VERSION;
int verbose_flag = 0;
int _dbg_flag = 0;

/**
 * Print the command line help to stdout.
 *
 * name: program name inserted into the usage line
 */
static void usage(char *name)
{
	printf("Usage: %s [OPTIONS]...\n"
	       "\n"
	       "Mandatory arguments to long options are mandatory for short options too.\n"
	       " -h, --help print usage information\n"
	       " -V, --version print version\n"
	       " -i, --input=FILE input filename, uses stdin if option is missing\n"
	       " -o, --output=FILE output filename, uses stdout if option is missing\n"
	       " -v, --verbose verbose mode, multiple v's to increase verbosity\n"
	       " --crcoff do not check and correct crc in output File\n"
	       " --reverse takes as input a binaray file and creates a CSV output file\n"
	       "\n"
	       "This utility converts a comma separated VPD file (CSV file) for the GenWQE Card\n"
	       "to a binary file which can be used for flash programming for VPD data.\n"
	       "The CVS input file format (0x%x) is fix. Only the data can be changed.\n",
	       name, VPD_VERSION);
}

/* Global flags modified by getopt_long() function */
static int make_crc = 1;	/* cleared by --crcoff */
static int reverse_mode = 0;	/* set to 1 by --reverse */

/*
 * The last two entries carry a flag pointer: getopt_long() stores the
 * value itself and returns 0 for them.
 */
static struct option long_options[] = {
	{ "input", required_argument, NULL, 'i' },
	{ "output", required_argument, NULL, 'o' },
	{ "version", no_argument, NULL, 'V' },
	{ "verbose", no_argument, NULL, 'v' },
	{ "help", no_argument, NULL, 'h' },
	{ "crcoff", no_argument, &make_crc, 0 },
	{ "reverse", no_argument, &reverse_mode, 1 },
	{ 0, no_argument, NULL, 0 },
};

/**
 * Get command line parameters and create the output file.
*/ int main(int argc, char *argv[]) { int option; char *input_file = NULL; char *output_file = NULL; FILE *ip = NULL, *op = NULL; uint8_t *buffer = NULL; uint32_t crc32, crc32_from_csv; int size, rc = EXIT_SUCCESS; union swap_me new_crc32; size_t file_size; /* Process command line args */ while (1) { int option_index = 0; option = getopt_long(argc, argv, "i:o:vVh", long_options, &option_index); if (EOF == option) /* all params processed ? */ break; switch (option) { case 0: /* Long options will go here, but i have nothing to do */ break; case 'i': if (NULL != optarg) input_file = optarg; else { usage(argv[0]); exit(EXIT_FAILURE); } break; case 'o': if (NULL != optarg) output_file = optarg; else { usage(argv[0]); exit(EXIT_FAILURE); } break; case 'h': usage(argv[0]); exit(EXIT_SUCCESS); break; case 'V': fprintf(stdout, "%s\n", version); exit(EXIT_SUCCESS); break; case 'v': verbose_flag++; if (verbose_flag >1) _dbg_flag++; break; default: usage(argv[0]); exit(EXIT_FAILURE); break; } } if (optind < argc) { input_file = argv[optind++]; if (optind < argc) { output_file = argv[optind++]; if (optind < argc) { pr_err("Too many args\n"); exit(EXIT_FAILURE); } } } if (input_file) { pr_dbg("Input File: <%s>\n", input_file); ip = fopen(input_file, "r"); if (NULL == ip) { pr_err("%s Open: <%s>\n", strerror(errno), input_file); exit (EXIT_FAILURE); } } if (output_file) { pr_dbg("Output File: <%s>\n", output_file); op = fopen(output_file, "w"); if (NULL == op) { pr_err("%s Open: <%s>\n", strerror(errno), output_file); if (ip) fclose(ip); exit (EXIT_FAILURE); } } /* Use stdin if ip not set */ if (NULL == ip) { pr_dbg("Read from stdin\n"); ip = stdin; } /* Use stdout if op not set */ if (NULL == op) { pr_dbg("Write to stdout\n"); op = stdout; } buffer = malloc(GENWQE_VPD_BUFFER_SIZE); if (buffer) { genwqe_crc32_setup_lut(); if (1 == reverse_mode) { // --reverse option was set file_size = fread(buffer, 1, GENWQE_VPD_BUFFER_SIZE, ip); pr_dbg("Bin file now in buffer = %d\n", 
(int)file_size); if (VPD_SIZE != file_size) { pr_err("Your Binary input does have %d of %d Bytes\n", (int)file_size, (int)VPD_SIZE); } else bin_2_csv(op, file_size, buffer); } else { if (csv_2_bin(ip, buffer, &size, &crc32, &crc32_from_csv)) { if ((0 != crc32) && (1 == make_crc)) { crc32 = genwqe_crc32_gen(buffer, size-4, CRC32_INIT_SEED); new_crc32.ui32 = __be32_to_cpu(crc32); buffer[size-4] = new_crc32.BYTE.ub8[0]; buffer[size-3] = new_crc32.BYTE.ub8[1]; buffer[size-2] = new_crc32.BYTE.ub8[2]; buffer[size-1] = new_crc32.BYTE.ub8[3]; pr_info("Input CRC: 0x%x -> Good CRC: 0x%x added to Output.\n", crc32_from_csv, crc32); } fwrite(buffer, 1, size, op); } else { rc = EXIT_FAILURE; } } free(buffer); } else { pr_err("Malloc(%d)\n", GENWQE_VPD_BUFFER_SIZE); rc = EXIT_FAILURE; } pr_dbg("Close Input and Output.\n"); fclose(ip); fclose(op); // Close Output may result in a empty file on error pr_info("Exit with rc: %d\n", rc); exit(rc); } genwqe-user-4.0.18/tools/genwqe_vpdupdate.c000066400000000000000000000166241303345043000207200ustar00rootroot00000000000000/* * Copyright 2015, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * This utility updates the GenWQE flash with VPD data. This version * can only take a binary input file. 
*/ #include #include #include #include #include #include #include #include #include "libcard.h" #include "genwqe_tools.h" #include "genwqe_vpd.h" int verbose_flag = 0; int _dbg_flag = 0; static const char *version = GIT_VERSION; /** * @brief prints valid command line options * * @param prog current program's name */ static void usage(const char *prog) { fprintf(stdout, "Usage: %s [OPTIONS]...\n" "\n" "Mandatory arguments to long options are mandatory for short options too.\n" " -h, --help print usage information\n" " -V, --version print version\n" " -C, --card=CARDNO\n" " -f, --file=VPD.BIN\n" " -d, --dump use multiple d to increase dump info\n" " -u, --update set this flag for update VPD\n" " -s, --show set this flag to display VPD from a card\n" " -v, --verbose verbose level, use multiple v's to increase\n" "\n" "This utility updates the Genwqes FLASH with new VPD\n" "information from a *.bin file. Do not disconnect the card from power\n" "while updating. Ensure you have the correct update\n" "image. Use of incorrect images or interrupting the update\n" "will make the card unusable. 
In this case you need a\n" "USB-Blaster utility or similar to get it working again.\n" "\n" "Example flashing new vpd to GenWQE card:\n" " %s -C0 -f vpd.bin\n" "Example to display vpd from GenWQE card to stdout:\n" " %s -C0 -s\n" "Example to display and dump vpd from GenWQE card to stdout:\n" " %s -C0 -s -d\n" "\n", prog, prog, prog, prog); } static int __dump_vpd(card_handle_t card, int dump_level, FILE *fp) { bool bin_ok = false; int rc = EXIT_FAILURE; genwqe_vpd vpd; uint32_t crc = 0; rc = genwqe_read_vpd(card, &vpd); if (GENWQE_OK == rc) { if (dump_level) genwqe_hexdump(fp, (uint8_t *)&vpd, VPD_SIZE); pr_info("Checking now Binary VPD data from Card\n"); crc = genwqe_crc32_gen((uint8_t *)&vpd, VPD_SIZE, CRC32_INIT_SEED); if (0 == crc) pr_info("Found Good VPD CRC\n"); else pr_err("Wrong CRC in VPD 0x%x\n", crc); pr_info("Display VPD data from Card\n"); bin_ok = bin_2_csv(fp, VPD_SIZE, (uint8_t *)&vpd); if (bin_ok) rc = EXIT_SUCCESS; else pr_err("Invalid VPD. Use -dd option to dump data.\n"); } else pr_err("Faild to read VPD from Card (%d). Check -C option.\n", rc); return rc; } static int __update_vpd(card_handle_t card, FILE *fp) { bool bin_ok = false; int rc = EXIT_FAILURE; genwqe_vpd vpd; int n; uint32_t crc = 0; n = fread((uint8_t *)&vpd, 1 , VPD_SIZE, fp); if (VPD_SIZE == n) { crc = genwqe_crc32_gen((uint8_t *)&vpd, VPD_SIZE, CRC32_INIT_SEED); if (0 == crc) { pr_dbg("Input data CRC OK, Updating Card Now.\n"); bin_ok = bin_2_csv(stdout, VPD_SIZE, (uint8_t *)&vpd); if (bin_ok) { rc = genwqe_write_vpd(card, &vpd); if (rc == GENWQE_OK) rc = EXIT_SUCCESS; } else pr_err("Invalid input file. 
Use -v option.\n"); } else pr_err("Invalid CRC: 0x%x in input file.\n", crc); } else pr_err("%s\n", strerror(errno)); return rc; } static struct option long_options[] = { /* functions */ { "read", no_argument, NULL, 'r' }, { "dump", no_argument, NULL, 'd' }, { "update", no_argument, NULL, 'u' }, { "show", no_argument, NULL, 's' }, { "file", required_argument, NULL, 'f' }, { "card", required_argument, NULL, 'C' }, { "version", no_argument, NULL, 'V' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, 'h' }, { 0, no_argument, NULL, 0 }, }; /** * @brief main function for update Genwqe's Image Flash */ int main(int argc, char *argv[]) { int ch, rc=0, err_code; int card_no = -1; int update_vpd = 0; int show_vpd = 0; card_handle_t card; char *env; char *fname = NULL; FILE *fp_in = NULL; // Input file FILE *fp_out = NULL; // Output file while (1) { int option_index = 0; ch = getopt_long(argc, argv, "vdusC:f:Vh", long_options, &option_index); if (ch == -1) /* all params processed ? */ break; switch (ch) { /* which card to use */ case 'C': /* -C or --card */ card_no = strtol(optarg, (char **)NULL, 0); break; case 'f': /* -f or --file */ fname = optarg; break; case 'u': /* -u or --update */ update_vpd = 1; break; case 's': /* -s or --show */ show_vpd = 1; break; case 'h': usage(argv[0]); exit(EXIT_SUCCESS); break; case 'V': fprintf(stdout, "%s\n", version); exit(EXIT_SUCCESS); case 'v': verbose_flag++; break; case 'd': /* -d or --dump */ _dbg_flag++; break; default: usage(argv[0]); exit(EXIT_FAILURE); } } if (update_vpd && show_vpd) { fprintf(stderr, "Please give only -u or -s Option.\n"); exit(EXIT_FAILURE); } /* Check Options. 
i expect either show or update, set fp_in and fp_out */ if (update_vpd) { /* -i or --update is set */ if (NULL == fname) { fp_in = stdin; pr_info("Input from stdin.\n"); } else { pr_info("Input File: <%s>\n", fname); fp_in = fopen(fname, "r"); if (NULL == fp_in) { pr_err("%s Open Errno: <%s>\n", fname, strerror(errno)); exit (EXIT_FAILURE); } fp_out = stdout; } } if (show_vpd) { /* -s or --show is set */ if (NULL == fname) { fp_out = stdout; pr_info("Output to stdout.\n"); } else { fp_out = fopen(fname, "w"); if (NULL == fp_out) { pr_err("%s Open Err: <%s>\n", fname, strerror(errno)); exit (EXIT_FAILURE); } } } if ((NULL == fp_in) && (NULL == fp_out)) { fprintf(stderr, "Please give -u or -s Option\n"); exit(EXIT_FAILURE); } /* simulation is not supported with this tool */ env = getenv("GENWQE_SIM"); if ((env) && (atoi(env) > 0)) { pr_err("driver / HW simulation active !\n"); if (show_vpd && fname) fclose(fp_out); if (update_vpd && fname) fclose(fp_in); exit(EXIT_FAILURE); } /* Check for a valid card number */ if (-1 == card_no) { pr_err("Specify a valid GENWQE Card number (e.g. 
-C 0)\n"); if (show_vpd && fname) fclose(fp_out); if (update_vpd && fname) fclose(fp_in); exit(EXIT_FAILURE); } pr_info("Try to open Card: %d\n", card_no); card = genwqe_card_open(card_no, GENWQE_MODE_RDWR, &err_code, 0, GENWQE_APPL_ID_IGNORE); if (NULL == card) { pr_err("cannot open Genwqe Card: %d (err: %d)\n", card_no, err_code); if (show_vpd && fname) fclose(fp_out); if (update_vpd && fname) fclose(fp_in); exit(EXIT_FAILURE); } /* No do the Action */ genwqe_crc32_setup_lut(); /* Setup CRC lu table */ if (show_vpd) rc = __dump_vpd(card, _dbg_flag, fp_out); if (update_vpd) rc = __update_vpd(card, fp_in); genwqe_card_close(card); /* Close open files */ if (show_vpd && fname) fclose(fp_out); if (update_vpd && fname) fclose(fp_in); exit(rc); } genwqe-user-4.0.18/tools/gzFile_test.c000066400000000000000000000306251303345043000176320ustar00rootroot00000000000000/* * Copyright 2016, International Business Machines * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * zpipe.c: example of proper use of zlib's inflate() and deflate() * Not copyrighted -- provided to the public domain * Version 1.4 11 December 2005 Mark Adler */ /* * Test the gzFile functionality provided by zlib.h. Not intended to * use for production and example. 
*/
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

/* expands to nothing here: no end-of-line conversion is configured */
#define SET_BINARY_MODE(file)

/** common error printf: prefixes the message and writes to stderr */
#define pr_err(fmt, ...) do { \
		fprintf(stderr, "gzFile_test: " fmt, ## __VA_ARGS__); \
	} while (0)

/** common info printf: also writes to stderr, without prefix */
#define pr_info(fmt, ...) do { \
		fprintf(stderr, fmt, ## __VA_ARGS__); \
	} while (0)

static const char *version = GIT_VERSION;

/* default buffer sizes, can be changed with -i and -o */
static unsigned long CHUNK_i = 32 * 1024;
static unsigned long CHUNK_o = 8 * 1024; /* too small ;-) */
static int verbose = 0;

#if ZLIB_VERNUM < 0x1270
/*
 * Testcase will only handle 32-bit offsets when using zlib version
 * smaller than 1.2.7. The 64-bit entry points used by this file are
 * mapped to the plain gzopen/gztell/gzseek calls.
 */
static gzFile gzopen64(const char *path, const char *mode)
{
	return gzopen(path, mode);
}

typedef off64_t z_off64_t;

static z_off64_t gztell64(gzFile file)
{
	return gztell(file);
}

static z_off_t gzseek64(gzFile file, z_off64_t offset, int whence)
{
	return gzseek(file, offset, whence);
}
#endif

/**
 * Common tool return codes
 * 0: EX_OK/EXIT_SUCCESS
 * 1: Catchall for general errors/EXIT_FAILURE
 * 2: Misuse of shell builtins (according to Bash documentation)
 * 64..78: predefined in sysexits.h
 *
 * 79..128: Exit codes for our applications
 *
 * 126: Command invoked cannot execute
 * 127: "command not found"
 * 128: Invalid argument to exit
 * 128+n: Fatal error signal "n"
 * 255: Exit status out of range (exit takes only integer args in the
 * range 0 - 255)
 */
#define EX_ERRNO 79 /* libc problem */
#define EX_MEMORY 80 /* mem alloc failed */
#define EX_ERR_DATA 81 /* data not as expected */
#define EX_ERR_CRC 82 /* CRC wrong */
#define EX_ERR_ADLER 83 /* Adler checksum wrong */
#define EX_ERR_CARD 84 /* accelerator problem */
#define EX_COMPRESS 85 /* compression did not work */
#define EX_DECOMPRESS 86 /* decompression failed */
#define EX_ERR_DICT 87 /* dictionary compare failed */

/**
 * str_to_num() - Convert string into number and 
cope with endings like * KiB for kilobyte * MiB for megabyte * GiB for gigabyte */ static inline uint64_t str_to_num(char *str) { char *s = str; uint64_t num = strtoull(s, &s, 0); if (*s == '\0') return num; if (strcmp(s, "KiB") == 0) num *= 1024; else if (strcmp(s, "MiB") == 0) num *= 1024 * 1024; else if (strcmp(s, "GiB") == 0) num *= 1024 * 1024 * 1024; return num; } static void usage(FILE *fp, char *prog) { fprintf(fp, "Usage: %s [OPTION]... [IN_FILE] [OUT_FILE]...\n" "\n" "Special options for testing and debugging:\n" " -v, --verbose\n" " -A, --accelerator-type=GENWQE|CAPI CAPI is only available for IBM System p\n" " -B, --card= -1 is for automatic card selection\n" " -O, --offset= Cut out data at this byte offset.\n" " -s, --size= Cut bytes out.\n" " -i, --i_bufsize input buffer size (%ld KiB)\n" " -o, --o_bufsize output buffer size (%ld KiB)\n" "\n" "Report bugs via https://github.com/ibm-genwqe/genwqe-user.\n" "\n", prog, CHUNK_i/1024, CHUNK_o/1024); } static inline ssize_t file_size(const char *fname) { int rc; struct stat s; rc = lstat(fname, &s); if (rc != 0) { fprintf(stderr, "err: Cannot find %s!\n", fname); return rc; } return s.st_size; } static inline ssize_t file_read(const char *fname, uint8_t *buff, size_t len) { int rc; FILE *fp; if ((fname == NULL) || (buff == NULL) || (len == 0)) return -EINVAL; fp = fopen(fname, "r"); if (!fp) { fprintf(stderr, "err: Cannot open file %s: %s\n", fname, strerror(errno)); return -ENODEV; } rc = fread(buff, len, 1, fp); if (rc == -1) { fprintf(stderr, "err: Cannot read from %s: %s\n", fname, strerror(errno)); fclose(fp); return -EIO; } fclose(fp); return rc; } static inline ssize_t file_write(const char *fname, const uint8_t *buff, size_t len) { int rc; FILE *fp; if ((fname == NULL) || (buff == NULL) || (len == 0)) return -EINVAL; fp = fopen(fname, "w+"); if (!fp) { fprintf(stderr, "err: Cannot open file %s: %s\n", fname, strerror(errno)); return -ENODEV; } rc = fwrite(buff, len, 1, fp); if (rc == -1) { 
fprintf(stderr, "err: Cannot write to %s: %s\n", fname, strerror(errno)); fclose(fp); return -EIO; } fclose(fp); return rc; } static int do_compress(const char *i_fname, const char *o_fname, size_t chunk_i, size_t chunk_o __attribute__((unused)), int level) { char mode[16]; gzFile ofp; FILE *ifp; ssize_t len; int rc; uint8_t *buf; ifp = fopen(i_fname, "r"); if (ifp == NULL) { pr_err("Could not open %s, %s\n", i_fname, strerror(errno)); return -1; } buf = malloc(chunk_i); if (NULL == buf) { pr_err("%s\n", strerror(errno)); goto err_ifp; } sprintf(mode, "wb%d", level); ofp = gzopen64(o_fname, mode); if (ofp == NULL) { pr_err("Could not open %s\n", o_fname); goto err_buf; } #if ZLIB_VERNUM >= 0x1270 rc = gzbuffer(ofp, chunk_o); if (rc != 0) { pr_err("Could not set gzFile buffer size %d\n", rc); goto err_ofp; } #endif do { len = fread(buf, 1, chunk_i, ifp); if (ferror(ifp)) { pr_err("ferror %d\n", (int)len); goto err_ofp; } rc = gzwrite(ofp, buf, len); if (rc == 0) { pr_err("gzwrite %d\n", rc); goto err_ofp; } if (verbose == 1) pr_info(" gztell64 returned %lld\n", (long long)gztell64(ofp)); } while (!feof(ifp)); gzclose(ofp); free(buf); fclose(ifp); return 0; err_ofp: gzclose(ofp); err_buf: free(buf); err_ifp: fclose(ifp); return -1; } static int do_decompress(const char *i_fname, const char *o_fname, size_t chunk_i, size_t chunk_o __attribute__((unused)), off64_t offs, ssize_t size) { gzFile ifp; FILE *ofp; ssize_t len, written_bytes = 0; int rc; uint8_t *buf; ofp = fopen(o_fname, "w+"); if (ofp == NULL) { pr_err("Could not open %s\n", o_fname); return -1; } buf = malloc(chunk_i); if (NULL == buf) { pr_err("%s\n", strerror(errno)); goto err_ofp; } ifp = gzopen(i_fname, "rb"); if (ifp == NULL) { pr_err("Could not open %s\n", i_fname); goto err_buf; } #if ZLIB_VERNUM >= 0x1270 rc = gzbuffer(ifp, chunk_o); if (rc != 0) { pr_err("Could not set gzFile buffer size %d\n", rc); goto err_ifp; } #endif /* If size is not 0, we intend to cut some data out. 
Seek to the right offset to start at the right position */
	/*
	 * NOTE(review): zlib documents gzseek64() as returning the resulting
	 * offset on success, so reporting an error when offs_rc == offs looks
	 * inverted (the error fires when the seek landed exactly on offs).
	 * The start of this function is outside this section - confirm
	 * before changing.
	 */
	if (size != 0) {
		off64_t offs_rc;
		offs_rc = gzseek64(ifp, offs, SEEK_SET);
		if (offs_rc == offs) {
			pr_err("Could not seek %lld to desired offset %lld\n", (long long)offs_rc, (long long)offs);
			goto err_ifp;
		}
	}
	do {
		len = gzread(ifp, buf, chunk_i);
		if (len < 0) {
			pr_err("gzread error %d\n", (int)len);
			goto err_ifp;
		}
		if (verbose == 1)
			pr_info(" gztell64 returned %lld\n", (long long)gztell64(ifp));
		if (verbose)
			pr_info(" read %lld bytes\n", (long long)len);
		if (len == 0)
			break;
		/* If size is not 0, we intend to cut some data out. */
		/* We have read a little bit too much. */
		if (size != 0)
			if (written_bytes + len > size)
				len = size - written_bytes;
		if (verbose)
			pr_info(" write %lld bytes\n", (long long)len);
		rc = fwrite(buf, len, 1, ofp);
		if (rc < 1) {
			pr_err("fwrite %d\n", rc);
			goto err_ifp;
		}
		written_bytes += len;
		/* If size is not 0, we intend to cut some data out. */
		/* We have enough data. */
		if ((size != 0ull) && (size == written_bytes))
			break;
		if (verbose)
			pr_err("len=%lld chunk_i=%lld\n", (long long)len, (long long)chunk_i);
	} while (len <= (int)chunk_i); /* is this right? */
	rc = gzclose(ifp);
	if (rc != Z_OK) {
		pr_err("gzclose error %d\n", rc);
		goto err_buf;
	}
	free(buf);
	fclose(ofp);
	return 0;
 err_ifp:
	rc = gzclose(ifp);
	if (rc != Z_OK)
		pr_err("gzclose error %d\n", rc);
 err_buf:
	free(buf);
 err_ofp:
	fclose(ofp);
	return -1;
}

/**
 * main() - Compress or decompress one file into another.
 *
 * Defaults for the accelerator type and card number are read from the
 * environment variables ZLIB_ACCELERATOR and ZLIB_CARD; the options -A
 * and -B override them. Exactly two positional arguments are required:
 * the input file followed by the output file. An input file that is a
 * symbolic link is rejected.
 *
 * Compression is the default, -d selects decompression. The process
 * exits with the return code of do_compress() or do_decompress().
 */
int main(int argc, char **argv)
{
	int rc = Z_OK;
	int level = Z_DEFAULT_COMPRESSION;
	char *prog = basename(argv[0]);
	unsigned long size = 0;		/* -s: passed to do_decompress() */
	off64_t offs = 0;		/* -O: passed to do_decompress() */
	const char *i_fname = NULL; /* input */
	const char *o_fname = NULL; /* output */
	bool use_compress = true;
	struct stat s;
	const char *accel = "GENWQE";
	const char *accel_env = getenv("ZLIB_ACCELERATOR");
	int card_no = 0;
	const char *card_no_env = getenv("ZLIB_CARD");

	/* Use environment variables as defaults. Command line options
	   can then overrule this. */
	if (accel_env != NULL)
		accel = accel_env;
	if (card_no_env != NULL)
		card_no = atoi(card_no_env);

	/* avoid end-of-line conversions */
	SET_BINARY_MODE(stdin);
	SET_BINARY_MODE(stdout);

	while (1) {
		int ch;
		int option_index = 0;
		static struct option long_options[] = {
			{ "help", no_argument, NULL, 'h' },
			/* our own options */
			{ "accelerator-type", required_argument, NULL, 'A' },
			{ "card_no", required_argument, NULL, 'B' },
			{ "size", required_argument, NULL, 's' },
			{ "offset", required_argument, NULL, 'O' },
			{ "decompress", no_argument, NULL, 'd' },
			{ "i_bufsize", required_argument, NULL, 'i' },
			{ "o_bufsize", required_argument, NULL, 'o' },
			{ 0, no_argument, NULL, 0 },
		};
		ch = getopt_long(argc, argv, "123456789A:B:di:o:s:O:h?Vv", long_options, &option_index);
		if (ch == -1) /* all params processed ? */
			break;
		switch (ch) {
		case 'A':
			accel = optarg;
			break;
		case 'B':
			card_no = strtol(optarg, (char **)NULL, 0);
			break;
		case 's':
			size = strtol(optarg, (char **)NULL, 0);
			break;
		case 'O':
			offs = strtol(optarg, (char **)NULL, 0);
			break;
		case 'd':
			use_compress = false;
			break;
		/* -1 ... -9 select the compression level */
		case '1':
			level = Z_BEST_SPEED;
			break;
		case '2':
			level = 2;
			break;
		case '3':
			level = 3;
			break;
		case '4':
			level = 4;
			break;
		case '5':
			level = 5;
			break;
		case '6':
			level = 6;
			break;
		case '7':
			level = 7;
			break;
		case '8':
			level = 8;
			break;
		case '9':
			level = Z_BEST_COMPRESSION;
			break;
		case 'v':
			verbose++;
			break;
		case 'V':
			fprintf(stdout, "%s\n", version);
			exit(EXIT_SUCCESS);
			break;
		case 'i':
			CHUNK_i = str_to_num(optarg);
			break;
		case 'o':
			CHUNK_o = str_to_num(optarg);
			break;
		/* '?' is also what getopt_long() returns for an unknown
		   option, so that case prints the usage and exits with
		   EXIT_SUCCESS as well. */
		case 'h':
		case '?':
			usage(stdout, prog);
			exit(EXIT_SUCCESS);
			break;
		}
	}

	/* Both directions are switched to the hardware implementation. */
	zlib_set_accelerator(accel, card_no);
	zlib_set_inflate_impl(ZLIB_HW_IMPL);
	zlib_set_deflate_impl(ZLIB_HW_IMPL);

	if (optind < argc) { /* input file */
		i_fname = argv[optind++];
		/* lstat() does not follow links, so a symlink is detected
		   here instead of being resolved. */
		rc = lstat(i_fname, &s);
		if (rc != 0) {
			pr_err("File %s does not exist!\n", i_fname);
			exit(EX_ERRNO);
		}
		if ((rc == 0) && S_ISLNK(s.st_mode)) {
			pr_err("%s: Too many levels of symbolic links\n", i_fname);
			exit(EXIT_FAILURE);
		}
	}
	if (optind < argc) { /* output file */
		o_fname = argv[optind++];
	} else {
		usage(stderr, prog);
		exit(EXIT_FAILURE);
	}
	if (optind != argc) { /* now it must fit */
		usage(stderr, prog);
		exit(EXIT_FAILURE);
	}

	fprintf(stderr, "%sCompress %s to %s in %ld bytes, " "out %ld bytes chunks with level %d (size=%lld, offs=%lld)\n", use_compress ? "" : "De", i_fname, o_fname, CHUNK_i, CHUNK_o, level, (long long)size, (long long)offs);

	if (use_compress)
		rc = do_compress(i_fname, o_fname, CHUNK_i, CHUNK_o, level);
	else
		rc = do_decompress(i_fname, o_fname, CHUNK_i, CHUNK_o, offs, size);
	exit(rc);
}
genwqe-user-4.0.18/tools/zlib_mt_perf.c000066400000000000000000000465041303345043000200320ustar00rootroot00000000000000/*
 * Copyright 2015, International Business Machines
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ /* zpipe.c: example of proper use of zlib's inflate() and deflate() Not copyrighted -- provided to the public domain Version 1.4 11 December 2005 Mark Adler */ /* * Mount debugfs on old RHEL systems: * sudo mount -t debugfs /sys/kernel/debug * * Repro hardware multithreaded problem with: * export ZLIB_DEFLATE_IMPL=1 * export ZLIB_INFLATE_IMPL=1 * export ZLIB_IBUF_TOTAL=0 * export ZLIB_OBUF_TOTAL=0 * make && ./zlib_mt_perf -t4 -c1000 -i1KiB -o1KiB -d4KiB -P * * * Check the influence of multithreading on INFLATE performance: * * for t in 1 2 3 4 8 16 32 64 ; do \ * ZLIB_INFLATE_IMPL=0x01 ./tests/zlib/tools/zlib_mt_perf \ * -i32KiB -o32KiB -f test_data.bin.gz -c2 -t$t ; \ * done * * Same for DEFLATE: * for t in 1 2 3 4 8 16 32 64 ; do \ * ZLIB_INFLATE_IMPL=0x01 ./tests/zlib/tools/zlib_mt_perf -D \ * -i32KiB -o32KiB -f test_data.bin -c2 -t$t ; \ * done */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zlib.h" #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) # include # include # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) #else # define SET_BINARY_MODE(file) #endif /* FIXME Fake this for old RHEL versions e.g. RHEL5.6 */ #ifndef CPU_ALLOC #define CPU_ALLOC(cpus) ({ void *ptr = NULL; ptr; }) #define CPU_ALLOC_SIZE(cpus) ({ int val = 0; val; }) #define CPU_ISSET_S(cpu, size, cpusetp) ({ int val = 0; val; }) #define CPU_FREE(cpusetp) #define CPU_ZERO_S(size, cpusetp) #define CPU_SET_S(run_cpu, size, cpusetp) #define sched_getcpu() ({ int val = 0; val; }) #define sched_setaffinity(x, size, cpusetp) ({ int val = 0; val; }) #endif /* FIXME Fake this for old RHEL versions e.g. RHEL5.6 */ #ifndef CLOCK_MONOTONIC_RAW #define clock_gettime(clk_id, tp) ({ int val = 0; val; }) #endif #ifndef MAX # define MAX(x, y) ((x) > (y) ? 
(x) : (y))
#endif

/* Version string printed by -V; GIT_VERSION is not defined in this file. */
static const char *version = GIT_VERSION;
/* Initialized and destroyed in main(); not locked by any code in this section. */
static pthread_mutex_t mutex;
/* Print the column header line before the results; cleared by -N. */
static bool print_hdr = true;
/* Verbosity level, incremented by each -v; consumed by printfv(). */
static int verbose = 0;
/* Number of passes over the input file each thread performs (-c). */
static unsigned int count = 0;
/* Input and output buffer sizes in bytes (-i / -o). */
static unsigned int CHUNK_i = 128 * 1024; /* 16384; */
static unsigned int CHUNK_o = 128 * 1024; /* 16384; */
/* Number of worker threads (-t). */
static unsigned int threads = 1;
/* Array with one entry per worker thread, allocated in main(). */
static struct thread_data *d;
/* Set to 1 by a failing worker; the workers test it before each pass. */
static int exit_on_err = 0;
/* 1: run inflate (default), 0: run deflate (-D). */
static unsigned int infl_ndefl = 1; // inflate
/* Both names are filled from the -f argument; the deflate threads open
   i_fname, the inflate threads open c_fname. */
static char i_fname[128], c_fname[128];
/* 1: pin_to_cpu() is called before each thread is created (-X). */
static unsigned int pin_cpu_ena = 0;
/* Time in nsec from before the first pthread_create() until after the
   last pthread_join(), accumulated by run_threads(). */
static unsigned long int time_ns_threads = 0;

/* Print to stderr if the verbosity is at least @level. */
#define printfv(level, fmt, ...) do { \
	if ((verbose) >= (level)) \
		fprintf(stderr, fmt, ## __VA_ARGS__); \
} while (0)

/* Per-thread bookkeeping: identification, return code and statistics. */
struct thread_data {
	pthread_t thread_id; // Thread id assigned by pthread_create()
	pid_t tid; // inp: thread id
	int thread_rc; // ret: rc of thread
	int cpu; // inp: cpu running on
	bool first_run; // true until the first pass has stored its checksum
	unsigned int comp_calls; // ret: # of compression calls
	unsigned int decomp_calls; // ret: # of decompression calls
	unsigned long defl_total; // ret: total bytes compressed
	unsigned long defl_time; // ret: total time used for compression
	unsigned long infl_total; // ret: total bytes decompressed
	unsigned long infl_time; // ret: total time used for decompression
	unsigned char *in; // inp: pre-alloc memory ptr
	unsigned char *out; // inp: pre-alloc memory ptr
	uint32_t checksum; // checksum of the first pass, later passes must match
} __attribute__((__may_alias__));

/**
 * Try to pin the process to a specific CPU. Returns the CPU we are
 * currently running on.
*/ static int pin_to_cpu(int run_cpu) { cpu_set_t *cpusetp; size_t size; int num_cpus; num_cpus = CPU_SETSIZE; /* take default, currently 1024 */ cpusetp = CPU_ALLOC(num_cpus); if (cpusetp == NULL) return sched_getcpu(); size = CPU_ALLOC_SIZE(num_cpus); CPU_ZERO_S(size, cpusetp); CPU_SET_S(run_cpu, size, cpusetp); if (sched_setaffinity(0, size, cpusetp) < 0) { CPU_FREE(cpusetp); return sched_getcpu(); } /* figure out on which cpus we actually run */ CPU_FREE(cpusetp); return run_cpu; } static pid_t gettid(void) { return (pid_t)syscall(SYS_gettid); } static inline unsigned long get_nsec(void) { struct timespec ptime = { .tv_sec = 0, .tv_nsec = 0 }; clock_gettime(CLOCK_MONOTONIC_RAW, &ptime); return ptime.tv_sec * 1000000000 + ptime.tv_nsec; } static inline void *__malloc(size_t size) { int rc; void *ptr; rc = posix_memalign(&ptr, sysconf(_SC_PAGESIZE), size); if (rc != 0) { fprintf(stderr, "err: errno=%d %s\n", errno, strerror(errno)); return NULL; } return ptr; } static inline void __free(void *ptr) { if (!ptr) return; free(ptr); } /* Compress from file source to file dest until EOF on source. def() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_STREAM_ERROR if an invalid compression level is supplied, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. 
*/ static int defl(struct thread_data *d, FILE *source, int level) { int ret, flush; //unsigned have; z_stream strm; unsigned char *in; unsigned char *out; unsigned int chunk_i = CHUNK_i; unsigned int chunk_o = CHUNK_o; unsigned long int time_ns_beg, time_ns_end; unsigned long int time_ns = 0; in = __malloc(CHUNK_i); if (in == NULL) return Z_ERRNO; out = __malloc(CHUNK_o); if (out == NULL) return Z_ERRNO; /* allocate deflate state */ strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; ret = deflateInit2(&strm, level, Z_DEFLATED, 31, 8, Z_DEFAULT_STRATEGY); if (ret != Z_OK) { __free(in); __free(out); return ret; } /* compress until end of file */ do { strm.avail_in = fread(in, 1, chunk_i, source); if (ferror(source)) { (void)deflateEnd(&strm); __free(in); __free(out); return Z_ERRNO; } flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; strm.next_in = in; /* run deflate() on input until output buffer not full, finish compression if all of source has been read in */ do { strm.avail_out = chunk_o; strm.next_out = out; time_ns_beg=get_nsec(); ret = deflate(&strm, flush); /* no bad ret value */ time_ns_end=get_nsec(); time_ns += (time_ns_end - time_ns_beg); d->comp_calls++; assert(ret != Z_STREAM_ERROR); /* not clobbered */ /* Throw away results, we just like to know how fast we are, no checking done. 
*/ // have = chunk_o - strm.avail_out; // if (fwrite(out, 1, have, dest) != have || // ferror(dest)) { // (void)deflateEnd(&strm); // __free(in); // __free(out); // return Z_ERRNO; // } } while (strm.avail_out == 0); assert(strm.avail_in == 0); /* all input will be used */ /* done when last data in file processed */ } while (flush != Z_FINISH); assert(ret == Z_STREAM_END); /* stream will be complete */ d->defl_total += strm.total_in; d->defl_time += time_ns; ret = Z_OK; if (d->first_run) { d->checksum = strm.adler; d->first_run = false; } else if (strm.adler != d->checksum) { fprintf(stderr, "Err: checksum mismatch %08lx != %08x\n", strm.adler, d->checksum); ret = Z_STREAM_ERROR; } /* clean up and return */ (void)deflateEnd(&strm); __free(in); __free(out); return ret; } /* Decompress from file source to file dest until stream ends or EOF. inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be allocated for processing, Z_DATA_ERROR if the deflate data is invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and the version of the library linked do not match, or Z_ERRNO if there is an error reading or writing the files. 
*/ static int infl(struct thread_data *d, FILE *source) { int ret; // unsigned have; z_stream strm; unsigned char *in; unsigned char *out; unsigned int chunk_i = CHUNK_i; unsigned int chunk_o = CHUNK_o; unsigned long int time_ns_beg, time_ns_end; unsigned long int time_ns = 0; in = __malloc(CHUNK_i); if (in == NULL) return Z_ERRNO; out = __malloc(CHUNK_o); if (out == NULL) return Z_ERRNO; /* allocate inflate state */ strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; strm.avail_in = 0; strm.next_in = Z_NULL; ret = inflateInit2(&strm,31); // GZIP Format if (ret != Z_OK) { __free(in); __free(out); return ret; } /* decompress until deflate stream ends or end of file */ do { strm.avail_in = fread(in, 1, chunk_i, source); if (ferror(source)) { (void)inflateEnd(&strm); __free(in); __free(out); return Z_ERRNO; } if (strm.avail_in == 0) break; strm.next_in = in; /* run inflate() on input until output buffer not full */ do { strm.avail_out = chunk_o; strm.next_out = out; time_ns_beg=get_nsec(); ret = inflate(&strm, Z_NO_FLUSH /* Z_SYNC_FLUSH */); time_ns_end=get_nsec(); time_ns += (time_ns_end - time_ns_beg); d->decomp_calls++; /* assert(ret != Z_STREAM_ERROR); *//* not clobbered */ switch (ret) { case Z_NEED_DICT: ret = Z_DATA_ERROR; /* and fall through */ case Z_STREAM_ERROR: case Z_DATA_ERROR: case Z_MEM_ERROR: (void)inflateEnd(&strm); __free(in); __free(out); return ret; } /* Throw away results, we just like to know how fast we are, no checking done. 
*/ // have = chunk_o - strm.avail_out; // if (fwrite(out, 1, have, dest) != have || // ferror(dest)) { // (void)inflateEnd(&strm); // __free(in); // __free(out); // return Z_ERRNO; // } } while (strm.avail_out == 0); /* done when inflate() says it's done */ } while (ret != Z_STREAM_END); d->infl_total += strm.total_out; d->infl_time += time_ns; if (d->first_run) { d->checksum = strm.adler; d->first_run = false; } else if (strm.adler != d->checksum) { fprintf(stderr, "Err: checksum mismatch %08lx != %08x\n", strm.adler, d->checksum); ret = Z_STREAM_ERROR; } /* clean up and return */ (void)inflateEnd(&strm); __free(in); __free(out); return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; } /* report a zlib or i/o error */ static void zerr(int ret) { int xerrno = errno; switch (ret) { case Z_ERRNO: fprintf(stderr, "errno=%d: %s\n", xerrno, strerror(xerrno)); if (ferror(stdin)) fputs("error reading stdin\n", stderr); if (ferror(stdout)) fputs("error writing stdout\n", stderr); break; case Z_STREAM_ERROR: fputs("stream error\n", stderr); break; case Z_DATA_ERROR: fprintf(stderr, "invalid or incomplete deflate data (%d)\n", ret); break; case Z_MEM_ERROR: fputs("out of memory\n", stderr); break; case Z_VERSION_ERROR: fputs("zlib version mismatch!\n", stderr); } } /** * str_to_num() - Convert string into number and cope with endings like * KiB for kilobyte * MiB for megabyte * GiB for gigabyte */ static inline uint64_t str_to_num(char *str) { char *s = str; uint64_t num = strtoull(s, &s, 0); if (*s == '\0') return num; if (strcmp(s, "KiB") == 0) num *= 1024; else if (strcmp(s, "MiB") == 0) num *= 1024 * 1024; else if (strcmp(s, "GiB") == 0) num *= 1024 * 1024 * 1024; return num; } static void usage(char *prog) { char *b = basename(prog); printf("%s usage: %s [OPTIONS]\n" " -X, --pin_cpu - pin each thread to own cpu\n" " -t, --threads threads in parallel\n" " -c, --count files to comp/decomp\n" " -i, --i_bufsize \n" " -o, --o_bufsize \n" " -D, --deflate - execute deflate. 
default: inflate\n" " -f --filename \n" " -v --verbose\n" " -V --version\n" "\n", b, b); } static void *libz_thread_defl(void *data) { int rc; unsigned int i; struct thread_data *d = (struct thread_data *)data; FILE *i_fp; d->defl_total=0; d->defl_time=0; d->comp_calls=0; d->tid = gettid(); d->cpu = sched_getcpu(); d->first_run = true; d->checksum = 0; i_fp = fopen(i_fname, "r"); /* original data */ if (i_fp == NULL) { fprintf(stderr, "ERROR: Can't open file %s\n",i_fname); exit(EXIT_FAILURE); } for (i = 0; (i < count) && (exit_on_err == 0); i++) { rc = defl(d, i_fp, Z_DEFAULT_COMPRESSION); if (rc != Z_OK) { fprintf(stderr, "err/def: rc=%d %s\n", rc, i_fname); zerr(rc); goto exit_failure; } rewind(i_fp); } fclose(i_fp); d->thread_rc = 0; pthread_exit(&d->thread_rc); exit_failure: fclose(i_fp); exit_on_err = 1; d->thread_rc = -2; pthread_exit(&d->thread_rc); } static void *libz_thread_infl(void *data) { int rc; unsigned int i; struct thread_data *d = (struct thread_data *)data; FILE *c_fp; d->infl_total = 0; d->infl_time = 0; d->decomp_calls = 0; d->tid = gettid(); d->cpu = sched_getcpu(); d->first_run = true; d->checksum = 0; printfv(1, " Thread %d using cpu %d\n",d->tid, d->cpu); c_fp = fopen(c_fname, "r"); /* original data */ if (c_fp == NULL) { fprintf(stderr, "Error: Can't open file %s\n",c_fname); exit(EXIT_FAILURE); } for (i = 0; (i < count) && (exit_on_err == 0); i++) { rc = infl(d, c_fp); if (rc != Z_OK) { fprintf(stderr, "%08x.%08x err/inf: rc=%d %s\n", getpid(), gettid(), rc, c_fname); zerr(rc); goto exit_failure; } rewind(c_fp); } fclose(c_fp); d->thread_rc = 0; pthread_exit(&d->thread_rc); exit_failure: fclose(c_fp); exit_on_err = 1; d->thread_rc = -2; pthread_exit(&d->thread_rc); } static int run_threads(struct thread_data *d, unsigned int threads) { int rc; unsigned int i; unsigned long int time_ns_beg, time_ns_end; for (i = 0; i < threads; i++) d[i].thread_rc = -1; time_ns_beg=get_nsec(); // Take system time at thread begins for (i = 0; i < threads; 
i++) { if ( pin_cpu_ena == 1 ) pin_to_cpu(i); // pin thread to cpu if (infl_ndefl == 1) { rc = pthread_create(&d[i].thread_id, NULL, &libz_thread_infl, &d[i]); } else { rc = pthread_create(&d[i].thread_id, NULL, &libz_thread_defl, &d[i]); } if (rc != 0) { fprintf(stderr, "starting %d. libz_thread failed!\n", i); return EXIT_FAILURE; } } for (i = 0; i < threads; i++) { rc = pthread_join(d[i].thread_id, NULL); if (rc != 0) { fprintf(stderr, "joining threads failed!\n"); return EXIT_FAILURE; } } time_ns_end=get_nsec(); // Take system time at thread ends time_ns_threads += (time_ns_end - time_ns_beg); return EXIT_SUCCESS; } static void __print_deflate_results(struct thread_data *d, unsigned int threads) { unsigned int i, error = 0; unsigned int comp_calls = 0; unsigned long int defl_total = 0; if (print_hdr) printfv(0, "thread ; TID ; err ; " " #defl ; bytes ; time msec ; " " throughput MiB/sec ; checksum ; in/out KiB\n"); for (i = 0; i < threads; i++) { printfv(1, "%6d ; %6ld ; %3d ; " "%6d ; %10ld ; %10ld ; %11.3f ; %08x ;\n", i, (unsigned long)d[i].tid, (int)d[i].thread_rc, d[i].comp_calls, d[i].defl_total, d[i].defl_time / 1000, /* msec */ d[i].defl_time ? d[i].defl_total*1000 / (double)d[i].defl_time : 0.0, d[i].checksum); if (d[i].thread_rc != 0) error = 1; comp_calls += d[i].comp_calls; defl_total += d[i].defl_total; } printfv(0, "%6d ; all ; ; " "%6d ; %10ld ; %10ld ; %11.3f ; %08x ; " "%d/%d\n", i, comp_calls, defl_total, time_ns_threads / 1000, /* msec */ time_ns_threads ? 
defl_total * 1000/(double)time_ns_threads : 0.0, d[0].checksum, CHUNK_i/1024, CHUNK_o/1024); if (error == 1) { fprintf(stderr, "Error: Thread failed\n"); return; } } static void __print_inflate_results(struct thread_data *d, unsigned int threads) { unsigned int i, error=0; unsigned int decomp_calls=0; unsigned long int infl_total=0; if (print_hdr) printfv(0, "thread ; TID ; err ; " " #defl ; bytes ; time msec ; " " throughput MiB/sec ; checksum ; in/out KiB\n"); for (i = 0; i < threads; i++) { printfv(1, "%6d ; %6ld ; %3d ; " "%6d ; %10ld ; %10ld ; %11.3f ; %08x ;\n", i, (unsigned long)d[i].tid, (int)d[i].thread_rc, d[i].decomp_calls, d[i].infl_total, d[i].infl_time / 1000, /* msec */ d[i].infl_time ? d[i].infl_total * 1000 / (double)d[i].infl_time : 0.0, d[i].checksum); if (d[i].thread_rc != 0) error = 1; decomp_calls += d[i].decomp_calls; infl_total += d[i].infl_total; } printfv(0, "%6d ; all ; ; " "%6d ; %10ld ; %10ld ; %11.3f ; %08x ; " "%d/%d\n", i, decomp_calls, infl_total, time_ns_threads / 1000, /* msec */ time_ns_threads ? 
infl_total * 1000 / (double)time_ns_threads : 0.0, d[0].checksum, CHUNK_i/1024, CHUNK_o/1024); if (error == 1) { fprintf(stderr, "Error: Thread failed\n"); return; } } static void print_results(void) { if (infl_ndefl) __print_inflate_results(d, threads); else __print_deflate_results(d, threads); } /* compress or decompress from stdin to stdout */ int main(int argc, char **argv) { int rc = EXIT_SUCCESS; /* avoid end-of-line conversions */ SET_BINARY_MODE(stdin); SET_BINARY_MODE(stdout); while (1) { int ch; int option_index = 0; static struct option long_options[] = { { "pin_cpu", no_argument, NULL, 'X' }, { "i_bufsize", required_argument, NULL, 'i' }, { "o_bufsize", required_argument, NULL, 'o' }, { "threads", required_argument, NULL, 't' }, { "count", required_argument, NULL, 'c' }, { "filename", required_argument, NULL, 'f' }, { "deflate", no_argument, NULL, 'D' }, { "pre-alloc-memory", no_argument, NULL, 'P' }, { "no-header", no_argument, NULL, 'N' }, { "version", no_argument, NULL, 'V' }, { "verbose", no_argument, NULL, 'v' }, { "help", no_argument, NULL, 'h' }, { 0, no_argument, NULL, 0 }, }; ch = getopt_long(argc, argv, "Xd:f:Dc:t:i:o:NVvh?", long_options, &option_index); if (ch == -1) /* all params processed ? 
*/ break; switch (ch) { /* which card to use */ case 'X': pin_cpu_ena = 1; break; case 'v': verbose++; break; case 't': threads = str_to_num(optarg); break; case 'c': count = str_to_num(optarg); break; case 'i': CHUNK_i = str_to_num(optarg); break; case 'o': CHUNK_o = str_to_num(optarg); break; case 'f': sprintf(i_fname, "%s", optarg); sprintf(c_fname, "%s", i_fname); break; case 'D': infl_ndefl = 0; break; case 'N': print_hdr = false; break; case 'V': fprintf(stdout, "%s\n", version); exit(EXIT_SUCCESS); break; case 'h': case '?': usage(argv[0]); exit(EXIT_SUCCESS); break; } } d = calloc(threads, sizeof(struct thread_data)); if (d == NULL) return EXIT_FAILURE; atexit(print_results); rc = pthread_mutex_init(&mutex, NULL); if (rc != 0) fprintf(stderr, "err: initializing mutex failed!\n"); rc = run_threads(d, threads); pthread_mutex_destroy(&mutex); exit(rc); }