pax_global_header00006660000000000000000000000064141746552100014517gustar00rootroot0000000000000052 comment=7aef3ff544045681ecf7d3a75bc7dd585322e7e4 xbyak-6.02/000077500000000000000000000000001417465521000125645ustar00rootroot00000000000000xbyak-6.02/.github/000077500000000000000000000000001417465521000141245ustar00rootroot00000000000000xbyak-6.02/.github/workflows/000077500000000000000000000000001417465521000161615ustar00rootroot00000000000000xbyak-6.02/.github/workflows/main.yml000066400000000000000000000003101417465521000176220ustar00rootroot00000000000000name: test on: [push] jobs: build: name: test runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - run: sudo apt install nasm yasm g++-multilib tcsh - run: make test xbyak-6.02/.gitignore000066400000000000000000000000201417465521000145440ustar00rootroot00000000000000/build* # cmake xbyak-6.02/CMakeLists.txt000066400000000000000000000027611417465521000153320ustar00rootroot00000000000000cmake_minimum_required(VERSION 2.6...3.0.2) project(xbyak LANGUAGES CXX VERSION 6.02) file(GLOB headers xbyak/*.h) if (DEFINED CMAKE_VERSION AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.2) include(GNUInstallDirs) add_library(${PROJECT_NAME} INTERFACE) add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) target_include_directories( ${PROJECT_NAME} INTERFACE "$" "$" ) install( TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}-targets INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} ) include(CMakePackageConfigHelpers) configure_package_config_file( cmake/config.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} ) write_basic_package_version_file( "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake" COMPATIBILITY SameMajorVersion ) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake" "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake" DESTINATION 
${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} ) install( EXPORT ${PROJECT_NAME}-targets NAMESPACE ${PROJECT_NAME}:: DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} ) elseif(NOT DEFINED CMAKE_INSTALL_INCLUDEDIR) set(CMAKE_INSTALL_INCLUDEDIR "include") endif() install( FILES ${headers} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/xbyak ) xbyak-6.02/COPYRIGHT000066400000000000000000000027351417465521000140660ustar00rootroot00000000000000 Copyright (c) 2007 MITSUNARI Shigeo All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of the copyright owner nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xbyak-6.02/Makefile000066400000000000000000000004741417465521000142310ustar00rootroot00000000000000PREFIX?=/usr/local INSTALL_DIR=$(PREFIX)/include/xbyak all: $(MAKE) -C sample clean: $(MAKE) -C sample clean install: mkdir -p $(INSTALL_DIR) cp -pR xbyak/*.h $(INSTALL_DIR) uninstall: rm -i $(INSTALL_DIR)/*.h rmdir $(INSTALL_DIR) update: $(MAKE) -C gen test: $(MAKE) -C test test .PHONY: test update xbyak-6.02/cmake/000077500000000000000000000000001417465521000136445ustar00rootroot00000000000000xbyak-6.02/cmake/config.cmake.in000066400000000000000000000001221417465521000165130ustar00rootroot00000000000000@PACKAGE_INIT@ include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake") xbyak-6.02/cmake/meson-config.cmake.in000066400000000000000000000003201417465521000176320ustar00rootroot00000000000000@PACKAGE_INIT@ if(NOT TARGET @TARGET_NAME@) add_library(@TARGET_NAME@ INTERFACE IMPORTED) set_target_properties(@TARGET_NAME@ PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "@ABSOLUTE_INCLUDE_DIR@" ) endif() xbyak-6.02/gen/000077500000000000000000000000001417465521000133355ustar00rootroot00000000000000xbyak-6.02/gen/Makefile000066400000000000000000000020251417465521000147740ustar00rootroot00000000000000TARGET=../xbyak/xbyak_mnemonic.h BIN=sortline gen_code gen_avx512 CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers all: $(TARGET) ../CMakeLists.txt ../meson.build sortline: sortline.cpp $(CXX) $(CFLAGS) $< -o $@ gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp $(CXX) $(CFLAGS) $< -o $@ gen_avx512: gen_avx512.cpp ../xbyak/xbyak.h avx_type.hpp $(CXX) $(CFLAGS) $< -o $@ $(TARGET): $(BIN) ./gen_code | ./sortline > $@ echo "#ifdef XBYAK_ENABLE_OMITTED_OPERAND" >> $@ ./gen_code omit | ./sortline >> $@ echo "#endif" >>$@ ./gen_code fixed >> $@ echo "#ifndef XBYAK_DISABLE_AVX512" >> $@ ./gen_avx512 | ./sortline >> $@ echo "#ifdef XBYAK64" >> $@ ./gen_avx512 64 | ./sortline >> $@ echo "#endif" >> $@ echo "#endif" >> $@ VER=$(shell head -n 1 
../xbyak/xbyak_mnemonic.h|grep -o "[0-9.]*") ../CMakeLists.txt: $(TARGET) sed -i -e 's/CXX VERSION [0-9.]*/CXX VERSION $(VER)/' $@ ../meson.build: $(TARGET) sed -i -e "s/version: '[0-9.]*',/version: '$(VER)',/" $@ clean: $(RM) $(BIN) $(TARGET) xbyak-6.02/gen/avx_type.hpp000066400000000000000000000075761417465521000157240ustar00rootroot00000000000000#include // copy CodeGenerator::AVXtype enum AVXtype { // low 3 bit T_N1 = 1, T_N2 = 2, T_N4 = 3, T_N8 = 4, T_N16 = 5, T_N32 = 6, T_NX_MASK = 7, // T_N_VL = 1 << 3, // N * (1, 2, 4) for VL T_DUP = 1 << 4, // N = (8, 32, 64) T_66 = 1 << 5, // pp = 1 T_F3 = 1 << 6, // pp = 2 T_F2 = T_66 | T_F3, // pp = 3 T_ER_R = 1 << 7, // reg{er} T_0F = 1 << 8, T_0F38 = 1 << 9, T_0F3A = 1 << 10, T_L0 = 1 << 11, T_L1 = 1 << 12, T_W0 = 1 << 13, T_W1 = 1 << 14, T_EW0 = 1 << 15, T_EW1 = 1 << 16, T_YMM = 1 << 17, // support YMM, ZMM T_EVEX = 1 << 18, T_ER_X = 1 << 19, // xmm{er} T_ER_Y = 1 << 20, // ymm{er} T_ER_Z = 1 << 21, // zmm{er} T_SAE_X = 1 << 22, // xmm{sae} T_SAE_Y = 1 << 23, // ymm{sae} T_SAE_Z = 1 << 24, // zmm{sae} T_MUST_EVEX = 1 << 25, // contains T_EVEX T_B32 = 1 << 26, // m32bcst T_B64 = 1 << 27, // m64bcst T_B16 = T_B32 | T_B64, // m16bcst T_M_K = 1 << 28, // mem{k} T_VSIB = 1 << 29, T_MEM_EVEX = 1 << 30, // use evex if mem T_FP16 = 1 << 31, T_MAP5 = T_FP16 | T_0F, T_MAP6 = T_FP16 | T_0F38, T_XXX }; // T_66 = 1, T_F3 = 2, T_F2 = 3 uint32_t getPP(int type) { return (type >> 5) & 3; } const int NONE = 256; // same as Xbyak::CodeGenerator::NONE std::string type2String(int type) { std::string str; int low = type & T_NX_MASK; if (0 < low) { const char *tbl[8] = { "T_N1", "T_N2", "T_N4", "T_N8", "T_N16", "T_N32" }; assert(low < int(sizeof(tbl) / sizeof(tbl[0]))); str = tbl[low - 1]; } if (type & T_N_VL) { if (!str.empty()) str += " | "; str += "T_N_VL"; } if (type & T_DUP) { if (!str.empty()) str += " | "; str += "T_DUP"; } if (type & T_F2) { if (!str.empty()) str += " | "; switch (type & T_F2) { case T_66: str += "T_66"; break; 
case T_F3: str += "T_F3"; break; case T_F2: str += "T_F2"; break; default: break; } } if (type & T_0F) { if (!str.empty()) str += " | "; if (type & T_FP16) { str += "T_MAP5"; } else { str += "T_0F"; } } if (type & T_0F38) { if (!str.empty()) str += " | "; if (type & T_FP16) { str += "T_MAP6"; } else { str += "T_0F38"; } } if (type & T_0F3A) { if (!str.empty()) str += " | "; str += "T_0F3A"; } if (type & T_L0) { if (!str.empty()) str += " | "; str += "VEZ_L0"; } if (type & T_L1) { if (!str.empty()) str += " | "; str += "VEZ_L1"; } if (type & T_W0) { if (!str.empty()) str += " | "; str += "T_W0"; } if (type & T_W1) { if (!str.empty()) str += " | "; str += "T_W1"; } if (type & T_EW0) { if (!str.empty()) str += " | "; str += "T_EW0"; } if (type & T_EW1) { if (!str.empty()) str += " | "; str += "T_EW1"; } if (type & T_YMM) { if (!str.empty()) str += " | "; str += "T_YMM"; } if (type & T_EVEX) { if (!str.empty()) str += " | "; str += "T_EVEX"; } if (type & T_ER_X) { if (!str.empty()) str += " | "; str += "T_ER_X"; } if (type & T_ER_Y) { if (!str.empty()) str += " | "; str += "T_ER_Y"; } if (type & T_ER_Z) { if (!str.empty()) str += " | "; str += "T_ER_Z"; } if (type & T_ER_R) { if (!str.empty()) str += " | "; str += "T_ER_R"; } if (type & T_SAE_X) { if (!str.empty()) str += " | "; str += "T_SAE_X"; } if (type & T_SAE_Y) { if (!str.empty()) str += " | "; str += "T_SAE_Y"; } if (type & T_SAE_Z) { if (!str.empty()) str += " | "; str += "T_SAE_Z"; } if (type & T_MUST_EVEX) { if (!str.empty()) str += " | "; str += "T_MUST_EVEX"; } if (type & T_B32) { if (!str.empty()) str += " | "; if (type & T_B64) { str += "T_B16"; // T_B16 = T_B32 | T_B64 } else { str += "T_B32"; } } else if (type & T_B64) { if (!str.empty()) str += " | "; str += "T_B64"; } if (type & T_M_K) { if (!str.empty()) str += " | "; str += "T_M_K"; } if (type & T_VSIB) { if (!str.empty()) str += " | "; str += "T_VSIB"; } if (type & T_MEM_EVEX) { if (!str.empty()) str += " | "; str += "T_MEM_EVEX"; } return str; } 
xbyak-6.02/gen/b2hex.cpp000066400000000000000000000004221417465521000150470ustar00rootroot00000000000000#include int main() { puts("enum {"); for (int i = 0; i < 256; i++) { printf(" B"); for (int j = 0; j < 8; j++) { putchar(i & (1 << (7 - j)) ? '1' : '0'); } printf("= %d", i); if (i < 255) putchar(','); putchar('\n'); } puts("};"); return 0; }xbyak-6.02/gen/gen_avx512.cpp000066400000000000000000001461161417465521000157310ustar00rootroot00000000000000#define XBYAK_DONT_READ_LIST #include #include #include "../xbyak/xbyak.h" #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0])) using namespace Xbyak; #ifdef _MSC_VER #pragma warning(disable : 4996) // scanf #define snprintf _snprintf_s #endif #include "avx_type.hpp" void putOpmask(bool only64bit) { if (only64bit) { puts("void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }"); puts("void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }"); return; } { const struct Tbl { const char *name; uint8_t code; } tbl[] = { { "kadd", 0x4A }, { "kand", 0x41 }, { "kandn", 0x42 }, { "kor", 0x45 }, { "kxnor", 0x46 }, { "kxor", 0x47 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; printf("void %sw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x%02X); }\n", p.name, p.code); printf("void %sq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x%02X); }\n", p.name, p.code); printf("void %sb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code); printf("void %sd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code); } printf("void kunpckbw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4B); }\n"); printf("void 
kunpckwd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x4B); }\n"); printf("void kunpckdq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4B); }\n"); } { const struct Tbl { const char *name; uint8_t code; } tbl[] = { { "knot", 0x44 }, { "kortest", 0x98 }, { "ktest", 0x99 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; printf("void %sw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x%02X); }\n", p.name, p.code); printf("void %sq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x%02X); }\n", p.name, p.code); printf("void %sb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x%02X); }\n", p.name, p.code); printf("void %sd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x%02X); }\n", p.name, p.code); } } { const struct Tbl { const char *name; uint8_t code; } tbl[] = { { "kshiftl", 0x32 }, { "kshiftr", 0x30 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; printf("void %sw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code); printf("void %sq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x%02X, imm); }\n", p.name, p.code + 1); printf("void %sb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code); printf("void %sd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x%02X, imm); }\n", p.name, p.code + 1); } } puts("void kmovw(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_W0, 0x90); }"); puts("void kmovq(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) 
opVex(k, 0, op, T_L0 | T_0F | T_W1, 0x90); }"); puts("void kmovb(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W0, 0x90); }"); puts("void kmovd(const Opmask& k, const Operand& op) { if (!op.isMEM() && !op.isOPMASK()) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(k, 0, op, T_L0 | T_0F | T_66 | T_W1, 0x90); }"); puts("void kmovw(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W0, 0x91); }"); puts("void kmovq(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_W1, 0x91); }"); puts("void kmovb(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W0, 0x91); }"); puts("void kmovd(const Address& addr, const Opmask& k) { opVex(k, 0, addr, T_L0 | T_0F | T_66 | T_W1, 0x91); }"); puts("void kmovw(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_W0, 0x92); }"); puts("void kmovw(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_W0, 0x93); }"); puts("void kmovb(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_66 | T_W0, 0x92); }"); puts("void kmovb(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_66 | T_W0, 0x93); }"); puts("void kmovd(const Opmask& k, const Reg32& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W0, 0x92); }"); puts("void kmovd(const Reg32& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W0, 0x93); }"); } // vcmppd(k, x, op) void putVcmp() { const struct Tbl { uint8_t code; const char *name; int type; bool hasIMM; } tbl[] = { { 0xC2, "vcmppd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_YMM | T_66 | T_B64, true }, { 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true }, { 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true }, { 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true }, { 0xC2, "vcmpph", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B16, true }, { 
0xC2, "vcmpsh", T_F3 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true }, { 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, { 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, { 0x76, "vpcmpeqd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_B32, false }, { 0x29, "vpcmpeqq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x64, "vpcmpgtb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, { 0x65, "vpcmpgtw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false }, { 0x66, "vpcmpgtd", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, { 0x37, "vpcmpgtq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x3F, "vpcmpb", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true }, { 0x3E, "vpcmpub", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0, true }, { 0x3F, "vpcmpw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true }, { 0x3E, "vpcmpuw", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1, true }, { 0x1F, "vpcmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true }, { 0x1E, "vpcmpud", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, true }, { 0x1F, "vpcmpq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true }, { 0x1E, "vpcmpuq", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, true }, { 0x26, "vptestmb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, { 0x26, "vptestmw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, { 0x27, "vptestmd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, { 0x27, "vptestmq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x26, "vptestnmb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, { 0x26, "vptestnmw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, { 0x27, "vptestnmd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, { 0x27, "vptestnmq", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); 
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n" , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }"); puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }"); } void putVcmpAlias() { const char pred[32][16] = { "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" }; const char suf[][4] = { "pd", "ps", "sd", "ss" }; for (int i = 0; i < 4; i++) { const char *s = suf[i]; for (int j = 0; j < 32; j++) { printf("void vcmp%s%s(const Opmask& k, const Xmm& x, const Operand& op) { vcmp%s(k, x, op, %d); }\n", pred[j], s, s, j); } } } // XM_X void putX_XM() { const struct Tbl { uint8_t code; const char *name; int type; } tbl[] = { { 0x6F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, { 0x6F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, { 0x6F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, { 0x6F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, { 0x6F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z }, { 0x6F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z }, { 0x7B, "vcvtpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, // putCvt { 0x79, "vcvtpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, { 0x79, "vcvtps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_ER_Z }, { 0xE6, 
"vcvtqq2pd", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_ER_Z }, { 0x7A, "vcvttpd2qq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, { 0x78, "vcvttpd2uqq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, { 0x78, "vcvttps2udq", T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z }, { 0x7A, "vcvtudq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z }, { 0x7A, "vcvtuqq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z }, { 0x88, "vexpandpd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, { 0x88, "vexpandps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, { 0x89, "vpexpandd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, { 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, { 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z }, { 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z }, { 0x42, "vgetexpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z }, { 0x7D, "vcvtph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, { 0x7D, "vcvtph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, { 0x7C, "vcvttph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z }, { 0x7C, "vcvttph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z }, { 0x7D, "vcvtuw2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, { 0x7D, "vcvtw2ph", T_F3 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); } puts("void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }"); puts("void vexp2pd(const Zmm& z, const Operand& op) { 
opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }"); puts("void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }"); puts("void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }"); puts("void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }"); puts("void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }"); puts("void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }"); } void putM_X() { const struct Tbl { uint8_t code; const char *name; int type; } tbl[] = { { 0x7F, "vmovdqa32", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x7F, "vmovdqa64", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x7F, "vmovdqu8", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K }, { 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K }, { 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); } } void putXM_X() { const struct Tbl { uint8_t code; const char 
*name; int type; } tbl[] = { { 0x8A, "vcompresspd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, { 0x8A, "vcompressps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, { 0x8B, "vpcompressd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 }, { 0x8B, "vpcompressq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 }, { 0x63, "vcompressb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N1 }, { 0x63, "vcompressw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N2 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p->name, type.c_str(), p->code); } } void putX_X_XM_IMM() { const struct Tbl { uint8_t code; const char *name; int type; bool hasIMM; } tbl[] = { { 0x03, "valignd", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM, true }, { 0x03, "valignq", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM, true }, { 0xDB, "vpandd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, { 0xDB, "vpandq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, { 0xDF, "vpandnd", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW0 | T_B32, false }, { 0xDF, "vpandnq", T_MUST_EVEX | T_YMM | T_66 | T_0F | T_EW1 | T_B64, false }, { 0x3D, "vpmaxsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x3F, "vpmaxuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x39, "vpminsq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x3B, "vpminuq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0xE2, "vpsraq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_N16, false }, { 0x46, "vpsravq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x11, "vpsravw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, { 0x12, "vpsllvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, { 0x10, "vpsrlvw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 
false }, { 0xEB, "vpord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, { 0xEB, "vporq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0xEF, "vpxord", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, false }, { 0xEF, "vpxorq", T_66 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x40, "vpmullq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, false }, { 0x8D, "vpermb", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, false }, { 0x8D, "vpermw", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, false }, { 0x65, "vblendmpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x65, "vblendmps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x66, "vpblendmb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, { 0x66, "vpblendmw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, { 0x64, "vpblendmd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x64, "vpblendmq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x7D, "vpermt2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, { 0x7D, "vpermt2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, { 0x7E, "vpermt2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x7E, "vpermt2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x7F, "vpermt2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x7F, "vpermt2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x75, "vpermi2b", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, false }, { 0x75, "vpermi2w", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, false }, { 0x76, "vpermi2d", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x76, "vpermi2q", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x77, "vpermi2ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x77, "vpermi2pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x25, 
"vpternlogd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true }, { 0x25, "vpternlogq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true }, { 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false }, { 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false }, { 0x43, "vgetexpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false }, { 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, { 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, { 0x27, "vgetmantsh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true }, { 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true }, { 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true }, { 0x55, "vfixupimmsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N8, true }, { 0x55, "vfixupimmss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N4, true }, { 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false }, { 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false }, { 0x4D, "vrcpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false }, { 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false }, { 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false }, { 0x4F, "vrsqrtsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false }, { 0x51, "vsqrtsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false }, { 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, true }, { 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, true }, { 0x0A, "vrndscalesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, true }, { 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false }, { 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false }, { 0x2D, 
"vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false }, { 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false }, { 0x2C, "vscalefph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Z, false }, { 0x2D, "vscalefsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false }, { 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true }, { 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x15, "vprolvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x15, "vprolvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x14, "vprorvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x14, "vprorvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0xCB, "vrcp28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false }, { 0xCB, "vrcp28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false }, { 0xCD, "vrsqrt28sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, false }, { 0xCD, "vrsqrt28ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, false }, { 0x50, "vrangepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, { 0x50, "vrangeps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, { 0x51, "vrangesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, { 0x51, "vrangess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, { 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true }, { 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true }, { 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true }, { 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x70, "vpshldw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true }, { 
0x71, "vpshldd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true }, { 0x71, "vpshldq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true }, { 0x70, "vpshldvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, { 0x71, "vpshldvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x71, "vpshldvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, { 0x72, "vpshrdw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, true }, { 0x73, "vpshrdd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, true }, { 0x73, "vpshrdq", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, true }, { 0x72, "vpshrdvw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, { 0x73, "vpshrdvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x73, "vpshrdvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, { 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x5A, "vcvtsd2sh", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false }, { 0x5A, "vcvtsh2sd", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false }, { 0x13, "vcvtsh2ss", T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false }, { 0x1D, "vcvtss2sh", T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? 
", imm" : ""); } } void putShift() { const struct Tbl { const char *name; uint8_t code; int idx; int type; } tbl[] = { { "vpsraq", 0x72, 4, T_0F | T_66 | T_YMM | T_MUST_EVEX |T_EW1 | T_B64 }, { "vprold", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 }, { "vprolq", 0x72, 1, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 }, { "vprord", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 }, { "vprorq", 0x72, 0, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); printf("void %s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); } } void putExtractInsert() { { const struct Tbl { const char *name; uint8_t code; int type; bool isZMM; } tbl[] = { { "vextractf32x4", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, { "vextractf64x2", 0x19, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, { "vextractf32x8", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, { "vextractf64x4", 0x1B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, { "vextracti32x4", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, { "vextracti64x2", 0x39, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, { "vextracti32x8", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, { "vextracti64x4", 0x3B, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); const char *kind = p.isZMM ? "Operand::MEM | Operand::YMM" : "Operand::MEM | Operand::XMM"; printf("void %s(const Operand& op, const %s& r, uint8_t imm) { if (!op.is(%s)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, %s, 0x%2X, imm); }\n", p.name, p.isZMM ? 
"Zmm" : "Ymm", kind, type.c_str(), p.code); } } { const struct Tbl { const char *name; uint8_t code; int type; bool isZMM; } tbl[] = { { "vinsertf32x4", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, { "vinsertf64x2", 0x18, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, { "vinsertf32x8", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, { "vinsertf64x4", 0x1A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, { "vinserti32x4", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N16, false }, { "vinserti64x2", 0x38, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N16, false }, { "vinserti32x8", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_YMM | T_N32, true }, { "vinserti64x4", 0x3A, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_YMM | T_N32, true }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); const char *x = p.isZMM ? "Zmm" : "Ymm"; const char *cond = p.isZMM ? 
"op.is(Operand::MEM | Operand::YMM)" : "(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))"; printf("void %s(const %s& r1, const %s& r2, const Operand& op, uint8_t imm) {" "if (!%s) XBYAK_THROW(ERR_BAD_COMBINATION) " "opVex(r1, &r2, op, %s, 0x%2X, imm); }\n", p.name, x, x, cond, type.c_str(), p.code); } } } void putBroadcast(bool only64bit) { { const struct Tbl { uint8_t code; const char *name; int type; int reg; } tbl[] = { { 0x7A, "vpbroadcastb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 8 }, { 0x7B, "vpbroadcastw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 16 }, { 0x7C, "vpbroadcastd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 32 }, { 0x7C, "vpbroadcastq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 64}, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); if ((only64bit && p.reg == 64) || (!only64bit && p.reg != 64)) { printf("void %s(const Xmm& x, const Reg%d& r) { opVex(x, 0, r, %s, 0x%02X); }\n", p.name, p.reg, type.c_str(), p.code); } } } if (only64bit) return; puts("void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x19); }"); puts("void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x1A); }"); puts("void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x1A); }"); puts("void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x1B); }"); puts("void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x1B); }"); puts("void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8, 0x59); }"); puts("void 
vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16, 0x5A); }"); puts("void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }"); puts("void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }"); puts("void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }"); } void putCvt() { const struct Tbl { uint8_t code; const char *name; int type; int ptn; } tbl[] = { { 0x79, "vcvtsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X, 0 }, { 0x79, "vcvtss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X, 0 }, { 0x78, "vcvttsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X, 0 }, { 0x78, "vcvttss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X, 0 }, { 0x2D, "vcvtsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 }, { 0x79, "vcvtsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 }, { 0x2C, "vcvttsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 }, { 0x78, "vcvttsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 }, { 0x7B, "vcvtps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 }, { 0x79, "vcvtps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 }, { 0x7A, "vcvttps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 }, { 0x78, "vcvttps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 }, { 0x7A, "vcvtudq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 1 }, { 0x5B, "vcvtph2dq", T_66 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 }, { 0x13, "vcvtph2psx", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 }, { 
0x79, "vcvtph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 }, { 0x5B, "vcvttph2dq", T_F3 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 }, { 0x78, "vcvttph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 }, { 0x79, "vcvtpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 }, { 0x5B, "vcvtqq2ps", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 }, { 0x78, "vcvttpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 2 }, { 0x7A, "vcvtuqq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 }, { 0x5A, "vcvtph2pd", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }, { 0x7B, "vcvtph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 }, { 0x79, "vcvtph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 }, { 0x78, "vcvttph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }, { 0x7A, "vcvttph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }, { 0x5B, "vcvtdq2ph", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, { 0x1D, "vcvtps2phx", T_66 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, { 0x7A, "vcvtudq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 }, { 0x5A, "vcvtpd2ph", T_66 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 }, { 0x5B, "vcvtqq2ph", T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 }, { 0x7A, "vcvtuqq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 }, { 0x2A, "vcvtsi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 }, { 0x7B, "vcvtusi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); switch 
(p.ptn) { case 0: printf("void %s(const Reg32e& r, const Operand& op) { int type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code); break; case 1: printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); break; case 2: printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); break; case 3: printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); break; case 4: printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); break; case 5: printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); break; case 6: printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (%s) | (op.isBit(32) ? 
(T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code); break; } } puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }"); puts("void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }"); } enum { // same as xbyak.h xx_yy_zz = 0, xx_yx_zy = 1, xx_xy_yz = 2, }; void putGather() { const struct Tbl { const char *name; int type; uint8_t code; int mode; } tbl[] = { { "vpgatherdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x90, xx_yy_zz }, { "vpgatherdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x90, xx_yx_zy }, { "vpgatherqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x91, xx_xy_yz }, { "vpgatherqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x91, xx_yy_zz }, { "vgatherdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x92, xx_yy_zz }, { "vgatherdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x92, xx_yx_zy }, { "vgatherqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, 0x93, xx_xy_yz }, { "vgatherqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, 0x93, xx_yy_zz }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type | T_VSIB); printf("void %s(const Xmm& x, const Address& addr) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode); } } void putScatter() { const struct Tbl { const char *name; int type; uint8_t code; int mode; // reverse of gather } tbl[] = { { "vpscatterdd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA0, xx_yy_zz }, { "vpscatterdq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA0, xx_yx_zy }, { "vpscatterqd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 
0xA1, xx_xy_yz }, { "vpscatterqq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA1, xx_yy_zz }, { "vscatterdps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA2, xx_yy_zz }, { "vscatterdpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA2, xx_yx_zy }, { "vscatterqps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4 | T_M_K, 0xA3, xx_xy_yz }, { "vscatterqpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8 | T_M_K, 0xA3, xx_yy_zz }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type | T_VSIB); printf("void %s(const Address& addr, const Xmm& x) { opGather2(x, addr, %s, 0x%02X, %d); }\n", p.name, type.c_str(), p.code, p.mode); } } void putShuff() { puts("void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }"); puts("void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }"); puts("void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }"); puts("void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }"); } void putMov() { puts("void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }"); puts("void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }"); puts("void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }"); puts("void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | 
T_EW1, 0x38); }"); puts("void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }"); puts("void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }"); puts("void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }"); puts("void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }"); { const struct Tbl { uint8_t code; const char *name; int type; int mode; } tbl[] = { { 0x32, "vpmovqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false }, { 0x22, "vpmovsqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false }, { 0x12, "vpmovusqb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N2 | T_N_VL, false }, { 0x34, "vpmovqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, { 0x24, "vpmovsqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, { 0x14, "vpmovusqw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, { 0x35, "vpmovqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, { 0x25, "vpmovsqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, { 0x15, "vpmovusqd", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, { 0x31, "vpmovdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, { 0x21, "vpmovsdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, { 0x11, "vpmovusdb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N4 | T_N_VL, false }, { 0x33, "vpmovdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, { 0x23, "vpmovsdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, { 0x13, "vpmovusdw", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, { 0x30, "vpmovwb", T_F3 | T_0F38 | T_MUST_EVEX | 
T_YMM | T_EW0 | T_N8 | T_N_VL, true }, { 0x20, "vpmovswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, { 0x10, "vpmovuswb", T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_N8 | T_N_VL, true }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); printf("void %s(const Operand& op, const Xmm& x) { opVmov(op, x, %s, 0x%02X, %s); }\n", p.name, type.c_str(), p.code, p.mode ? "true" : "false"); } } } void putX_XM_IMM() { const struct Tbl { uint8_t code; const char *name; int type; bool hasIMM; } tbl[] = { { 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, { 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, { 0x26, "vgetmantph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true }, { 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x4C, "vrcpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, false }, { 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x4E, "vrsqrtph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16, false }, { 0x51, "vsqrtph", T_MAP5| T_YMM | T_MUST_EVEX | T_EW0 | T_ER_Z | T_B16, false }, { 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, { 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, { 0x08, "vrndscaleph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true }, { 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x44, "vplzcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false }, { 0x44, "vplzcntq", T_66 | 
T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false }, { 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true }, { 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true }, { 0x56, "vreduceph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true }, { 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false }, { 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false }, { 0x55, "vpopcntd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x55, "vpopcntq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, { 0x62, "vpexpandb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_N1, false }, { 0x62, "vpexpandw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_N2, false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Xmm& x, const Operand& op%s) { opAVX_X_XM_IMM(x, op, %s, 0x%02X%s); }\n" , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? 
", imm" : ""); } } void putMisc() { puts("void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }"); puts("void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }"); { const struct Tbl { const char *name; int zm; int type; uint8_t code; bool isZmm; } tbl[] = { { "vgatherpf0dps", 1, T_EW0 | T_N4, 0xC6, true }, { "vgatherpf0qps", 1, T_EW0 | T_N4, 0xC7, true }, { "vgatherpf0dpd", 1, T_EW1 | T_N8, 0xC6, false }, { "vgatherpf0qpd", 1, T_EW1 | T_N8, 0xC7, true }, { "vgatherpf1dps", 2, T_EW0 | T_N4, 0xC6, true }, { "vgatherpf1qps", 2, T_EW0 | T_N4, 0xC7, true }, { "vgatherpf1dpd", 2, T_EW1 | T_N8, 0xC6, false }, { "vgatherpf1qpd", 2, T_EW1 | T_N8, 0xC7, true }, { "vscatterpf0dps", 5, T_EW0 | T_N4, 0xC6, true }, { "vscatterpf0qps", 5, T_EW0 | T_N4, 0xC7, true }, { "vscatterpf0dpd", 5, T_EW1 | T_N8, 0xC6, false }, { "vscatterpf0qpd", 5, T_EW1 | T_N8, 0xC7, true }, { "vscatterpf1dps", 6, T_EW0 | T_N4, 0xC6, true }, { "vscatterpf1qps", 6, T_EW0 | T_N4, 0xC7, true }, { "vscatterpf1dpd", 6, T_EW1 | T_N8, 0xC6, false }, { "vscatterpf1qpd", 6, T_EW1 | T_N8, 0xC7, true }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type | T_66 | T_0F38 | T_MUST_EVEX | T_M_K | T_VSIB); printf("void %s(const Address& addr) { opGatherFetch(addr, zm%d, %s, 0x%2X, Operand::%s); }\n" , p.name, p.zm, type.c_str(), p.code, p.isZmm ? 
"ZMM" : "YMM"); } } puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }"); puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }"); puts("void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }"); puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }"); puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }"); puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }"); puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }"); puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }"); puts("void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }"); puts("void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) 
opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }"); } void putV4FMA() { puts("void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x9A); }"); puts("void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }"); puts("void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }"); puts("void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }"); puts("void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }"); puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }"); } void putFP16_1() { const struct Tbl { uint8_t code; const char *name; } tbl[] = { { 0x58, "add" }, { 0x5C, "sub" }, { 0x59, "mul" }, { 0x5E, "div" }, { 0x5F, "max" }, { 0x5D, "min" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void v%sph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x%02X); }\n", p->name, p->code); printf("void v%ssh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x%02X); }\n", p->name, p->code); } } void putFP16_FMA() { const struct Tbl { uint8_t code; const char *name; bool isPH; } tbl[] = { { 0x06, "vfmaddsub", true }, { 0x07, "vfmsubadd", true }, { 0x08, "vfmadd", true }, { 0x0C, "vfnmadd", true }, { 0x0A, "vfmsub", true }, { 
0x0E, "vfnmsub", true }, { 0x09, "vfmadd", false }, { 0x0D, "vfnmadd", false }, { 0x0B, "vfmsub", false }, { 0x0F, "vfnmsub", false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { for (int k = 0; k < 3; k++) { const struct Ord { const char *str; uint8_t code; } ord[] = { { "132", 0x90 }, { "213", 0xA0 }, { "231", 0xB0 }, }; int t = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX; const char *suf = 0; if (tbl[i].isPH) { t |= T_ER_Z | T_YMM | T_B16; suf = "ph"; } else { t |= T_ER_X | T_N2; suf = "sh"; } std::string type = type2String(t); printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" , tbl[i].name, ord[k].str, suf, type.c_str(), tbl[i].code | ord[k].code); } } } void putFP16_FMA2() { const struct Tbl { uint8_t code; const char *name; bool isPH; } tbl[] = { { 0x56, "maddc", true }, { 0xD6, "mulc", true }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { for (int j = 0; j < 2; j++) { int t = T_MAP6 | T_EW0 | T_MUST_EVEX; if (j == 0) { t |= T_F2; } else { t |= T_F3; } const char *suf = 0; if (tbl[i].isPH) { t |= T_ER_Z | T_YMM | T_B32; suf = "ph"; } else { t |= T_ER_X | T_N2; suf = "sh"; } std::string type = type2String(t); printf("void vf%s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" , j == 0 ? 
"c" : "", tbl[i].name, suf, type.c_str(), tbl[i].code); } } } void putFP16_2() { { int t = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2; std::string type = type2String(t); printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", type.c_str()); printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", type.c_str()); } { int t = T_66 | T_MAP5 | T_MUST_EVEX | T_N2; std::string type = type2String(t); printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", type.c_str()); printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", type.c_str()); } } void putFP16() { putFP16_1(); putFP16_FMA(); putFP16_FMA2(); putFP16_2(); } int main(int argc, char *[]) { bool only64bit = argc == 2; putOpmask(only64bit); putBroadcast(only64bit); if (only64bit) { return 0; } putVcmp(); putVcmpAlias(); putX_XM(); putM_X(); putXM_X(); putX_X_XM_IMM(); putShift(); putExtractInsert(); putCvt(); putGather(); putShuff(); putMov(); putX_XM_IMM(); putMisc(); putScatter(); putV4FMA(); putFP16(); } xbyak-6.02/gen/gen_code.cpp000066400000000000000000002312731417465521000156140ustar00rootroot00000000000000#define XBYAK_DONT_READ_LIST #include #include #include "xbyak/xbyak.h" #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0])) using namespace Xbyak; #ifdef _MSC_VER #pragma warning(disable : 4996) // scanf #define snprintf _snprintf_s #endif #include "avx_type.hpp" /* reg = cx/ecx/rcx insert 0x67 if prefix is true */ void put_jREGz(const char *reg, bool prefix) { printf("void j%sz(std::string label) { %sopJmp(label, T_SHORT, 0xe3, 0, 0); }\n", reg, prefix ? "db(0x67); " : ""); printf("void j%sz(const Label& label) { %sopJmp(label, T_SHORT, 0xe3, 0, 0); }\n", reg, prefix ? 
"db(0x67); " : ""); } struct GenericTbl { const char *name; uint8_t code1; uint8_t code2; uint8_t code3; uint8_t code4; }; void putGeneric(const GenericTbl *p, size_t n) { for (size_t i = 0; i < n; i++) { printf("void %s() { db(0x%02X); ", p->name, p->code1); if (p->code2) printf("db(0x%02X); ", p->code2); if (p->code3) printf("db(0x%02X); ", p->code3); if (p->code4) printf("db(0x%02X); ", p->code4); printf("}\n"); p++; } } void putX_X_XM(bool omitOnly) { // (x, x, x/m[, imm]) or (y, y, y/m[, imm]) { const struct Tbl { uint8_t code; const char *name; int type; bool hasIMM; bool enableOmit; int mode; // 1 : sse, 2 : avx, 3 : sse + avx } tbl[] = { { 0x0D, "blendpd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, { 0x0C, "blendps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, { 0x41, "dppd", T_0F3A | T_66 | T_W0, true, true, 3 }, { 0x40, "dpps", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, { 0x42, "mpsadbw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, { 0x0E, "pblendw", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 3 }, { 0x02, "pblendd", T_0F3A | T_66 | T_W0 | T_YMM, true, true, 2 }, { 0x0B, "roundsd", T_0F3A | T_66 | T_W0, true, true, 3 }, { 0x0A, "roundss", T_0F3A | T_66 | T_W0, true, true, 3 }, { 0x44, "pclmulqdq", T_0F3A | T_66 | T_W0 | T_YMM | T_EVEX, true, true, 3 }, { 0x0C, "permilps", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 }, { 0x0D, "permilpd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 }, { 0x47, "psllvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 }, { 0x47, "psllvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 }, { 0x46, "psravd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 }, { 0x45, "psrlvd", T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32, false, false, 2 }, { 0x45, "psrlvq", T_0F38 | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64, false, false, 2 }, { 0xC2, "cmppd", T_0F | T_66 | T_YMM, true, true, 2 }, { 
0xC2, "cmpps", T_0F | T_YMM, true, true, 2 }, { 0xC2, "cmpsd", T_0F | T_F2, true, true, 2 }, { 0xC2, "cmpss", T_0F | T_F3, true, true, 2 }, { 0x5A, "cvtsd2ss", T_0F | T_F2 | T_EVEX | T_EW1 | T_N8 | T_ER_X, false, true, 2 }, { 0x5A, "cvtss2sd", T_0F | T_F3 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, false, true, 2 }, { 0x21, "insertps", T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, true, true, 2 }, { 0x63, "packsswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x6B, "packssdw", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, { 0x67, "packuswb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x2B, "packusdw", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, { 0xFC, "paddb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xFD, "paddw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xFE, "paddd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, { 0xD4, "paddq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, { 0xEC, "paddsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xED, "paddsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xDC, "paddusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xDD, "paddusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x0F, "palignr", T_0F3A | T_66 | T_YMM | T_EVEX, true, true, 2 }, { 0xDB, "pand", T_0F | T_66 | T_YMM, false, true, 2 }, { 0xDF, "pandn", T_0F | T_66 | T_YMM, false, true, 2 }, { 0xE0, "pavgb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xE3, "pavgw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x74, "pcmpeqb", T_0F | T_66 | T_YMM, false, true, 2 }, { 0x75, "pcmpeqw", T_0F | T_66 | T_YMM, false, true, 2 }, { 0x76, "pcmpeqd", T_0F | T_66 | T_YMM, false, true, 2 }, { 0x29, "pcmpeqq", T_0F38 | T_66 | T_YMM, false, true, 3 }, { 0x64, "pcmpgtb", T_0F | T_66 | T_YMM, false, true, 2 }, { 0x65, "pcmpgtw", T_0F | T_66 | T_YMM, false, true, 2 }, { 0x66, "pcmpgtd", T_0F | T_66 | T_YMM, false, true, 2 }, { 0x37, "pcmpgtq", T_0F38 
| T_66 | T_YMM, false, true, 3 }, { 0x01, "phaddw", T_0F38 | T_66 | T_YMM, false, true, 2 }, { 0x02, "phaddd", T_0F38 | T_66 | T_YMM, false, true, 2 }, { 0x03, "phaddsw", T_0F38 | T_66 | T_YMM, false, true, 2 }, { 0x05, "phsubw", T_0F38 | T_66 | T_YMM, false, true, 2 }, { 0x06, "phsubd", T_0F38 | T_66 | T_YMM, false, true, 2 }, { 0x07, "phsubsw", T_0F38 | T_66 | T_YMM, false, true, 2 }, { 0xF5, "pmaddwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x04, "pmaddubsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x3C, "pmaxsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, { 0xEE, "pmaxsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x3D, "pmaxsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, { 0xDE, "pmaxub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x3E, "pmaxuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, { 0x3F, "pmaxud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, { 0x38, "pminsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, { 0xEA, "pminsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x39, "pminsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, { 0xDA, "pminub", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x3A, "pminuw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 3 }, { 0x3B, "pminud", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, { 0xE4, "pmulhuw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x0B, "pmulhrsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xE5, "pmulhw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xD5, "pmullw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x40, "pmulld", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 3 }, { 0xF4, "pmuludq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, { 0x28, "pmuldq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 3 }, { 0xEB, "por", T_0F | T_66 | T_YMM, false, true, 2 }, { 0xF6, "psadbw", T_0F | 
T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x00, "pshufb", T_0F38 | T_66 | T_YMM | T_EVEX, false, false, 2 }, { 0x08, "psignb", T_0F38 | T_66 | T_YMM, false, true, 2 }, { 0x09, "psignw", T_0F38 | T_66 | T_YMM, false, true, 2 }, { 0x0A, "psignd", T_0F38 | T_66 | T_YMM, false, true, 2 }, { 0xF1, "psllw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 }, { 0xF2, "pslld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 }, { 0xF3, "psllq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 }, { 0xE1, "psraw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 }, { 0xE2, "psrad", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 }, { 0xD1, "psrlw", T_0F | T_66 | T_YMM | T_EVEX | T_N16, false, true, 2 }, { 0xD2, "psrld", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_N16, false, true, 2 }, { 0xD3, "psrlq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_N16, false, true, 2 }, { 0xF8, "psubb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xF9, "psubw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xFA, "psubd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, { 0xFB, "psubq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, { 0xE8, "psubsb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xE9, "psubsw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xD8, "psubusb", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0xD9, "psubusw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x68, "punpckhbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x69, "punpckhwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x6A, "punpckhdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, { 0x6D, "punpckhqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, { 0x60, "punpcklbw", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x61, "punpcklwd", T_0F | T_66 | T_YMM | T_EVEX, false, true, 2 }, { 0x62, "punpckldq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 
}, { 0x6C, "punpcklqdq", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, { 0xEF, "pxor", T_0F | T_66 | T_YMM, false, true, 2 }, { 0x53, "rcpss", T_0F | T_F3, false, true, 2 }, { 0x52, "rsqrtss", T_0F | T_F3, false, true, 2 }, { 0xC6, "shufpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, true, true, 2 }, { 0xC6, "shufps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, true, true, 2 }, { 0x51, "sqrtsd", T_0F | T_F2 | T_EVEX | T_EW1 | T_ER_X | T_N8, false, true, 2 }, { 0x51, "sqrtss", T_0F | T_F3 | T_EVEX | T_EW0 | T_ER_X | T_N4, false, true, 2 }, { 0x15, "unpckhpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, { 0x15, "unpckhps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, { 0x14, "unpcklpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, false, true, 2 }, { 0x14, "unpcklps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32, false, true, 2 }, { 0xCF, "gf2p8affineinvqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 }, { 0xCE, "gf2p8affineqb", T_66 | T_0F3A | T_W1 | T_EVEX | T_YMM | T_EW1 | T_SAE_Z | T_B64, true, false, 3 }, { 0xCF, "gf2p8mulb", T_66 | T_0F38 | T_W0 | T_EVEX | T_YMM | T_EW0 | T_SAE_Z, false, false, 3 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); if (omitOnly) { if (p->enableOmit) { printf("void v%s(const Xmm& x, const Operand& op%s) { v%s(x, x, op%s); }\n", p->name, p->hasIMM ? ", uint8_t imm" : "", p->name, p->hasIMM ? 
", imm" : ""); } } else { if (p->mode & 1) { if (p->hasIMM) { printf("void %s(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); }\n", p->name, p->code); } else { printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x66, isXMM_XMMorMEM, NONE, 0x38); }\n", p->name, p->code); } } if (p->mode & 2) { printf("void v%s(const Xmm& x1, const Xmm& x2, const Operand& op%s) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X%s); }\n" , p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } } } } void putMemOp(const char *name, uint8_t prefix, uint8_t ext, uint8_t code1, int code2, int bit = 32) { printf("void %s(const Address& addr) { ", name); if (prefix) printf("db(0x%02X); ", prefix); printf("opModM(addr, Reg%d(%d), 0x%02X, 0x%02X); }\n", bit, ext, code1, code2); } void putLoadSeg(const char *name, uint8_t code1, int code2 = NONE) { printf("void %s(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, 0x%02X, 0x%02X); }\n", name, code1, code2); } void put() { const int NO = CodeGenerator::NONE; { char buf[16]; unsigned int v = VERSION; if (v & 0xF) { snprintf(buf, sizeof(buf), "%d.%02X%x", v >> 12, (v >> 4) & 0xFF, v & 0xF); } else { snprintf(buf, sizeof(buf), "%d.%02X", v >> 12, (v >> 4) & 0xFF); } printf("const char *getVersionString() const { return \"%s\"; }\n", buf); } const int B = 1 << 0; const int W = 1 << 1; const int D = 1 << 2; const int Q = 1 << 3; { const struct Tbl { uint8_t code; const char *name; } tbl[] = { // MMX { 0x6B, "packssdw" }, { 0x63, "packsswb" }, { 0x67, "packuswb" }, { 0xDB, "pand" }, { 0xDF, "pandn" }, { 0xF5, "pmaddwd" }, { 0xE4, "pmulhuw" }, { 0xE5, "pmulhw" }, { 0xD5, "pmullw" }, { 0xEB, "por" }, { 0x68, "punpckhbw" }, { 0x69, "punpckhwd" }, { 0x6A, "punpckhdq" }, { 0x60, "punpcklbw" }, { 0x61, "punpcklwd" }, { 0x62, "punpckldq" }, { 0xEF, "pxor" }, // MMX2 { 0xE0, "pavgb" }, { 0xE3, "pavgw" }, { 0xEE, "pmaxsw" }, { 
0xDE, "pmaxub" }, { 0xEA, "pminsw" }, { 0xDA, "pminub" }, { 0xF6, "psadbw" }, // { 0xD4, "paddq" }, { 0xF4, "pmuludq" }, { 0xFB, "psubq" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n" , p->name, p->code); } } { const struct Tbl { uint8_t code; int mode; const char *name; } tbl[] = { { 0xFC, B|W|D, "padd" }, { 0xEC, B|W , "padds" }, { 0xDC, B|W , "paddus" }, { 0x74, B|W|D, "pcmpeq" }, { 0x64, B|W|D, "pcmpgt" }, { 0xF0, W|D|Q, "psll" }, { 0xE0, W|D , "psra" }, { 0xD0, W|D|Q, "psrl" }, { 0xF8, B|W|D, "psub" }, { 0xE8, B|W , "psubs" }, { 0xD8, B|W , "psubus" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; static const char modTbl[][4] = { "b", "w", "d", "q" }; for (int j = 0; j < 4; j++) { // B(0), W(1), D(2), Q(3) if (!(p->mode & (1 << j))) continue; printf("void %s%s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X); }\n" , p->name, modTbl[j] , p->code | j ); } } } { const struct Tbl { uint8_t code; int ext; int mode; const char *name; } tbl[] = { { 0x70, 6, W|D|Q, "psll" }, { 0x70, 4, W|D , "psra" }, { 0x70, 2, W|D|Q, "psrl" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; static const char modTbl[][4] = { "b", "w", "d", "q" }; for (int j = 0; j < 4; j++) { // B(0), W(1), D(2), Q(3) if (!(p->mode & (1 << j))) continue; printf("void %s%s(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x%02X, %d); }\n" , p->name, modTbl[j] , p->code | j , p->ext ); } } printf("void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 7); printf("void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x%02X, %d); }\n", 0x73, 3); } { const struct Tbl { uint8_t code; uint8_t pref; const char *name; } tbl[] = { { 0x70, 0, "pshufw" }, { 0x70, 0xF2, "pshuflw" }, { 0x70, 0xF3, "pshufhw" }, { 0x70, 0x66, "pshufd" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const 
Tbl *p = &tbl[i]; printf("void %s(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x%02X, 0x%02X, imm8); }\n", p->name, p->code, p->pref); } } { const struct MmxTbl6 { uint8_t code; // for (reg, reg/[mem]) uint8_t code2; // for ([mem], reg) int pref; const char *name; } mmxTbl6[] = { { 0x6F, 0x7F, 0x66, "movdqa" }, { 0x6F, 0x7F, 0xF3, "movdqu" }, // SSE2 { 0x28, 0x29, NO, "movaps" }, { 0x10, 0x11, 0xF3, "movss" }, { 0x10, 0x11, NO, "movups" }, { 0x28, 0x29, 0x66, "movapd" }, { 0x10, 0x11, 0xF2, "movsd" }, { 0x10, 0x11, 0x66, "movupd" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(mmxTbl6); i++) { const MmxTbl6 *p = &mmxTbl6[i]; printf("void %s(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref); printf("void %s(const Address& addr, const Xmm& xmm) { ", p->name); if (p->pref != NO) printf("db(0x%02X); ", p->pref); printf("opModM(addr, xmm, 0x0F, 0x%02X); }\n", p->code2); } } { enum { PS = 1 << 0, SS = 1 << 1, PD = 1 << 2, SD = 1 << 3 }; const struct { int code; const char *name; } sufTbl[] = { { NO, "ps" }, { 0xF3, "ss" }, { 0x66, "pd" }, { 0xF2, "sd" }, }; const struct Tbl { uint8_t code; int mode; const char *name; bool hasImm; } tbl[] = { { 0x58, PS|SS|PD|SD, "add" }, { 0x55, PS|PD , "andn" }, { 0x54, PS|PD , "and" }, { 0xC2, PS|SS|PD|SD, "cmp", true }, { 0x5E, PS|SS|PD|SD, "div" }, { 0x5F, PS|SS|PD|SD, "max" }, { 0x5D, PS|SS|PD|SD, "min" }, { 0x59, PS|SS|PD|SD, "mul" }, { 0x56, PS|PD , "or" }, { 0x53, PS|SS , "rcp" }, { 0x52, PS|SS , "rsqrt" }, { 0xC6, PS|PD , "shuf", true }, { 0x51, PS|SS|PD|SD, "sqrt" }, { 0x5C, PS|SS|PD|SD, "sub" }, { 0x15, PS|PD , "unpckh" }, { 0x14, PS|PD , "unpckl" }, { 0x57, PS|PD , "xor" }, // }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; for (size_t j = 0; j < NUM_OF_ARRAY(sufTbl); j++) { if (!(p->mode & (1 << j))) continue; if (p->hasImm) { // don't change uint8_t to int because NO is not in byte printf("void %s%s(const Xmm& xmm, const Operand& 
op, uint8_t imm8) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM, imm8); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); } else { printf("void %s%s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%2X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, sufTbl[j].name, p->code, sufTbl[j].code); } } } } { // (XMM, XMM) const struct Tbl { uint8_t code; uint8_t pref; const char *name; } tbl[] = { { 0xF7, 0x66, "maskmovdqu" }, { 0x12, 0 , "movhlps" }, { 0x16, 0 , "movlhps" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Xmm& reg1, const Xmm& reg2) { ", p->name); if (p->pref) printf("db(0x%02X); ", p->pref); printf(" opModR(reg1, reg2, 0x0F, 0x%02X); }\n", p->code); } } { // (XMM, XMM|MEM) const struct Tbl { uint8_t code; int pref; const char *name; } tbl[] = { { 0x6D, 0x66, "punpckhqdq" }, { 0x6C, 0x66, "punpcklqdq" }, { 0x2F, NO , "comiss" }, { 0x2E, NO , "ucomiss" }, { 0x2F, 0x66, "comisd" }, { 0x2E, 0x66, "ucomisd" }, { 0x5A, 0x66, "cvtpd2ps" }, { 0x5A, NO , "cvtps2pd" }, { 0x5A, 0xF2, "cvtsd2ss" }, { 0x5A, 0xF3, "cvtss2sd" }, { 0xE6, 0xF2, "cvtpd2dq" }, { 0xE6, 0x66, "cvttpd2dq" }, { 0xE6, 0xF3, "cvtdq2pd" }, { 0x5B, 0x66, "cvtps2dq" }, { 0x5B, 0xF3, "cvttps2dq" }, { 0x5B, NO , "cvtdq2ps" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM); }\n", p->name, p->code, p->pref); } } { // special type const struct Tbl { uint8_t code; int pref; const char *name; const char *cond; } tbl[] = { { 0x2A, NO , "cvtpi2ps", "isXMM_MMXorMEM" }, { 0x2D, NO , "cvtps2pi", "isMMX_XMMorMEM" }, { 0x2A, 0xF3, "cvtsi2ss", "isXMM_REG32orMEM" }, { 0x2D, 0xF3, "cvtss2si", "isREG32_XMMorMEM" }, { 0x2C, NO , "cvttps2pi", "isMMX_XMMorMEM" }, { 0x2C, 0xF3, "cvttss2si", "isREG32_XMMorMEM" }, { 0x2A, 0x66, "cvtpi2pd", "isXMM_MMXorMEM" }, { 0x2D, 0x66, "cvtpd2pi", "isMMX_XMMorMEM" }, { 0x2A, 0xF2, "cvtsi2sd", 
"isXMM_REG32orMEM" }, { 0x2D, 0xF2, "cvtsd2si", "isREG32_XMMorMEM" }, { 0x2C, 0x66, "cvttpd2pi", "isMMX_XMMorMEM" }, { 0x2C, 0xF2, "cvttsd2si", "isREG32_XMMorMEM" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Operand& reg, const Operand& op) { opGen(reg, op, 0x%02X, 0x%02X, %s); }\n", p->name, p->code, p->pref, p->cond); } } { // prefetch const struct Tbl { int ext; const char *name; int code; } tbl[] = { { 1, "t0", 0x18}, { 2, "t1", 0x18}, { 3, "t2", 0x18}, { 0, "nta", 0x18}, { 2, "wt1", 0x0D}, { 1, "w", 0x0D}, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void prefetch%s(const Address& addr) { opModM(addr, Reg32(%d), 0x0F, 0x%02X); }\n", p->name, p->ext, p->code); } } { const struct Tbl { uint8_t code; int pref; const char *name; } tbl[] = { { 0x16, NO, "movhps" }, { 0x12, NO, "movlps" }, { 0x16, 0x66, "movhpd" }, { 0x12, 0x66, "movlpd" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x%02X, 0x%02X); }\n", p->name, p->code, p->pref); } } { // cmov const struct Tbl { uint8_t ext; const char *name; } tbl[] = { { 0, "o" }, { 1, "no" }, { 2, "b" }, { 2, "c" }, { 2, "nae" }, { 3, "nb" }, { 3, "ae" }, { 3, "nc" }, { 4, "e" }, { 4, "z" }, { 5, "ne" }, { 5, "nz" }, { 6, "be" }, { 6, "na" }, { 7, "nbe" }, { 7, "a" }, { 8, "s" }, { 9, "ns" }, { 10, "p" }, { 10, "pe" }, { 11, "np" }, { 11, "po" }, { 12, "l" }, { 12, "nge" }, { 13, "nl" }, { 13, "ge" }, { 14, "le" }, { 14, "ng" }, { 15, "nle" }, { 15, "g" }, }; const char *msg = "//-V524"; // disable warning of PVS-Studio for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void cmov%s(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | %d); }%s\n", p->name, p->ext, msg); printf("void j%s(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 
0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); printf("void j%s(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); printf("void j%s(const char *label, LabelType type = T_AUTO) { j%s(std::string(label), type); }%s\n", p->name, p->name, msg); printf("void j%s(const void *addr) { opJmpAbs(addr, T_NEAR, 0x%02X, 0x%02X, 0x%02X); }%s\n", p->name, p->ext | 0x70, p->ext | 0x80, 0x0F, msg); printf("void set%s(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | %d); }%s\n", p->name, p->ext, msg); } } { const struct Tbl { const char *name; uint8_t code; } tbl[] = { { "loop", 0xE2 }, { "loope", 0xE1 }, { "loopne", 0xE0 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(std::string label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code); printf("void %s(const Label& label) { opJmp(label, T_SHORT, 0x%02X, 0, 0); }\n", p->name, p->code); printf("void %s(const char *label) { %s(std::string(label)); }\n", p->name, p->name); } } //////////////////////////////////////////////////////////////// { const GenericTbl tbl[] = { { "bnd", 0xf2 }, /* 0xf2 prefix for MPX */ { "cbw", 0x66, 0x98 }, { "cdq", 0x99 }, { "clc", 0xF8 }, { "cld", 0xFC }, { "cli", 0xFA }, { "cmc", 0xF5 }, { "cpuid", 0x0F, 0xA2 }, { "cwd", 0x66, 0x99 }, { "cwde", 0x98 }, { "cmpsb", 0xA6 }, { "cmpsw", 0x66, 0xA7 }, { "cmpsd", 0xA7 }, { "endbr32", 0xF3, 0x0F, 0x1E, 0xFB }, { "endbr64", 0xF3, 0x0F, 0x1E, 0xFA }, { "hlt", 0xF4 }, { "int3", 0xCC }, { "scasb", 0xAE }, { "scasw", 0x66, 0xAF }, { "scasd", 0xAF }, { "movsb", 0xA4 }, { "leave", 0xC9 }, { "lodsb", 0xAC }, { "lodsw", 0x66, 0xAD }, { "lodsd", 0xAD }, { "movsw", 0x66, 0xA5 }, { "movsd", 0xA5 }, { "outsb", 0x6E }, { "outsw", 0x66, 0x6F }, { "outsd", 0x6F }, { "stosb", 0xAA }, { "stosw", 0x66, 0xAB }, { "stosd", 0xAB }, { "rep", 0xF3 }, { "repe", 0xF3 }, { "repz", 0xF3 }, { 
"repne", 0xF2 }, { "repnz", 0xF2 }, { "lahf", 0x9F }, { "lock", 0xF0 }, { "sahf", 0x9E }, { "stc", 0xF9 }, { "std", 0xFD }, { "sti", 0xFB }, { "sysenter", 0x0F, 0x34 }, { "sysexit", 0x0F, 0x35 }, { "emms", 0x0F, 0x77 }, { "pause", 0xF3, 0x90 }, { "sfence", 0x0F, 0xAE, 0xF8 }, { "lfence", 0x0F, 0xAE, 0xE8 }, { "mfence", 0x0F, 0xAE, 0xF0 }, { "monitor", 0x0F, 0x01, 0xC8 }, { "mwait", 0x0F, 0x01, 0xC9 }, { "rdmsr", 0x0F, 0x32 }, { "rdpmc", 0x0F, 0x33 }, { "rdtsc", 0x0F, 0x31 }, { "rdtscp", 0x0F, 0x01, 0xF9 }, { "ud2", 0x0F, 0x0B }, { "wait", 0x9B }, { "fwait", 0x9B }, { "wbinvd", 0x0F, 0x09 }, { "wrmsr", 0x0F, 0x30 }, { "xlatb", 0xD7 }, { "popf", 0x9D }, { "pushf", 0x9C }, { "stac", 0x0F, 0x01, 0xCB }, { "vzeroall", 0xC5, 0xFC, 0x77 }, { "vzeroupper", 0xC5, 0xF8, 0x77 }, { "xgetbv", 0x0F, 0x01, 0xD0 }, // FPU { "f2xm1", 0xD9, 0xF0 }, { "fabs", 0xD9, 0xE1 }, { "faddp", 0xDE, 0xC1 }, { "fchs", 0xD9, 0xE0 }, { "fclex", 0x9B, 0xDB, 0xE2 }, { "fnclex", 0xDB, 0xE2 }, { "fcom", 0xD8, 0xD1 }, { "fcomp", 0xD8, 0xD9 }, { "fcompp", 0xDE, 0xD9 }, { "fcos", 0xD9, 0xFF }, { "fdecstp", 0xD9, 0xF6 }, { "fdivp", 0xDE, 0xF9 }, { "fdivrp", 0xDE, 0xF1 }, { "fincstp", 0xD9, 0xF7 }, { "finit", 0x9B, 0xDB, 0xE3 }, { "fninit", 0xDB, 0xE3 }, { "fld1", 0xD9, 0xE8 }, { "fldl2t", 0xD9, 0xE9 }, { "fldl2e", 0xD9, 0xEA }, { "fldpi", 0xD9, 0xEB }, { "fldlg2", 0xD9, 0xEC }, { "fldln2", 0xD9, 0xED }, { "fldz", 0xD9, 0xEE }, { "fmulp", 0xDE, 0xC9 }, { "fnop", 0xD9, 0xD0 }, { "fpatan", 0xD9, 0xF3 }, { "fprem", 0xD9, 0xF8 }, { "fprem1", 0xD9, 0xF5 }, { "fptan", 0xD9, 0xF2 }, { "frndint", 0xD9, 0xFC }, { "fscale", 0xD9, 0xFD }, { "fsin", 0xD9, 0xFE }, { "fsincos", 0xD9, 0xFB }, { "fsqrt", 0xD9, 0xFA }, { "fsubp", 0xDE, 0xE9 }, { "fsubrp", 0xDE, 0xE1 }, { "ftst", 0xD9, 0xE4 }, { "fucom", 0xDD, 0xE1 }, { "fucomp", 0xDD, 0xE9 }, { "fucompp", 0xDA, 0xE9 }, { "fxam", 0xD9, 0xE5 }, { "fxch", 0xD9, 0xC9 }, { "fxtract", 0xD9, 0xF4 }, { "fyl2x", 0xD9, 0xF1 }, { "fyl2xp1", 0xD9, 0xF9 }, // AMD Zen { "monitorx", 
0x0F, 0x01, 0xFA }, { "mwaitx", 0x0F, 0x01, 0xFB }, { "clzero", 0x0F, 0x01, 0xFC }, }; putGeneric(tbl, NUM_OF_ARRAY(tbl)); puts("void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }"); puts("void int_(uint8_t x) { db(0xCD); db(x); }"); putLoadSeg("lss", 0x0F, 0xB2); putLoadSeg("lfs", 0x0F, 0xB4); putLoadSeg("lgs", 0x0F, 0xB5); } { const struct Tbl { uint8_t code; // (reg, reg) uint8_t ext; // (reg, imm) const char *name; } tbl[] = { { 0x10, 2, "adc" }, { 0x00, 0, "add" }, { 0x20, 4, "and_" }, { 0x38, 7, "cmp" }, { 0x08, 1, "or_" }, { 0x18, 3, "sbb" }, { 0x28, 5, "sub" }, { 0x30, 6, "xor_" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x%02X); }\n", p->name, p->code); printf("void %s(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x%02X, %d); }\n", p->name, p->code, p->ext); } } { const struct Tbl { uint8_t code; uint8_t ext; const char *name; } tbl[] = { { 0x48, 1, "dec" }, { 0x40, 0, "inc" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Operand& op) { opIncDec(op, 0x%02X, %d); }\n", p->name, p->code, p->ext); } } { const struct Tbl { uint8_t code; uint8_t ext; const char *name; } tbl[] = { { 0xa3, 4, "bt" }, { 0xab, 5, "bts" }, { 0xb3, 6, "btr" }, { 0xbb, 7, "btc" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0x%02X); }\n", p->name, p->code); printf("void %s(const Operand& op, uint8_t imm) { opR_ModM(op, 16|32|64, %d, 0x0f, 0xba, NONE, false, 1); db(imm); }\n", p->name, p->ext); } } { const struct Tbl { uint8_t code; uint8_t ext; const char *name; } tbl[] = { { 0xF6, 6, "div" }, { 0xF6, 7, "idiv" }, { 0xF6, 5, "imul" }, { 0xF6, 4, "mul" }, { 0xF6, 3, "neg" }, { 0xF6, 2, "not_" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); 
i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& op) { opR_ModM(op, 0, %d, 0x%02X); }\n", p->name, p->ext, p->code);
		}
	}
	{
		// shift/rotate : one overload taking an immediate count and one taking a Reg8
		const struct Tbl {
			const char *name;
			uint8_t ext;
		} tbl[] = {
			{ "rcl", 2 },
			{ "rcr", 3 },
			{ "rol", 0 },
			{ "ror", 1 },
			{ "sar", 7 },
			{ "shl", 4 },
			{ "shr", 5 },

			{ "sal", 4 },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& op, int imm) { opShift(op, imm, %d); }\n", p->name, p->ext);
			printf("void %s(const Operand& op, const Reg8& _cl) { opShift(op, _cl, %d); }\n", p->name, p->ext);
		}
	}
	{
		// shld/shrd : one overload taking an immediate and one taking a Reg8
		const struct Tbl {
			const char *name;
			uint8_t code;
		} tbl[] = {
			{ "shld", 0xA4 },
			{ "shrd", 0xAC },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			printf("void %s(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(op, reg, imm, 0x%02X); }\n", p->name, p->code);
			printf("void %s(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0x%02X, &_cl); }\n", p->name, p->code);
		}
	}
	{
		const struct Tbl {
			const char *name;
			uint8_t code;
		} tbl[] = {
			{ "bsf", 0xBC },
			{ "bsr", 0xBD },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			// the first parameter must be spelled `reg` because the emitted body passes `reg` to opModRM
			printf("void %s(const Reg&reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x%02X); }\n", p->name, p->code);
		}
	}
	{
		const struct Tbl {
			const char *name;
			uint8_t code;
		} tbl[] = {
			{ "popcnt", 0xB8 },
			{ "tzcnt", 0xBC },
			{ "lzcnt", 0xBD },
		};
		for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
			const Tbl *p = &tbl[i];
			// the first parameter must be spelled `reg` because the emitted body passes `reg` to opSp1
			printf("void %s(const Reg&reg, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0x%02X); }\n", p->name, p->code);
		}
	}
	// SSSE3
	{
		const struct Tbl {
			uint8_t code;
			const char *name;
		} tbl[] = {
			{ 0x00, "pshufb" },
			{ 0x01, "phaddw" },
			{ 0x02, "phaddd" },
			{ 0x03, "phaddsw" },
			{ 0x04, "pmaddubsw" },
			{ 0x05, "phsubw" },
			{ 0x06, "phsubd" },
			{ 0x07, "phsubsw" },
			{ 0x08, "psignb" },
			{ 0x09, "psignw" },
			{ 0x0a, "psignd" },
			{ 0x0b, "pmulhrsw" },
{ 0x1c, "pabsb" }, { 0x1d, "pabsw" }, { 0x1e, "pabsd" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x%02X, 0x66, NONE, 0x38); }\n", p->name, p->code); } printf("void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); }\n"); } { const struct Tbl { const char *name; uint8_t code; } tbl[] = { { "pclmullqlqdq", 0 }, { "pclmulhqlqdq", 1 }, { "pclmullqhdq", 0x10 }, { "pclmulhqhdq", 0x11 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x%02X); }\n", p->name, p->code); } } { const struct Tbl { uint8_t code1; int code2; uint8_t ext; const char *name; uint8_t prefix; } tbl[] = { { 0x0F, 0xAE, 2, "ldmxcsr", 0 }, { 0x0F, 0xAE, 3, "stmxcsr", 0 }, { 0x0F, 0xAE, 7, "clflush", 0 }, { 0x0F, 0xAE, 7, "clflushopt", 0x66 }, { 0xDF, NONE, 4, "fbld", 0 }, { 0xDF, NONE, 6, "fbstp", 0 }, { 0xD9, NONE, 5, "fldcw", 0 }, { 0xD9, NONE, 4, "fldenv", 0 }, { 0xDD, NONE, 4, "frstor", 0 }, { 0xDD, NONE, 6, "fsave", 0x9B }, { 0xDD, NONE, 6, "fnsave", 0 }, { 0xD9, NONE, 7, "fstcw", 0x9B }, { 0xD9, NONE, 7, "fnstcw", 0 }, { 0xD9, NONE, 6, "fstenv", 0x9B }, { 0xD9, NONE, 6, "fnstenv", 0 }, { 0xDD, NONE, 7, "fstsw", 0x9B }, { 0xDD, NONE, 7, "fnstsw", 0 }, { 0x0F, 0xAE, 1, "fxrstor", 0 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; putMemOp(p->name, p->prefix, p->ext, p->code1, p->code2); } puts("void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); }"); puts("void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); }"); } { const struct Tbl { uint8_t code; const char *name; } tbl[] = { { 0x2B, "movntpd" }, { 0xE7, "movntdq" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = 
&tbl[i]; // cast xmm register to 16bit register to put 0x66 printf("void %s(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x%02X); }\n", p->name, p->code); } } { const struct Tbl { uint8_t code; const char *name; } tbl[] = { { 0xBE, "movsx" }, { 0xB6, "movzx" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0x%02X); }\n", p->name, p->code); } } { // in/out puts("void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); }"); puts("void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); }"); puts("void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); }"); puts("void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); }"); } // mpx { puts("void bndcl(const BoundsReg& bnd, const Operand& op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }"); puts("void bndcu(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }"); puts("void bndcn(const BoundsReg& bnd, const Operand& op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1B, NONE, !op.isMEM()); }"); puts("void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, 0x0F, 0x1A); }"); puts("void bndmk(const BoundsReg& bnd, const Address& addr) { db(0xF3); opModM(addr, bnd, 0x0F, 0x1B); }"); puts("void bndmov(const BoundsReg& bnd, const Operand& op) { db(0x66); opModRM(bnd, op, op.isBNDREG(), op.isMEM(), 0x0F, 0x1A); }"); puts("void bndmov(const Address& addr, const BoundsReg& bnd) { db(0x66); opModM(addr, bnd, 0x0F, 0x1B); }"); puts("void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, 0x0F, 0x1B); }"); } // misc { puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }"); puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }"); puts("void 
ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }"); puts("void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }"); puts("void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }"); puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }"); puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }"); puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }"); puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }"); puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }"); puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }"); puts("void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); }"); puts("void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); }"); puts("void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); }"); puts("void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }"); puts("void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(mmx, op, 0xC4, mmx.isXMM() ? 
0x66 : NONE, 0, imm); }"); puts("void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, 0x3A); }"); puts("void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); puts("void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, 0x3A); }"); puts("void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(reg, mmx, 0x0F, 0xD7); }"); puts("void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModR(reg1, reg2, 0x0F, 0xF7); }"); puts("void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, 0x50); }"); puts("void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }"); puts("void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, 0x2B); }"); puts("void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); }"); puts("void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }"); puts("void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0xC3); }"); puts("void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opModM(addr, mmx, 0x0F, 0xE7); }"); puts("void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x7E); }"); puts("void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); puts("void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, 0x6E); }"); puts("void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }"); puts("void movq2dq(const Xmm& xmm, const Mmx& mmx) { db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); 
}"); puts("void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }"); puts("void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F); }"); puts("void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModM(addr, mmx, 0x0F, mmx.isXMM() ? 0xD6 : 0x7F); }"); puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }"); puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }"); } { const struct Tbl { uint8_t m16; uint8_t m32; uint8_t m64; uint8_t ext; const char *name; uint8_t m64ext; } tbl[] = { { 0x00, 0xD8, 0xDC, 0, "fadd" }, { 0xDE, 0xDA, 0x00, 0, "fiadd" }, { 0x00, 0xD8, 0xDC, 2, "fcom" }, { 0x00, 0xD8, 0xDC, 3, "fcomp" }, { 0x00, 0xD8, 0xDC, 6, "fdiv" }, { 0xDE, 0xDA, 0x00, 6, "fidiv" }, { 0x00, 0xD8, 0xDC, 7, "fdivr" }, { 0xDE, 0xDA, 0x00, 7, "fidivr" }, { 0xDE, 0xDA, 0x00, 2, "ficom" }, { 0xDE, 0xDA, 0x00, 3, "ficomp" }, { 0xDF, 0xDB, 0xDF, 0, "fild", 5 }, { 0xDF, 0xDB, 0x00, 2, "fist" }, { 0xDF, 0xDB, 0xDF, 3, "fistp", 7 }, { 0xDF, 0xDB, 0xDD, 1, "fisttp" }, { 0x00, 0xD9, 0xDD, 0, "fld" }, { 0x00, 0xD8, 0xDC, 1, "fmul" }, { 0xDE, 0xDA, 0x00, 1, "fimul" }, { 0x00, 0xD9, 0xDD, 2, "fst" }, { 0x00, 0xD9, 0xDD, 3, "fstp" }, { 0x00, 0xD8, 0xDC, 4, "fsub" }, { 0xDE, 0xDA, 0x00, 4, "fisub" }, { 0x00, 0xD8, 0xDC, 5, "fsubr" }, { 0xDE, 0xDA, 0x00, 5, "fisubr" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Address& addr) { opFpuMem(addr, 0x%02X, 0x%02X, 0x%02X, %d, %d); }\n", 
p->name, p->m16, p->m32, p->m64, p->ext, p->m64ext); } } { const struct Tbl { uint32_t code1; uint32_t code2; const char *name; } tbl[] = { { 0xD8C0, 0xDCC0, "fadd" }, { 0x0000, 0xDEC0, "faddp" }, { 0xDAC0, 0x00C0, "fcmovb" }, { 0xDAC8, 0x00C8, "fcmove" }, { 0xDAD0, 0x00D0, "fcmovbe" }, { 0xDAD8, 0x00D8, "fcmovu" }, { 0xDBC0, 0x00C0, "fcmovnb" }, { 0xDBC8, 0x00C8, "fcmovne" }, { 0xDBD0, 0x00D0, "fcmovnbe" }, { 0xDBD8, 0x00D8, "fcmovnu" }, { 0xDBF0, 0x00F0, "fcomi" }, { 0xDFF0, 0x00F0, "fcomip" }, { 0xDBE8, 0x00E8, "fucomi" }, { 0xDFE8, 0x00E8, "fucomip" }, { 0xD8F0, 0xDCF8, "fdiv" }, { 0x0000, 0xDEF8, "fdivp" }, { 0xD8F8, 0xDCF0, "fdivr" }, { 0x0000, 0xDEF0, "fdivrp" }, { 0xD8C8, 0xDCC8, "fmul" }, { 0x0000, 0xDEC8, "fmulp" }, { 0xD8E0, 0xDCE8, "fsub" }, { 0x0000, 0xDEE8, "fsubp" }, { 0xD8E8, 0xDCE0, "fsubr" }, { 0x0000, 0xDEE0, "fsubrp" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2); // omit st0 version(like nasm) if (p->code1) { printf("void %s(const Fpu& reg1) { opFpuFpu(st0, reg1, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2); } else { printf("void %s(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x%04X, 0x%04X); }\n", p->name, p->code1, p->code2); } } } { const struct Tbl { uint8_t code1; uint8_t code2; const char *name; } tbl[] = { { 0xD8, 0xD0, "fcom" }, { 0xD8, 0xD8, "fcomp" }, { 0xDD, 0xC0, "ffree" }, { 0xD9, 0xC0, "fld" }, { 0xDD, 0xD0, "fst" }, { 0xDD, 0xD8, "fstp" }, { 0xDD, 0xE0, "fucom" }, { 0xDD, 0xE8, "fucomp" }, { 0xD9, 0xC8, "fxch" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void %s(const Fpu& reg) { opFpu(reg, 0x%02X, 0x%02X); }\n", p->name, p->code1, p->code2); } } // AVX { // pd, ps, sd, ss const struct Tbl { uint8_t code; const char *name; bool only_pd_ps; } tbl[] = { { 0x58, "add", false }, { 0x5C, "sub", false }, { 0x59, "mul", false }, { 0x5E, 
"div", false }, { 0x5F, "max", false }, { 0x5D, "min", false }, { 0x54, "and", true }, { 0x55, "andn", true }, { 0x56, "or", true }, { 0x57, "xor", true }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code); printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code); if (p->only_pd_ps) continue; printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x%02X); }\n", p->name, p->code); printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x%02X); }\n", p->name, p->code); } } putX_X_XM(false); // (x, x/m[, imm]) or (y, y/m[, imm]) { const struct Tbl { uint8_t code; const char *name; int type; bool hasIMM; int mode; // 1 : SSE, 2 : AVX, 3 : SSE + AVX } tbl[] = { { 0x15, "blendvpd", T_0F38 | T_66, false, 1 }, { 0x14, "blendvps", T_0F38 | T_66, false, 1 }, { 0x10, "pblendvb", T_0F38 | T_66, false, 1 }, { 0xDF, "aeskeygenassist", T_0F3A | T_66, true, 3 }, { 0xDB, "aesimc", T_0F38 | T_66 | T_W0, false, 3 }, { 0x09, "roundpd", T_0F3A | T_66 | T_YMM, true, 3 }, { 0x08, "roundps", T_0F3A | T_66 | T_YMM, true, 3 }, { 0x05, "permilpd", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64, true, 2 }, { 0x04, "permilps", T_0F3A | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 }, { 0x61, "pcmpestri", T_0F3A | T_66, true, 3 }, { 0x60, "pcmpestrm", T_0F3A | T_66, true, 3 }, { 0x63, "pcmpistri", T_0F3A | T_66, true, 3 }, { 0x62, "pcmpistrm", T_0F3A | T_66, true, 3 }, { 0x0E, "testps", T_0F38 | T_66 | T_YMM, false, 
2 }, { 0x0F, "testpd", T_0F38 | T_66 | T_YMM, false, 2 }, { 0x2F, "comisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 }, { 0x2F, "comiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 }, { 0x5B, "cvtdq2ps", T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 }, { 0x5B, "cvtps2dq", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_ER_Z, false, 2 }, { 0x5B, "cvttps2dq", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_SAE_Z, false, 2 }, { 0x28, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 }, { 0x28, "movaps", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 }, { 0x12, "movddup", T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_DUP, false, 3 }, { 0x6F, "movdqa", T_0F | T_66 | T_YMM, false, 2 }, { 0x6F, "movdqu", T_0F | T_F3 | T_YMM, false, 2 }, { 0x16, "movshdup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 }, { 0x12, "movsldup", T_0F | T_F3 | T_YMM | T_EVEX | T_EW0, false, 3 }, { 0x10, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1, false, 2 }, { 0x10, "movups", T_0F | T_YMM | T_EVEX | T_EW0, false, 2 }, { 0x1C, "pabsb", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 }, { 0x1D, "pabsw", T_0F38 | T_66 | T_YMM | T_EVEX, false, 2 }, { 0x1E, "pabsd", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, false, 2 }, { 0x41, "phminposuw", T_0F38 | T_66, false, 3 }, { 0x20, "pmovsxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, { 0x21, "pmovsxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, { 0x22, "pmovsxbq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 }, { 0x23, "pmovsxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, { 0x24, "pmovsxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, { 0x25, "pmovsxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 }, { 0x30, "pmovzxbw", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, { 0x31, "pmovzxbd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, { 0x32, "pmovzxbq", T_0F38 | T_66 | 
T_YMM | T_EVEX | T_N2 | T_N_VL, false, 3 }, { 0x33, "pmovzxwd", T_0F38 | T_66 | T_YMM | T_EVEX | T_N8 | T_N_VL, false, 3 }, { 0x34, "pmovzxwq", T_0F38 | T_66 | T_YMM | T_EVEX | T_N4 | T_N_VL, false, 3 }, { 0x35, "pmovzxdq", T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0 | T_N8 | T_N_VL, false, 3 }, { 0x70, "pshufd", T_0F | T_66 | T_YMM | T_EVEX | T_EW0 | T_B32, true, 2 }, { 0x70, "pshufhw", T_0F | T_F3 | T_YMM | T_EVEX, true, 2 }, { 0x70, "pshuflw", T_0F | T_F2 | T_YMM | T_EVEX, true, 2 }, { 0x17, "ptest", T_0F38 | T_66 | T_YMM, false, 3 }, { 0x53, "rcpps", T_0F | T_YMM, false, 2 }, { 0x52, "rsqrtps", T_0F | T_YMM, false, 2 }, { 0x51, "sqrtpd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_ER_Z | T_B64, false, 2 }, { 0x51, "sqrtps", T_0F | T_YMM | T_EVEX | T_EW0 | T_ER_Z | T_B32, false, 2 }, { 0x2E, "ucomisd", T_0F | T_66 | T_EVEX | T_EW1 | T_SAE_X | T_N8, false, 2 }, { 0x2E, "ucomiss", T_0F | T_EVEX | T_EW0 | T_SAE_X | T_N4, false, 2 }, { 0xCC, "sha1rnds4", T_0F3A, true, 1 }, { 0xC8, "sha1nexte", T_0F38, false, 1 }, { 0xC9, "sha1msg1", T_0F38, false, 1 }, { 0xCA, "sha1msg2", T_0F38, false, 1 }, { 0xCB, "sha256rnds2", T_0F38, false, 1 }, { 0xCC, "sha256msg1", T_0F38, false, 1 }, { 0xCD, "sha256msg2", T_0F38, false, 1 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); if (p->mode & 1) { const char *immS1 = p->hasIMM ? ", uint8_t imm" : ""; const char *immS2 = p->hasIMM ? ", imm" : ", NONE"; const char *prefTbl[5] = { "NONE", "0x66", "0xF3", "0xF2" }; const char *pref = prefTbl[getPP(p->type)]; const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE"; printf("void %s(const Xmm& xmm, const Operand& op%s) { opGen(xmm, op, 0x%02X, %s, isXMM_XMMorMEM%s, %s); }\n", p->name, immS1, p->code, pref, immS2, suf); } if (p->mode & 2) { printf("void v%s(const Xmm& xm, const Operand& op%s) { opAVX_X_XM_IMM(xm, op, %s, 0x%02X%s); }\n" , p->name, p->hasIMM ? 
", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : ""); } } } // (m, x), (m, y) { const struct Tbl { uint8_t code; const char *name; int type; } tbl[] = { { 0x29, "movapd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K }, { 0x29, "movaps", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K }, { 0x7F, "movdqa", T_0F | T_66 | T_YMM }, { 0x7F, "movdqu", T_0F | T_F3 | T_YMM }, { 0x11, "movupd", T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_M_K }, { 0x11, "movups", T_0F | T_YMM | T_EVEX | T_EW0 | T_M_K }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void v%s(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, %s, 0x%02X); }\n" , p->name, type.c_str(), p->code); } } // (x, x/m), (y, y/m), (x, x, x/m), (y, y, y/m) { const struct Tbl { uint8_t code; const char *name; int type; int mode; // 1 : sse, 2 : avx, 3 : sse + avx } tbl[] = { { 0xD0, "addsubpd", T_0F | T_66 | T_YMM, 3 }, { 0xD0, "addsubps", T_0F | T_F2 | T_YMM, 3 }, { 0x7C, "haddpd", T_0F | T_66 | T_YMM, 3 }, { 0x7C, "haddps", T_0F | T_F2 | T_YMM, 3 }, { 0x7D, "hsubpd", T_0F | T_66 | T_YMM, 3 }, { 0x7D, "hsubps", T_0F | T_F2 | T_YMM, 3 }, { 0xDC, "aesenc", T_0F38 | T_66 | T_YMM | T_EVEX, 3 }, { 0xDD, "aesenclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 }, { 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 }, { 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 }, }; const uint8_t ppTbl[] = { 0, 0x66, 0xf3, 0xf2 }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); if (p->mode & 1) { uint8_t pref = ppTbl[getPP(p->type)]; printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? 
", NONE, 0x38" : ""); } if (p->mode & 2) { printf("void v%s(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, %s, 0x%02X); }\n" , p->name, type.c_str(), p->code); } } } // vmaskmov { const char suf[][8] = { "ps", "pd" }; for (int i = 0; i < 2; i++) { printf("void vmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2C + i); printf("void vmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x%02X); }\n", suf[i], 0x2E + i); } } // vpmaskmov { const char suf[][8] = { "d", "q" }; for (int i = 0; i < 2; i++) { printf("void vpmaskmov%s(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8C); printf("void vpmaskmov%s(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W%d | T_YMM, 0x%02X); }\n", suf[i], i, 0x8E); } } // vpermd, vpermps { const struct Tbl { uint8_t code; const char *name; int type; } tbl[] = { { 0x36, "vpermd", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 }, { 0x36, "vpermq", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_B64 }, { 0x16, "vpermps", T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX | T_EW0 | T_B32 }, { 0x16, "vpermpd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_YMM | T_B64 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); printf("void %s(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); } } // vpermq, vpermpd { const struct Tbl { uint8_t code; const char *name; int type; } tbl[] = { { 0x00, "vpermq", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 }, { 0x01, "vpermpd", T_0F3A | T_66 | T_W1 | T_YMM | T_EVEX | T_EW1 | T_B64 }, }; for (size_t i = 0; i < 
NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); printf("void %s(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, %s, 0x%02X, imm); }\n", p.name, type.c_str(), p.code); } } // vcmpeqps { const char pred[32][16] = { "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" }; const char suf[][4] = { "pd", "ps", "sd", "ss" }; for (int i = 0; i < 4; i++) { const char *s = suf[i]; for (int j = 0; j < 32; j++) { if (j < 8) { printf("void cmp%s%s(const Xmm& x, const Operand& op) { cmp%s(x, op, %d); }\n", pred[j], s, s, j); } printf("void vcmp%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmp%s(x1, x2, op, %d); }\n", pred[j], s, s, j); } } } // vmov(h|l)(pd|ps) { const struct Tbl { bool isH; bool isPd; uint8_t code; } tbl[] = { { true, true, 0x16 }, { true, false, 0x16 }, { false, true, 0x12 }, { false, false, 0x12 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; char c = p.isH ? 'h' : 'l'; const char *suf = p.isPd ? "pd" : "ps"; const char *type = p.isPd ? 
"T_0F | T_66 | T_EVEX | T_EW1 | T_N8" : "T_0F | T_EVEX | T_EW0 | T_N8"; printf("void vmov%c%s(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, %s, 0x%02X); }\n" , c, suf, type, p.code); printf("void vmov%c%s(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s, 0x%02X); }\n" , c, suf, type, p.code + 1); } } // FMA { const struct Tbl { uint8_t code; const char *name; bool supportYMM; } tbl[] = { { 0x08, "vfmadd", true }, { 0x09, "vfmadd", false }, { 0x06, "vfmaddsub", true }, { 0x07, "vfmsubadd", true }, { 0x0A, "vfmsub", true }, { 0x0B, "vfmsub", false }, { 0x0C, "vfnmadd", true }, { 0x0D, "vfnmadd", false }, { 0x0E, "vfnmsub", true }, { 0x0F, "vfnmsub", false }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { for (int j = 0; j < 2; j++) { const char sufTbl[][2][8] = { { "pd", "ps" }, { "sd", "ss" }, }; for (int k = 0; k < 3; k++) { const struct Ord { const char *str; uint8_t code; } ord[] = { { "132", 0x90 }, { "213", 0xA0 }, { "231", 0xB0 }, }; int t = T_0F38 | T_66 | T_EVEX; t |= (j == 0) ? (T_W1 | T_EW1) : (T_W0 | T_EW0); if (tbl[i].supportYMM) t |= T_YMM; const std::string suf = sufTbl[tbl[i].supportYMM ? 
0 : 1][j]; if (suf == "pd") { t |= T_B64; } else if (suf == "ps") { t |= T_B32; } else if (suf == "sd") { t |= T_ER_X | T_N8; } else { // ss t |= T_ER_X | T_N4; } std::string type = type2String(t); printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n" , tbl[i].name, ord[k].str, suf.c_str(), type.c_str(), tbl[i].code + ord[k].code); } } } } // FMA others { printf("void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); }\n"); printf("void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); }\n"); printf("void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); }\n"); const struct Tbl { const char *name; uint8_t code; int type; bool ew1; } tbl[] = { { "vbroadcastss", 0x18, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 }, { "vpbroadcastb", 0x78, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N1 }, { "vpbroadcastw", 0x79, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N2 }, { "vpbroadcastd", 0x58, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_N4 }, { "vpbroadcastq", 0x59, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); printf("void %s(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code); } puts("void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }"); puts("void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && 
y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }"); puts("void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }"); puts("void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }"); puts("void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }"); puts("void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }"); puts("void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }"); puts("void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }"); puts("void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }"); puts("void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }"); puts("void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }"); puts("void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }"); puts("void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || 
op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }"); puts("void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }"); puts("void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }"); puts("void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }"); puts("void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }"); puts("void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }"); puts("void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }"); puts("void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? 
Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); }"); } // (x, x, imm), (x, imm) { const struct Tbl { const char *name; uint8_t code; int idx; int type; } tbl[] = { { "pslldq", 0x73, 7, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, { "psrldq", 0x73, 3, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, { "psllw", 0x71, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, { "pslld", 0x72, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, { "psllq", 0x73, 6, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 }, { "psraw", 0x71, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, { "psrad", 0x72, 4, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, { "psrlw", 0x71, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX }, { "psrld", 0x72, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW0 | T_B32 }, { "psrlq", 0x73, 2, T_0F | T_66 | T_YMM | T_EVEX | T_MEM_EVEX | T_EW1 | T_B64 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; std::string type = type2String(p.type); printf("void v%s(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), %d), x, op, %s, 0x%02X, imm); }\n", p.name, p.idx, type.c_str(), p.code); } } // 4-op { const struct Tbl { const char *name; uint8_t code; } tbl[] = { { "vblendvpd", 0x4B }, { "vblendvps", 0x4A }, { "vpblendvb", 0x4C }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x%02X, x4.getIdx() << 4); }\n", p.name, p.code); } } // mov { printf("void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }\n"); printf("void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); 
}\n"); printf("void vmovq(const Xmm& x, const Address& addr) { int type, code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }\n"); printf("void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }\n"); printf("void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }\n"); printf("void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12); }\n"); printf("void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }\n"); printf("void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }\n"); printf("void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }\n"); puts("void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }"); puts("void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }"); puts("void vmovntps(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); }"); puts("void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); }"); // vmovsd, vmovss for (int i = 0; i < 2; i++) { char c1 = i == 0 ? 
'd' : 's'; int type = T_0F | T_EVEX; type |= i == 0 ? (T_F2 | T_EW1 | T_N8) : (T_F3 | T_EW0 | T_N4); std::string s = type2String(type); printf("void vmovs%c(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, %s, 0x10); }\n", c1, s.c_str()); printf("void vmovs%c(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", c1, s.c_str()); printf("void vmovs%c(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, %s | T_M_K, 0x11); }\n", c1, s.c_str()); } } // cvt { puts("void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_ER_X | T_N8, 0x2D); }"); puts("void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }"); puts("void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }"); puts("void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, 0x2C); }"); puts("void vcvtsi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F3 | T_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }"); puts("void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }"); puts("void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }"); puts("void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6); }"); puts("void vcvtpd2ps(const Xmm& x, const Operand& op) { 
opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }"); puts("void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }"); puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }"); puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }"); puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }"); } // haswell gpr(reg, reg, r/m) { const struct Tbl { const char *name; int type; uint8_t code; } tbl[] = { { "andn", T_0F38, 0xF2 }, { "mulx", T_F2 | T_0F38, 0xF6 }, { "pdep", T_F2 | T_0F38, 0xF5 }, { "pext", T_F3 | T_0F38, 0xF5 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; printf("void %s(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, %s, 0x%x, true); }\n", p.name, type2String(p.type).c_str(), p.code); } } // gpr(reg, r/m, reg) { const struct Tbl { const char *name; int type; uint8_t code; } tbl[] = { { "bextr", T_0F38, 0xF7 }, { "bzhi", T_0F38, 0xF5 }, { "sarx", T_0F38 | T_F3, 0xF7 }, { "shlx", T_0F38 | T_66, 0xF7 }, { "shrx", T_0F38 | T_F2, 0xF7 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; printf("void %s(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, %s, 0x%x, false); }\n", p.name, type2String(p.type).c_str(), p.code); } puts("void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opGpr(r, op, Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }"); } // gpr(reg, r/m) { const struct Tbl { const char *name; int type; uint8_t code; uint8_t idx; } tbl[] = { { "blsi", T_0F38, 0xF3, 3 }, { "blsmsk", T_0F38, 0xF3, 2 }, { "blsr", T_0F38, 
0xF3, 1 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; printf("void %s(const Reg32e& r, const Operand& op) { opGpr(Reg32e(%d, r.getBit()), op, r, %s, 0x%x, false); }\n", p.name, p.idx, type2String(p.type).c_str(), p.code); } } // gather { const int y_vx_y = 0; const int y_vy_y = 1; const int x_vy_x = 2; const struct Tbl { const char *name; uint8_t code; int w; int mode; } tbl[] = { { "vgatherdpd", 0x92, 1, y_vx_y }, { "vgatherqpd", 0x93, 1, y_vy_y }, { "vgatherdps", 0x92, 0, y_vy_y }, { "vgatherqps", 0x93, 0, x_vy_x }, { "vpgatherdd", 0x90, 0, y_vy_y }, { "vpgatherqd", 0x91, 0, x_vy_x }, { "vpgatherdq", 0x90, 1, y_vx_y }, { "vpgatherqq", 0x91, 1, y_vy_y }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode); } } // vnni { const struct Tbl { uint8_t code; const char *name; int type; } tbl[] = { { 0x50, "vpdpbusd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, { 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, { 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, { 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; std::string type = type2String(p->type); printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code); } } } void put32() { put_jREGz("cx", true); put_jREGz("ecx", false); const GenericTbl tbl[] = { { "aaa", 0x37 }, { "aad", 0xD5, 0x0A }, { "aam", 0xD4, 0x0A }, { "aas", 0x3F }, { "daa", 0x27 }, { "das", 0x2F }, { "into", 0xCE }, { "popad", 0x61 }, { "popfd", 0x9D }, { "pusha", 0x60 }, { "pushad", 0x60 }, { "pushfd", 0x9C }, { "popa", 0x61 }, }; putGeneric(tbl, 
NUM_OF_ARRAY(tbl)); putLoadSeg("lds", 0xC5, NONE); putLoadSeg("les", 0xC4, NONE); } void put64() { put_jREGz("ecx", true); put_jREGz("rcx", false); const GenericTbl tbl[] = { { "cdqe", 0x48, 0x98 }, { "cqo", 0x48, 0x99 }, { "cmpsq", 0x48, 0xA7 }, { "popfq", 0x9D }, { "pushfq", 0x9C }, { "lodsq", 0x48, 0xAD }, { "movsq", 0x48, 0xA5 }, { "scasq", 0x48, 0xAF }, { "stosq", 0x48, 0xAB }, { "syscall", 0x0F, 0x05 }, { "sysret", 0x0F, 0x07 }, }; putGeneric(tbl, NUM_OF_ARRAY(tbl)); putMemOp("cmpxchg16b", 0, 1, 0x0F, 0xC7, 64); putMemOp("fxrstor64", 0, 1, 0x0F, 0xAE, 64); puts("void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }"); puts("void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }"); puts("void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); }"); puts("void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, 0x3A); }"); puts("void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, 0x3A); }"); puts("void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); }"); puts("void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); }"); puts("void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); }"); puts("void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 
0x2C); }"); puts("void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); }"); puts("void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); }"); } void putAMX_TILE() { puts("void ldtilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_0F38 | T_W0, 0x49); }"); puts("void sttilecfg(const Address& addr) { opVex(tmm0, &tmm0, addr, T_66 | T_0F38 | T_W0, 0x49); }"); puts("void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2 | T_0F38 | T_W0, 0x4b); }"); puts("void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66 | T_0F38 | T_W0, 0x4b); }"); puts("void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); }"); puts("void tilestored(const Address& addr, const Tmm& tm) { opVex(tm, &tmm0, addr, T_F3 | T_0F38 | T_W0, 0x4b); }"); puts("void tilezero(const Tmm& Tmm) { opVex(Tmm, &tmm0, tmm0, T_F2 | T_0F38 | T_W0, 0x49); }"); } void putAMX_INT8() { puts("void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2 | T_0F38 | T_W0, 0x5e); }"); puts("void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5e); }"); puts("void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66 | T_0F38 | T_W0, 0x5e); }"); puts("void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38 | T_W0, 0x5e); }"); } void putAMX_BF16() { puts("void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3 | T_0F38 | T_W0, 0x5c); }"); } void putFixed() { puts("#ifdef XBYAK64"); put64(); putAMX_TILE(); putAMX_INT8(); putAMX_BF16(); puts("#else"); put32(); puts("#endif"); puts("#ifndef XBYAK_NO_OP_NAMES"); const char *tbl[] = { "and", "or", "xor", }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; printf("void %s(const Operand& op1, const 
Operand& op2) { %s_(op1, op2); }\n", name, name); printf("void %s(const Operand& op, uint32_t imm) { %s_(op, imm); }\n", name, name); } puts("void not(const Operand& op) { not_(op); }"); puts("#endif"); } void putOmit() { puts("void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); }"); puts("void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); }"); puts("void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); }"); puts("void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); }"); puts("void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); }"); puts("void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); }"); { const char pred[32][16] = { "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord", "eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt", "true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s", "eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us" }; const char suf[][4] = { "pd", "ps", "sd", "ss" }; for (int i = 0; i < 4; i++) { const char *s = suf[i]; for (int j = 0; j < 32; j++) { printf("void vcmp%s%s(const Xmm& x, const Operand& op) { vcmp%s%s(x, x, op); }\n", pred[j], s, pred[j], s); } } } { const char *tbl[] = { "pslldq", "psrldq", "psllw", "pslld", "psllq", "psraw", "psrad", "psrlw", "psrld", "psrlq", }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; printf("void v%s(const Xmm& x, uint8_t imm) { v%s(x, x, imm); }\n", name, name); } } { const char *tbl[] = { "vblendvpd", "vblendvps", "vpblendvb", }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const char *name = tbl[i]; printf("void %s(const Xmm& x1, const Operand& op, const Xmm& x4) { %s(x1, x1, op, x4); }\n", name, name); } } putX_X_XM(true); } int main(int argc, char *argv[]) { std::string mode = argc == 2 ? 
argv[1] : ""; if (mode == "") { put(); } else if (mode == "fixed") { putFixed(); } else { putOmit(); } } xbyak-6.02/gen/sortline.cpp000066400000000000000000000007041417465521000157010ustar00rootroot00000000000000#include #include #include #include typedef std::set StrSet; int main() { StrSet ss; std::string line; while (std::getline(std::cin, line)) { if (!line.empty() && line[line.size() - 1] == '\n') { line.resize(line.size() - 1); } if (!line.empty()) { ss.insert(line); } } for (StrSet::const_iterator i = ss.begin(), ie = ss.end(); i != ie; ++i) { std::cout << *i << std::endl; } } xbyak-6.02/gen/update.bat000066400000000000000000000010051417465521000153030ustar00rootroot00000000000000@echo off set OPT=/EHsc -I../ /W4 -D_CRT_SECURE_NO_WARNINGS set TARGET=..\\xbyak\\xbyak_mnemonic.h set SORT=sortline cl gen_code.cpp %OPT% gen_code | %SORT% > %TARGET% echo #ifdef XBYAK_ENABLE_OMITTED_OPERAND>> %TARGET% gen_code omit | %SORT% >> %TARGET% echo #endif>>%TARGET% gen_code fixed >> %TARGET% cl gen_avx512.cpp %OPT% echo #ifndef XBYAK_DISABLE_AVX512>> %TARGET% gen_avx512 | %SORT% >> %TARGET% echo #ifdef XBYAK64>> %TARGET% gen_avx512 64 | %SORT% >> %TARGET% echo #endif>> %TARGET% echo #endif>> %TARGET% xbyak-6.02/meson.build000066400000000000000000000022601417465521000147260ustar00rootroot00000000000000# SPDX-FileCopyrightText: 2021 Andrea Pappacoda # # SPDX-License-Identifier: BSD-3-Clause project( 'xbyak', 'cpp', version: '6.02', license: 'BSD-3-Clause', default_options: 'b_ndebug=if-release' ) install_subdir('xbyak', install_dir: get_option('includedir')) xbyak_dep = declare_dependency(include_directories: include_directories('.')) if meson.version().version_compare('>=0.54.0') meson.override_dependency(meson.project_name(), xbyak_dep) endif import('pkgconfig').generate( name: meson.project_name(), description: 'JIT assembler for x86(IA32), x64(AMD64, x86-64)', version: meson.project_version(), url: 'https://github.com/herumi/xbyak' ) if 
meson.version().version_compare('>=0.50.0') cmake = import('cmake') cmake.write_basic_package_version_file( name: meson.project_name(), version: meson.project_version() ) cmake_conf = configuration_data() cmake_conf.set('TARGET_NAME', meson.project_name() + '::' + meson.project_name()) cmake_conf.set('ABSOLUTE_INCLUDE_DIR', get_option('prefix')/get_option('includedir')) cmake.configure_package_config_file( name: meson.project_name(), input: 'cmake'/'meson-config.cmake.in', configuration: cmake_conf ) endif xbyak-6.02/readme.md000066400000000000000000000616441417465521000143560ustar00rootroot00000000000000[![Build Status](https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/xbyak/actions/workflows/main.yml) #lXbyak 6.02 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ## Abstract Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic. The pronunciation of Xbyak is `kÉ™i-bja-k`. It is named from a Japanese word [é–‹é—¢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world. ## Feature * header file only * Intel/MASM like syntax * fully support AVX-512 **Note**: Use `and_()`, `or_()`, ... instead of `and()`, `or()`. If you want to use them, then specify `-fno-operator-names` option to gcc/clang. ### News - strictly check address offset disp32 in a signed 32-bit integer. e.g., `ptr[(void*)0xffffffff]` causes an error. - define `XBYAK_OLD_DISP_CHECK` if you need an old check, but the option will be remoevd. - add `jmp(mem, T_FAR)`, `call(mem, T_FAR)` `retf()` for far absolute indirect jump. - vnni instructions such as vpdpbusd supports vex encoding. - (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit. - (Windows) `#include ` has been removed from xbyak.h, so add it explicitly if you need it. - support exception-less mode see. 
[Exception-less mode](#exception-less-mode) - `XBYAK_USE_MMAP_ALLOCATOR` will be defined on Linux/macOS unless `XBYAK_DONT_USE_MMAP_ALLOCATOR` is defined. ### Supported OS * Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit) * Linux(32bit, 64bit) * Intel macOS ### Supported Compilers Almost C++03 or later compilers for x86/x64 such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin. ## Install The following files are necessary. Please add the path to your compile directory. * xbyak.h * xbyak_mnemonic.h * xbyak_util.h Linux: ``` make install ``` These files are copied into `/usr/local/include/xbyak`. ## How to use it Inherit `Xbyak::CodeGenerator` class and make the class method. ``` #include struct Code : Xbyak::CodeGenerator { Code(int x) { mov(eax, x); ret(); } }; ``` Or you can pass the instance of CodeGenerator without inheriting. ``` void genCode(Xbyak::CodeGenerator& code, int x) { using namespace Xbyak::util; code.mov(eax, x); code.ret(); } ``` Make an instance of the class and get the function pointer by calling `getCode()` and call it. ``` Code c(5); int (*f)() = c.getCode(); printf("ret=%d\n", f()); // ret = 5 ``` ## Syntax Similar to MASM/NASM syntax with parentheses. ``` NASM Xbyak mov eax, ebx --> mov(eax, ebx); inc ecx inc(ecx); ret --> ret(); ``` ## Addressing Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory, otherwise use `ptr`. ``` (ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement] [rip + 32bit disp] ; x64 only NASM Xbyak mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]); mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]); test byte [esp], 4 --> test(byte [esp], 4); inc qword [rax] --> inc(qword [rax]); ``` **Note**: `qword`, ... are member variables, then don't use `dword` as unsigned int type. 
### How to use Selector (Segment Register) ``` mov eax, [fs:eax] --> putSeg(fs); mov(eax, ptr [eax]); mov ax, cs --> mov(ax, cs); ``` **Note**: Segment class is not derived from `Operand`. ## AVX ``` vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3 vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3); ``` **Note**: If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility. But the newer version will not support it. ``` vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3 ``` ## AVX-512 ``` vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30); vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]); vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]); vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2); vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2); vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae); vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary. 
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5); vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]); vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]); vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]); vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]); vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]); vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4); vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword vcvtpd2dq(xmm16, ptr [eax+33]); // default xword vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]); vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256 vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512 vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding ``` ### Remark * `k1`, ..., `k7` are opmask registers. - `k0` is dealt as no mask. - e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`. * use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively. * `k4 | k3` is different from `k3 | k4`. * use `ptr_b` for broadcast `{1toX}`. X is automatically determined. * specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary. 
## Label Two kinds of Label are supported. (String literal and Label class). ### String literal ``` L("L1"); jmp("L1"); jmp("L2"); ... a few mnemonics (8-bit displacement jmp) ... L("L2"); jmp("L3", T_NEAR); ... a lot of mnemonics (32-bit displacement jmp) ... L("L3"); ``` * Call `hasUndefinedLabel()` to verify your code has no undefined label. * you can use a label for immediate value of mov like as `mov(eax, "L2")`. ### Support `@@`, `@f`, `@b` like MASM ``` L("@@"); // jmp("@b"); // jmp to jmp("@f"); // jmp to L("@@"); // jmp("@b"); // jmp to mov(eax, "@b"); jmp(eax); // jmp to ``` ### Local label Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()` are treated as a local label. `inLocalLabel()` and `outLocalLabel()` can be nested. ``` void func1() { inLocalLabel(); L(".lp"); // ; local label ... jmp(".lp"); // jmp to L("aaa"); // global label outLocalLabel(); inLocalLabel(); L(".lp"); // ; local label func1(); jmp(".lp"); // jmp to inLocalLabel(); jmp("aaa"); // jmp to } ``` ### short and long jump Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified. So if the size between jmp and label is larger than 127 byte, then xbyak will cause an error. ``` jmp("short-jmp"); // short jmp // small code L("short-jmp"); jmp("long-jmp"); // long code L("long-jmp"); // throw exception ``` Then specify T_NEAR for jmp. ``` jmp("long-jmp", T_NEAR); // long jmp // long code L("long-jmp"); ``` Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR. ``` jmp("long-jmp"); // long jmp // long code L("long-jmp"); ``` ### Label class `L()` and `jxx()` support Label class. ``` Xbyak::Label label1, label2; L(label1); ... jmp(label1); ... jmp(label2); ... L(label2); ``` Use `putL` for jmp table ``` Label labelTbl, L0, L1, L2; mov(rax, labelTbl); // rdx is an index of jump table jmp(ptr [rax + rdx * sizeof(void*)]); L(labelTbl); putL(L0); putL(L1); putL(L2); L(L0); .... L(L1); .... 
``` `assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel. ``` Label label2; Label label1 = L(); // make label1 ; same to Label label1; L(label1); ... jmp(label2); // label2 is not determined here ... assignL(label2, label1); // label2 <- label1 ``` The `jmp` in the above code jumps to label1 assigned by `assignL`. **Note**: * srcLabel must be used in `L()`. * dstLabel must not be used in `L()`. `Label::getAddress()` returns the address specified by the label instance and 0 if not specified. ``` // not AutoGrow mode Label label; assert(label.getAddress() == 0); L(label); assert(label.getAddress() == getCurr()); ``` ### Rip ; relative addressing ``` Label label; mov(eax, ptr [rip + label]); // eax = 4 ... L(label); dd(4); ``` ``` int x; ... mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB ``` ## Far jump Use `word|dword|qword` instead of `ptr` to specify the address size. ### 32 bit mode ``` jmp(word[eax], T_FAR); // jmp m16:16(FF /5) jmp(dword[eax], T_FAR); // jmp m16:32(FF /5) ``` ### 64 bit mode ``` jmp(word[rax], T_FAR); // jmp m16:16(FF /5) jmp(dword[rax], T_FAR); // jmp m16:32(FF /5) jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5) ``` The same applies to `call`. ## Code size The default max code size is 4096 bytes. Specify the size in constructor of `CodeGenerator()` if necessary. ``` class Quantize : public Xbyak::CodeGenerator { public: Quantize() : CodeGenerator(8192) { } ... }; ``` ## User allocated memory You can make jit code on prepared memory. Call `setProtectModeRE` yourself to change memory mode if using the prepared memory. ``` uint8_t alignas(4096) buf[8192]; // C++11 or later struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(sizeof(buf), buf) { mov(rax, 123); ret(); } }; int main() { Code c; c.setProtectModeRE(); // set memory to Read/Exec printf("%d\n", c.getCode()()); } ``` **Note**: See [sample/test0.cpp](sample/test0.cpp). 
### AutoGrow The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`. Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address. ``` struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(, Xbyak::AutoGrow) { ... } }; Code c; // generate code for jit c.ready(); // mode = Read/Write/Exec ``` **Note**: * Don't use the address returned by `getCurr()` before calling `ready()` because it may be invalid address. ### Read/Exec mode Xbyak set Read/Write/Exec mode to memory to run jit code. If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and call `setProtectModeRE()` after generating jit code. ``` struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE) { mov(eax, 123); ret(); } }; Code c; c.setProtectModeRE(); ... ``` Call `readyRE()` instead of `ready()` when using `AutoGrow` mode. See [protect-re.cpp](sample/protect-re.cpp). ## Exception-less mode If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`. In stead of throwing an exception, `Xbyak::GetError()` returns non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong. The status will not be changed automatically, then you should reset it by `Xbyak::ClearError()`. `CodeGenerator::reset()` calls `ClearError()`. ## Macro * **XBYAK32** is defined on 32bit. * **XBYAK64** is defined on 64bit. * **XBYAK64_WIN** is defined on 64bit Windows(VC). * **XBYAK64_GCC** is defined on 64bit gcc, cygwin. * define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, .... * define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future). * define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro. * define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`. 
* define **XBYAK_USE_MEMFD** on Linux then /proc/self/maps shows the area used by xbyak. * define **XBYAK_OLD_DISP_CHECK** if the old disp check is necessary (deprecated in the future). ## Sample * [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64) * [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only) * [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64) * [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64) ## License modified new BSD License http://opensource.org/licenses/BSD-3-Clause ## History * 2021/Sep/14 ver 6.00 fully support AVX512-FP16 * 2021/Sep/09 ver 5.997 fix vrndscale* to support {sae} * 2021/Sep/03 ver 5.996 fix v{add,sub,mul,div,max,min}{sd,ss} to support T_rd_sae. * 2021/Aug/15 ver 5.995 add a label to /proc/self/maps if XBYAK_USE_MEMFD is defined on Linux * 2021/Jun/17 ver 5.994 add alias of vcmpXX{ps,pd,ss,sd} with mask register * 2021/Jun/06 ver 5.993 strict check of gather/scatter register combination * 2021/May/09 ver 5.992 support endbr32 and endbr64 * 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++-14 * 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito) * 2020/Oct/17 ver 5.98 support the form of [scale * reg] * 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc. * 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later * 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`. * 2020/Jul/28 ver 5.94 remove #include (only windows) * 2020/Jul/21 ver 5.93 support exception-less mode * 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov) * 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64 * 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso) * 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined. 
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask) * 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register * 2020/Feb/26 ver 5.891 fix typo of type * 2020/Jan/03 ver 5.89 fix error of vfpclasspd * 2019/Dec/20 ver 5.88 fix compile error on Windows * 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified. * 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available) * 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later * 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined * 2019/Oct/12 ver 5.83 exit(1) was removed * 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam) * 2019/Sep/14 ver 5.81 support some generic mnemonics. * 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov) * 2019/May/27 support vp2intersectd, vp2intersectq (not tested) * 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps * 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky) * 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage) * 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov * 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel * 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility * 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed * 2018/Oct/21 ver 5.74 support RegRip +/- int. 
Xbyak::CastTo is removed * 2018/Oct/15 util::AddressFrame uses push/pop instead of mov * 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8) * 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday) * 2018/Sep/04 ver 5.71 L() returns a new label instance * 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting * 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday) * 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm * 2018/Jul/26 ver 5.661 support mingw64 * 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect() * 2018/Jun/26 ver 5.65 fix push(qword [mem]) * 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu * 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem) * 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso * 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it) * 2018/Jan/24 ver 5.601 add xword, yword, etc. 
into Xbyak::util namespace * 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf) * 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix * 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage) * 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen) * 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan) * 2017/Aug/08 ver 5.45 add sha(thanks to magurosan) * 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso) * 2017/Jul/12 ver 5.432 reduce warnings of PVS studio * 2017/Jul/09 ver 5.431 fix hasRex() (no affect) (thanks to drillsar) * 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed) * 2017/May/13 ver 5.42 add movs{b,w,d,q} * 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso) * 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label * 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso) * 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N * 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro) * 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW * 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso) * 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso) * 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38) * 2016/Nov/20 ver 5.10 add addressing [rip+&var] * 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio) * 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h * 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4 * 2016/Aug/03 ver 5.01 disable omitted operand * 2016/Jun/24 ver 5.00 support avx-512 instruction set * 2016/Jun/13 avx-512 add mask instructions * 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu * 2016/Mar/14 ver 4.901 comment to ready() 
function(thanks to skmp) * 2016/Feb/04 ver 4.90 add jcc(const void *addr); * 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell) * 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere) * 2015/Oct/05 ver 4.87 support segment selectors * 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere) * 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen) * 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff) * 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik) * 2015/May/24 ver 4.82 support detection of F16C * 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere) * 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere) * 2015/Jar/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac * 2014/Oct/14 ver 4.70 support MmapAllocator * 2014/Jun/13 ver 4.62 disable warning of VC2014 * 2014/May/30 ver 4.61 support bt, bts, btr, btc * 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph * 2014/Apr/11 ver 4.52 add detection of rdrand * 2014/Mar/25 ver 4.51 remove state information of unreferenced labels * 2014/Mar/16 ver 4.50 support new Label * 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox * 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64() * 2013/Oct/16 ver 4.21 label support std::string * 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64) * 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class * 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label * 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest). 
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions * 2013/Mar/27 ver 3.80 support mov(reg, "label"); * 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz() * 2013/Jan/15 ver 3.75 add setSize() to modify generated code * 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect() * 2013/Jan/06 ver 3.73 use unordered_map if possible * 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const. * 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined. * 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util. * 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias) * 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit * 2012/Nov/01 ver 3.61 add fldcw/fstcw * 2012/May/03 ver 3.60 change interface of Allocator * 2012/Mar/23 ver 3.51 fix userPtr mode * 2012/Mar/19 ver 3.50 support AutoGrow mode * 2011/Nov/09 ver 3.05 fix bit property of rip addresing / support movsxd * 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat) * 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya) * 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc * 2011/May/24 ver 3.01 fix typo of OSXSAVE * 2011/May/23 ver 3.00 add vcmpeqps and so on * 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it) * 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe * 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm * 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest) * 2011/Feb/04 ver 2.99 beta support AVX * 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp * 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist * 2010/Jun/07 ver 2.29 fix call( jmp("@b"); // jmp 
to jmp("@f"); // jmp to L("@@"); // jmp("@b"); // jmp to mov(eax, "@b"); jmp(eax); // jmp to 2. ラベルã®å±€æ‰€åŒ– ピリオドã§å§‹ã¾ã‚‹ãƒ©ãƒ™ãƒ«ã‚’inLocalLabel(), outLocalLabel()ã§æŒŸã‚€ã“ã¨ã§å±€æ‰€åŒ–ã§ãã¾ã™ã€‚ inLocalLabel(), outLocalLabel()ã¯å…¥ã‚Œå­ã«ã™ã‚‹ã“ã¨ãŒã§ãã¾ã™ã€‚ void func1() { inLocalLabel(); L(".lp"); // ; ローカルラベル ... jmp(".lp"); // jmpt to L("aaa"); // グローãƒãƒ«ãƒ©ãƒ™ãƒ« outLocalLabel(); } void func2() { inLocalLabel(); L(".lp"); // ; ローカルラベル func1(); jmp(".lp"); // jmp to outLocalLabel(); } 上記サンプルã§ã¯inLocalLabel(), outLocalLabel()ãŒç„¡ã„ã¨ã€ ".lp"ラベルã®äºŒé‡å®šç¾©ã‚¨ãƒ©ãƒ¼ã«ãªã‚Šã¾ã™ã€‚ 3. æ–°ã—ã„Labelクラスã«ã‚ˆã‚‹ã‚¸ãƒ£ãƒ³ãƒ—命令 ジャンプ先を文字列ã«ã‚ˆã‚‹æŒ‡å®šã ã‘ã§ãªãラベルクラスを使ãˆã‚‹ã‚ˆã†ã«ãªã‚Šã¾ã—ãŸã€‚ Label label1, label2; L(label1); ... jmp(label1); ... jmp(label2); ... L(label2); æ›´ã«ãƒ©ãƒ™ãƒ«ã®å‰²ã‚Šå½“ã¦ã‚’行ã†assignL(dstLabel, srcLabel)ã¨ã„ã†å‘½ä»¤ã‚‚追加ã•れã¾ã—ãŸã€‚ Label label2; Label label1 = L(); // Label label1; L(label1);ã¨åŒã˜æ„味 ... jmp(label2); ... assignL(label2, label1); 上記jmp命令ã¯label1ã«ã‚¸ãƒ£ãƒ³ãƒ—ã—ã¾ã™ã€‚ åˆ¶é™ * srcLabelã¯L()ã«ã‚ˆã‚Šé£›ã³å…ˆãŒç¢ºå®šã—ã¦ã„ãªã„ã¨ã„ã‘ã¾ã›ã‚“。 * dstLabelã¯L()ã«ã‚ˆã‚Šé£›ã³å…ˆãŒç¢ºå®šã—ã¦ã„ã¦ã¯ã„ã‘ã¾ã›ã‚“。 ラベルã¯`getAddress()`ã«ã‚ˆã‚Šãã®ã‚¢ãƒ‰ãƒ¬ã‚¹ã‚’å–å¾—ã§ãã¾ã™ã€‚ 未定義ã®ã¨ãã¯0ãŒè¿”りã¾ã™ã€‚ ``` // not AutoGrow mode Label label; assert(label.getAddress(), 0); L(label); assert(label.getAddress(), getCurr()); ``` 4. 
farジャンプ `jmp(mem, T_FAR)`, `call(mem, T_FAR)`, `retf()`をサãƒãƒ¼ãƒˆã—ã¾ã™ã€‚ サイズを明示ã™ã‚‹ãŸã‚ã«`ptr`ã®ä»£ã‚りã«`word|dword|qword`を利用ã—ã¦ãã ã•ã„。 32bit ``` jmp(word[eax], T_FAR); // jmp m16:16(FF /5) jmp(dword[eax], T_FAR); // jmp m16:32(FF /5) ``` 64bit ``` jmp(word[rax], T_FAR); // jmp m16:16(FF /5) jmp(dword[rax], T_FAR); // jmp m16:32(FF /5) jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5) ``` ・Xbyak::CodeGenerator()コンストラクタインタフェース @param maxSize [in] ã‚³ãƒ¼ãƒ‰ç”Ÿæˆæœ€å¤§ã‚µã‚¤ã‚º(デフォルト4096byte) @param userPtr [in] ユーザ指定メモリ CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0); デフォルトコードサイズã¯4096(=DEFAULT_MAX_CODE_SIZE)ãƒã‚¤ãƒˆã§ã™ã€‚ ãれより大ããªã‚³ãƒ¼ãƒ‰ã‚’生æˆã™ã‚‹å ´åˆã¯CodeGenerator()ã®ã‚³ãƒ³ã‚¹ãƒˆãƒ©ã‚¯ã‚¿ã«æŒ‡å®šã—ã¦ãã ã•ã„。 class Quantize : public Xbyak::CodeGenerator { public: Quantize() : CodeGenerator(8192) { } ... }; ã¾ãŸãƒ¦ãƒ¼ã‚¶æŒ‡å®šãƒ¡ãƒ¢ãƒªã‚’ã‚³ãƒ¼ãƒ‰ç”Ÿæˆæœ€å¤§ã‚µã‚¤ã‚ºã¨å…±ã«æŒ‡å®šã™ã‚‹ã¨ã€CodeGenerator㯠指定ã•れãŸãƒ¡ãƒ¢ãƒªä¸Šã«ãƒã‚¤ãƒˆåˆ—を生æˆã—ã¾ã™ã€‚ 補助関数ã¨ã—ã¦æŒ‡å®šã•れãŸã‚¢ãƒ‰ãƒ¬ã‚¹ã®å®Ÿè¡Œå±žæ€§ã‚’変更ã™ã‚‹CodeArray::protect()㨠与ãˆã‚‰ã‚ŒãŸãƒã‚¤ãƒ³ã‚¿ã‹ã‚‰ã‚¢ãƒ©ã‚¤ãƒ¡ãƒ³ãƒˆã•れãŸãƒã‚¤ãƒ³ã‚¿ã‚’å–å¾—ã™ã‚‹CodeArray::getAlignedAddress() も用æ„ã—ã¾ã—ãŸã€‚詳細ã¯sample/test0.cppã®use memory allocated by userã‚’å‚考㫠ã—ã¦ãã ã•ã„。 /** change exec permission of memory @param addr [in] buffer address @param size [in] buffer size @param canExec [in] true(enable to exec), false(disable to exec) @return true(success), false(failure) */ bool CodeArray::protect(const void *addr, size_t size, bool canExec); /** get aligned memory pointer */ uint8 *CodeArray::getAlignedAddress(uint8 *addr, size_t alignedSize = ALIGN_SIZE); ・read/execモード デフォルトã®CodeGeneratorã¯ã‚³ãƒ³ã‚¹ãƒˆãƒ©ã‚¯ãƒˆæ™‚ã«JIT用ã®é ˜åŸŸã‚’read/write/execモードã«è¨­å®šã—ã¦åˆ©ç”¨ã—ã¾ã™ã€‚ ã‚³ãƒ¼ãƒ‰ç”Ÿæˆæ™‚ã¯read/writeã§ã‚³ãƒ¼ãƒ‰å®Ÿè¡Œæ™‚ã«ã¯read/execã«ã—ãŸã„å ´åˆã€æ¬¡ã®ã‚ˆã†ã«ã—ã¦ãã ã•ã„。 struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(4096, Xbyak::DontUseProtect) // 
JIT領域をread/writeã®ã¾ã¾ã‚³ãƒ¼ãƒ‰ç”Ÿæˆ { mov(eax, 123); ret(); } }; Code c; c.setProtectModeRE(); // read/execモードã«å¤‰æ›´ // JIT領域を実行 AutoGrowã®å ´åˆã¯readyã®ä»£ã‚りã«readyRE()を読んã§ãã ã•ã„。 struct Code : Xbyak::CodeGenerator { Code() : Xbyak::CodeGenerator(4096, Xbyak::AutoGrow) // JIT領域をread/writeã®ã¾ã¾ã‚³ãƒ¼ãƒ‰ç”Ÿæˆ { mov(eax, 123); ret(); } }; Code c; c.readyRE(); // read/exeモードã«å¤‰æ›´ // JIT領域を実行 setProtectModeRW()を呼ã¶ã¨é ˜åŸŸãŒå…ƒã®read/execãƒ¢ãƒ¼ãƒ‰ã«æˆ»ã‚Šã¾ã™ã€‚ ãã®ä»–詳細ã¯å„種サンプルをå‚ç…§ã—ã¦ãã ã•ã„。 ----------------------------------------------------------------------------- ◎マクロ 32bit環境上ã§ã‚³ãƒ³ãƒ‘イルã™ã‚‹ã¨XBYAK32ãŒã€64bit環境上ã§ã‚³ãƒ³ãƒ‘イルã™ã‚‹ã¨XBYAK64㌠定義ã•れã¾ã™ã€‚ã•らã«64bit環境上ã§ã¯Windows(VC)ãªã‚‰XBYAK64_WINã€cygwin, gcc上ã§ã¯ XBYAK64_GCCãŒå®šç¾©ã•れã¾ã™ã€‚ ----------------------------------------------------------------------------- ◎使用例 test0.cpp ; ç°¡å˜ãªä¾‹(x86, x64) quantize.cpp ; 割り算ã®JITアセンブルã«ã‚ˆã‚‹é‡å­åŒ–ã®é«˜é€ŸåŒ–(x86) calc.cpp ; 与ãˆã‚‰ã‚ŒãŸå¤šé …å¼ã‚’アセンブルã—ã¦å®Ÿè¡Œ(x86, x64) boost(http://www.boost.org/)ãŒå¿…è¦ bf.cpp ; JIT Brainfuck(x86, x64) ----------------------------------------------------------------------------- ◎ライセンス 修正ã•ã‚ŒãŸæ–°ã—ã„BSDライセンスã«å¾“ã„ã¾ã™ã€‚ http://opensource.org/licenses/BSD-3-Clause sample/{echo,hello}.bf㯠http://www.kmonos.net/alang/etc/brainfuck.php ã‹ã‚‰ ã„ãŸã ãã¾ã—ãŸã€‚ ----------------------------------------------------------------------------- ◎履歴 2021/09/14 ver 6.00 AVX512-FP16を完全サãƒãƒ¼ãƒˆ 2021/09/09 ver 5.997 vrndscale*ã‚’{sae}をサãƒãƒ¼ãƒˆã™ã‚‹ã‚ˆã†ä¿®æ­£ 2021/09/03 ver 5.996 v{add,sub,mul,div,max,min}{sd,ss}ã‚’T_rd_saeãªã©ã‚’サãƒãƒ¼ãƒˆã™ã‚‹ã‚ˆã†ä¿®æ­£ 2021/08/15 ver 5.995 Linux上ã§XBYAK_USE_MEMFDãŒå®šç¾©ã•れãŸãªã‚‰/proc/self/mapsã«ãƒ©ãƒ™ãƒ«è¿½åŠ  2021/06/17 ver 5.994 マスクレジスタ用ã®vcmpXX{ps,pd,ss,sd}ã®alias追加 2021/06/06 ver 5.993 gather/scatterã®ãƒ¬ã‚¸ã‚¹ã‚¿ã®çµ„ã¿åˆã‚ã›ã®å޳坆ãªãƒã‚§ãƒƒã‚¯ 2021/05/09 ver 5.992 endbr32ã¨endbr64ã®ã‚µãƒãƒ¼ãƒˆ 2020/11/16 ver 5.991 g++-5ã®C++14ã§constexpræ©Ÿèƒ½ã®æŠ‘åˆ¶ 2020/10/19 ver 5.99 
VNNI命令サポート(Thanks to akharito) 2020/10/17 ver 5.98 [scale * reg]のサポート 2020/09/08 ver 5.97 uint32などをuint32_tに置換 2020/08/28 ver 5.95 レジスタクラスのコンストラクタがconstexprに対応(C++14以降) 2020/08/04 ver 5.941 `CodeGenerator::reset()`が`ClearError()`を呼ぶように変更 2020/07/28 ver 5.94 #include <winsock2.h>の削除 (only windows) 2020/07/21 ver 5.93 例外なしモード追加 2020/06/30 ver 5.92 Intel AMX命令サポート (Thanks to nshustrov) 2020/06/19 ver 5.913 32ビット環境でXBYAK64を定義したときのmov(r64, imm64)を修正 2020/06/19 ver 5.912 macOSの古いXcodeでもMAP_JITを有効にする(Thanks to rsdubtso) 2020/05/10 ver 5.911 Linux/macOSでXBYAK_USE_MMAP_ALLOCATORがデフォルト有効になる 2020/04/20 ver 5.91 マスクレジスタk0を受け入れる(マスクをしない) 2020/04/09 ver 5.90 kmov{b,w,d,q}がサポートされないレジスタを受けると例外を投げる 2020/02/26 ver 5.891 zm0のtype修正 2020/01/03 ver 5.89 vfpclasspdの処理エラー修正 2019/12/20 ver 5.88 Windowsでのコンパイルエラー修正 2019/12/19 ver 5.87 未定義ラベルへのjmp命令のデフォルト挙動をT_NEARにするsetDefaultJmpNEAR()を追加 2019/12/13 ver 5.86 [変更] -fno-operator-namesが指定されたときは5.84以前の挙動に戻す 2019/12/07 ver 5.85 mmapにMAP_JITフラグを追加(macOS mojave以上) 2019/11/29 ver 5.84 [変更] XBYAK_USE_OP_NAMESが定義されていない限りXBYAK_NO_OP_NAMESが定義されるように変更 2019/10/12 ver 5.83 exit(1)の除去 2019/09/23 ver 5.82 monitorx, mwaitx, clzero対応 (thanks to MagurosanTeam) 2019/09/14 ver 5.81 いくつかの一般命令をサポート 2019/08/01 ver 5.802 AVX512_BF16判定修正 (thanks to vpirogov) 2019/05/27 support vp2intersectd, vp2intersectq (not tested) 2019/05/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps 2019/04/27 ver 5.79 vcmppd/vcmppsのptr_b対応忘れ(thanks to jkopinsky) 2019/04/15 ver 5.78 Reg::changeBit()のリファクタリング(thanks to MerryMage) 2019/03/06 ver 5.77 LLCキャッシュを共有するCPU数の修整(by densamoilov) 2019/01/17 ver 5.76 Cpu::getNumCores()追加(by shelleygoel) 2018/10/31 ver 5.751 互換性のためにXbyak::CastToの復元 2018/10/29 ver 5.75 
LabelManagerのデストラクタでLabelから参照を切り離す 2018/10/21 ver 5.74 RegRip +/- intの形をサポート Xbyak::CastToを削除 2018/10/15 util::StackFrameでmovの代わりにpush/popを使う 2018/09/19 ver 5.73 vpslld, vpslldq, vpsllwなどの(reg, mem, imm8)に対するevexエンコーディング修整 2018/09/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday) 2018/08/27 ver 5.71 新しいlabelインスタンスを返すL()を追加 2018/08/27 ver 5.70 read/exec設定のためのsetProtectMode()とDontUseProtectの追加 2018/08/24 ver 5.68 indexが16以上のVSIBエンコーディングのバグ修正(thanks to petercaday) 2018/08/14 ver 5.67 Addressクラス内のmutableを削除 ; fix setCacheHierarchy for cloud vm 2018/07/26 ver 5.661 mingw64対応 2018/07/24 ver 5.66 protect()のmodeにCodeArray::PROTECT_REを追加 2018/06/26 ver 5.65 fix push(qword [mem]) 2018/03/07 ver 5.64 Cpu()の中でzero divisionが出ることがあるのを修正 2018/02/14 ver 5.63 Cpu::setCacheHierarchy()の修正とclang<3.9のためのEvexModifierZero修正(thanks to mgouicem) 2018/02/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso 2018/02/07 ver 5.61 vmov*がmem{k}{z}形式対応(忘れてた) 2018/01/24 ver 5.601 xword, ywordなどをXbyak::util名前空間に追加 2018/01/05 ver 5.60 Ice lake系命令対応(319433-030.pdf) 2017/08/22 ver 5.53 mpxエンコーディングバグ修正, bnd()プレフィクス追加 2017/08/18 ver 5.52 align修正(thanks to MerryMage) 2017/08/17 ver 5.51 multi-byte nop追加 align()はそれを使用する(thanks to inolen) 2017/08/08 ver 5.50 mpx追加(thanks to magurosan) 2017/08/08 ver 5.45 sha追加(thanks to magurosan) 2017/08/08 ver 5.44 prefetchw追加(thanks to rsdubtso) 2017/07/12 ver 5.432 PVS-studioの警告を減らす 2017/07/09 ver 5.431 hasRex()修正 (影響なし) (thanks to drillsar) 2017/05/14 ver 5.43 CodeGenerator::resetSize()修正(thanks to gibbed) 2017/05/13 ver 5.42 movs{b,w,d,q}追加 2017/01/26 ver 5.41 prefetchwt1追加とscale == 0対応(thanks to rsdubtso) 2016/12/14 ver 5.40 Labelが示すアドレスを取得するLabel::getAddress()追加 2016/12/07 ver 5.34 disp8N時の負のオフセット処理の修正(thanks to rsdubtso) 2016/12/06 ver 5.33 
disp8N時のvpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w}のバグ修正 2016/12/01 ver 5.32 clang for Visual Studioサポートのために__xgetbv()を_xgetbv()に変更(thanks to freiro) 2016/11/27 ver 5.31 AVX512_4VNNIをAVX512_4VNNIWに変更 2016/11/27 ver 5.30 AVX512_4VNNI, AVX512_4FMAPS命令の追加(thanks to rsdubtso) 2016/11/26 ver 5.20 AVX512_4VNNIとAVX512_4FMAPSの判定追加(thanks to rsdubtso) 2016/11/20 ver 5.11 何故か消えていたvptest for ymm追加(thanks to gregory38) 2016/11/20 ver 5.10 [rip+&var]の形のアドレッシング追加 2016/09/29 ver 5.03 ERR_INVALID_OPMASK_WITH_MEMORYの判定ミス修正(thanks to PVS-Studio) 2016/08/15 ver 5.02 xbyak_bin2hex.hをincludeしない 2016/08/15 ver 5.011 gcc 5.4のバージョン取得ミスの修正 2016/08/03 ver 5.01 AVXの省略表記非サポート 2016/07/24 ver 5.00 avx-512フルサポート 2016/06/13 avx-512 opmask命令サポート 2016/05/05 ver 4.91 AVX-512命令の検出サポート 2016/03/14 ver 4.901 ready()関数にコメント加筆(thanks to skmp) 2016/02/04 ver 4.90 条件分岐命令にjcc(const void *addr);のタイプを追加 2016/01/30 ver 4.89 vpblendvbがymmレジスタをサポートしていなかった(thanks to John Funnell) 2016/01/24 ver 4.88 lea, cmovの16bitレジスタ対応(thanks to whyisthisfieldhere) 2015/08/16 ver 4.87 セグメントセレクタに対応 2015/08/16 ver 4.86 [rip + label]アドレッシングで即値を使うと壊れる(thanks to whyisthisfieldhere) 2015/08/10 ver 4.85 Address::operator==()が間違っている(thanks to inolen) 2015/07/22 ver 4.84 call()がvariadic template対応 2015/05/24 ver 4.83 movbeサポート(thanks to benvanik) 2015/05/24 ver 4.82 F16Cが使えるかどうかの判定追加 2015/04/25 ver 4.81 setSizeが例外を投げる条件を修正(thanks to whyisthisfieldhere) 2015/04/22 ver 4.80 rip相対でLabelのサポート(thanks to whyisthisfieldhere) 2015/01/28 ver 4.71 adcx, adox, cmpxchg, rdseed, stacのサポート 2014/10/14 ver 4.70 MmapAllocatorのサポート 2014/06/13 ver 4.62 VC2014で警告抑制 2014/05/30 ver 4.61 bt, bts, btr, btcのサポート 2014/05/28 ver 4.60 vcvtph2ps, 
vcvtps2phのサポート 2014/04/11 ver 4.52 rdrandの判定追加 2014/03/25 ver 4.51 参照されなくなったラベルの状態を削除する 2014/03/16 ver 4.50 新しいラベルクラスのサポート 2014/03/05 ver 4.40 VirtualBox上でBMI/enhanced repのサポート判定を間違うことがあるのを修正 2013/12/03 ver 4.30 Reg::cvt8(), cvt16(), cvt32()のサポート 2013/10/16 ver 4.21 ラベルでstd::stringを受け付ける。 2013/07/30 ver 4.20 [break backward compatibility] 従来のReg32eクラスをアドレッシング用のRegExpとReg32, Reg64を表すReg32eに分離 2013/07/04 ver 4.10 [break backward compatibility] Xbyak::Errorの型をenumからclassに変更 2013/06/21 ver 4.02 LABELの指すアドレスを書き込むputL(LABEL)関数の追加。 2013/06/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm) support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest) 2013/05/30 ver 4.00 AVX2, VEX-encoded GPR-instructionをサポート 2013/03/27 ver 3.80 mov(reg, "label");をサポート 2013/03/13 ver 3.76 cqo, jcxz, jecxz, jrcxz追加 2013/01/15 ver 3.75 生成されたコードを修正するためにsetSize()を追加 2013/01/12 ver 3.74 CodeGenerator::reset()とAllocator::useProtect()を追加 2013/01/06 ver 3.73 可能ならunordered_mapを使う 2012/12/04 ver 3.72 eaxなどをCodeGeneratorのメンバ変数に戻す. 
Xbyak::util::eaxはstatic const変数 2012/11/17 ver 3.71 and_(), or_(), xor_(), not_()をXBYAK_NO_OP_NAMESが定義されていないときでも使えるようにした 2012/11/17 CodeGeneratorのeax, ecx, ptrなどのメンバ変数をstaticにし、const参照をXbyak::utilにも定義 2012/11/09 ver 3.70 and()をand_()にするためのマクロXBYAK_NO_OP_NAMESを追加(thanks to Mattias) 2012/11/01 ver 3.62 add fwait/fnwait/finit/fninit 2012/11/01 ver 3.61 add fldcw/fstcw 2012/05/03 ver 3.60 Allocatorクラスのインタフェースを変更 2012/03/23 ver 3.51 userPtrモードがバグったのを修正 2012/03/19 ver 3.50 AutoGrowモードサポート 2011/11/09 ver 3.05 rip相対の64bitサイズ以外の扱いのバグ修正 / movsxdサポート 2011/08/15 ver 3.04 add(dword [ebp-8], 0xda);などにおけるimm8の扱いのバグ修正(thanks to lolcat) 2011/06/16 ver 3.03 Macのgcc上での__GNUC_PREREQがミスってたのを修正(thanks to t_teruya) 2011/04/28 ver 3.02 Macのgcc上ではxgetbvをdisable 2011/03/24 ver 3.01 fix typo of OSXSAVE 2011/03/23 ver 3.00 vcmpeqpsなどを追加 2011/02/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it) 2011/02/16 ver 2.993 beta remove cvtReg to avoid thread unsafe 2011/02/10 ver 2.992 beta support one argument syntax for fadd like nasm 2011/02/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest) 2011/02/04 ver 2.99 beta support AVX 2010/12/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp 2010/10/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist 2010/07/07 ver 2.29 fix call(