From 4f809d0ef7a1dde12498b91bd86c985448143a25 Mon Sep 17 00:00:00 2001 From: Roman Dubtsov Date: Wed, 12 Dec 2018 18:39:36 -0800 Subject: [PATCH] src: cpu: update Xbyak to 5.751 --- src/cpu/jit_generator.hpp | 2 +- src/cpu/xbyak/xbyak.h | 187 +++++++++++++++++++++++---------- src/cpu/xbyak/xbyak_mnemonic.h | 116 +++++++++++--------- src/cpu/xbyak/xbyak_util.h | 55 +++------- 4 files changed, 216 insertions(+), 144 deletions(-) diff --git a/src/cpu/jit_generator.hpp b/src/cpu/jit_generator.hpp index a549d802e19..e09aeea0007 100644 --- a/src/cpu/jit_generator.hpp +++ b/src/cpu/jit_generator.hpp @@ -297,7 +297,7 @@ class jit_generator : public Xbyak::CodeGenerator // Disallow char-based labels completely void L(const char *label) = delete; - void L(const Xbyak::Label& label) { Xbyak::CodeGenerator::L(label); } + void L(Xbyak::Label& label) { Xbyak::CodeGenerator::L(label); } void uni_vpxor(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2, const Xbyak::Operand &op) { diff --git a/src/cpu/xbyak/xbyak.h b/src/cpu/xbyak/xbyak.h index 74d91d4c182..e1af8eebc34 100644 --- a/src/cpu/xbyak/xbyak.h +++ b/src/cpu/xbyak/xbyak.h @@ -85,6 +85,8 @@ // This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft. #if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\ ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__))) + #include + #define XBYAK_STD_UNORDERED_SET std::unordered_set #include #define XBYAK_STD_UNORDERED_MAP std::unordered_map #define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap @@ -94,16 +96,22 @@ libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version). */ #elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__) + #include + #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set #include #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap #elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600) + #include + #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set #include #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap #else + #include + #define XBYAK_STD_UNORDERED_SET std::set #include #define XBYAK_STD_UNORDERED_MAP std::map #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap @@ -150,7 +158,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x5631 /* 0xABCD = A.BC(D) */ + VERSION = 0x5751 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -662,6 +670,12 @@ struct RegRip { const Label* label_; bool isAddr_; explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {} + friend const RegRip operator+(const RegRip& r, int disp) { + return RegRip(r.disp_ + disp, r.label_, r.isAddr_); + } + friend const RegRip operator-(const RegRip& r, int disp) { + return RegRip(r.disp_ - disp, r.label_, r.isAddr_); + } friend const RegRip operator+(const RegRip& r, sint64 disp) { return RegRip(r.disp_ + disp, r.label_, r.isAddr_); } @@ -831,6 +845,7 @@ inline RegExp operator-(const RegExp& e, size_t disp) // 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc) void *const AutoGrow = (void*)1; //-V566 +void *const DontSetProtectRWE = (void*)2; //-V566 class CodeArray { enum Type { @@ -870,6 +885,7 @@ class CodeArray { size_t size_; bool isCalledCalcJmpAddress_; + bool useProtect() const { return alloc_->useProtect(); } /* allocate new memory and copy old data to the new area */ @@ -893,12 +909,16 @@ class CodeArray { uint64 disp = i->getVal(top_); rewrite(i->codeOffset, disp, i->jmpSize); } - if (alloc_->useProtect() && !protect(top_, size_, true)) throw Error(ERR_CANT_PROTECT); isCalledCalcJmpAddress_ = true; } public: + enum ProtectMode { + PROTECT_RW = 0, // read/write + PROTECT_RWE = 1, // read/write/exec + PROTECT_RE = 2 // read/exec + }; explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0) - : type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF) + : type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF) , alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_) , maxSize_(maxSize) , top_(type_ == USER_BUF ? reinterpret_cast(userPtr) : alloc_->alloc((std::max)(maxSize, 1))) @@ -906,7 +926,7 @@ class CodeArray { , isCalledCalcJmpAddress_(false) { if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC); - if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, true)) { + if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) { alloc_->free(top_); throw Error(ERR_CANT_PROTECT); } @@ -914,10 +934,19 @@ class CodeArray { virtual ~CodeArray() { if (isAllocType()) { - if (alloc_->useProtect()) protect(top_, maxSize_, false); + if (useProtect()) setProtectModeRW(false); alloc_->free(top_); } } + bool setProtectMode(ProtectMode mode, bool throwException = true) + { + bool isOK = protect(top_, maxSize_, mode); + if (isOK) return true; + if (throwException) throw Error(ERR_CANT_PROTECT); + return false; + } + bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); } + bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); } void resetSize() { size_ = 0; @@ -949,10 +978,10 @@ class CodeArray { void dq(uint64 code) { db(code, 8); } const uint8 *getCode() const { return top_; } template - const F getCode() const { return CastTo(top_); } + const F getCode() const { return reinterpret_cast(top_); } const uint8 *getCurr() const { return &top_[size_]; } template - const F getCurr() const { return CastTo(&top_[size_]); } + const F getCurr() const { return reinterpret_cast(&top_[size_]); } size_t getSize() const { return size_; } void setSize(size_t size) { @@ -1005,19 +1034,39 @@ class CodeArray { change exec permission of memory @param addr [in] buffer address @param size [in] buffer size - @param canExec [in] true(enable to exec), false(disable to exec) + @param protectMode [in] mode(RW/RWE/RE) @return true(success), false(failure) */ - static inline bool protect(const void *addr, size_t size, bool canExec) + static inline bool protect(const void *addr, size_t size, int protectMode) { +#if defined(_WIN32) + const DWORD c_rw = PAGE_READWRITE; + const DWORD c_rwe = PAGE_EXECUTE_READWRITE; + const DWORD c_re = PAGE_EXECUTE_READ; + DWORD mode; +#else + const int c_rw = PROT_READ | PROT_WRITE; + const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC; + const int c_re = PROT_READ | PROT_EXEC; + int mode; +#endif + switch (protectMode) { + case PROTECT_RW: mode = c_rw; break; + case PROTECT_RWE: mode = c_rwe; break; + case PROTECT_RE: mode = c_re; break; + default: + return false; + } #if defined(_WIN32) DWORD oldProtect; - return VirtualProtect(const_cast(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0; + return VirtualProtect(const_cast(addr), size, mode, &oldProtect) != 0; #elif defined(__GNUC__) size_t pageSize = sysconf(_SC_PAGESIZE); size_t iaddr = reinterpret_cast(addr); size_t roundAddr = iaddr & ~(pageSize - static_cast(1)); - int mode = PROT_READ | PROT_WRITE | (canExec ? PROT_EXEC : 0); +#ifndef NDEBUG + if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize); +#endif return mprotect(reinterpret_cast(roundAddr), size + (iaddr - roundAddr), mode) == 0; #else return true; @@ -1044,46 +1093,43 @@ class Address : public Operand { M_ripAddr }; Address(uint32 sizeBit, bool broadcast, const RegExp& e) - : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), permitVsib_(false), broadcast_(broadcast) + : Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast) { e_.verify(); } #ifdef XBYAK64 explicit Address(size_t disp) - : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false){ } + : Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ } Address(uint32 sizeBit, bool broadcast, const RegRip& addr) - : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), permitVsib_(false), broadcast_(broadcast) { } + : Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { } #endif - void permitVsib() const { permitVsib_ = true; } RegExp getRegExp(bool optimize = true) const { return optimize ? e_.optimize() : e_; } Mode getMode() const { return mode_; } - bool is32bit() const { verify(); return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; } - bool isOnlyDisp() const { verify(); return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax - size_t getDisp() const { verify(); return e_.getDisp(); } + bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; } + bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax + size_t getDisp() const { return e_.getDisp(); } uint8 getRex() const { - verify(); if (mode_ != M_ModRM) return 0; return getRegExp().getRex(); } - bool is64bitDisp() const { verify(); return mode_ == M_64bitDisp; } // for moffset + bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset bool isBroadcast() const { return broadcast_; } const Label* getLabel() const { return label_; } bool operator==(const Address& rhs) const { - return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && permitVsib_ == rhs.permitVsib_ && broadcast_ == rhs.broadcast_; + return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_; } bool operator!=(const Address& rhs) const { return !operator==(rhs); } + bool isVsib() const { return e_.isVsib(); } private: RegExp e_; const Label* label_; Mode mode_; - mutable bool permitVsib_; bool broadcast_; - void verify() const { if (e_.isVsib() && !permitVsib_) throw Error(ERR_BAD_VSIB_ADDRESSING); } }; inline const Address& Operand::getAddress() const @@ -1141,6 +1187,7 @@ class Label { Label(const Label& rhs); Label& operator=(const Label& rhs); ~Label(); + void clear() { mgr = 0; id = 0; } int getId() const { return id; } const uint8 *getAddress() const; @@ -1179,6 +1226,7 @@ class LabelManager { }; typedef XBYAK_STD_UNORDERED_MAP ClabelDefList; typedef XBYAK_STD_UNORDERED_MULTIMAP ClabelUndefList; + typedef XBYAK_STD_UNORDERED_SET LabelPtrList; CodeArray *base_; // global : stateList_.front(), local : stateList_.back() @@ -1186,6 +1234,7 @@ class LabelManager { mutable int labelId_; ClabelDefList clabelDefList_; ClabelUndefList clabelUndefList_; + LabelPtrList labelPtrList_; int getId(const Label& label) const { @@ -1234,9 +1283,14 @@ class LabelManager { return true; } friend class Label; - void incRefCount(int id) { clabelDefList_[id].refCount++; } - void decRefCount(int id) + void incRefCount(int id, Label *label) { + clabelDefList_[id].refCount++; + labelPtrList_.insert(label); + } + void decRefCount(int id, Label *label) + { + labelPtrList_.erase(label); ClabelDefList::iterator i = clabelDefList_.find(id); if (i == clabelDefList_.end()) return; if (i->second.refCount == 1) { @@ -1255,11 +1309,23 @@ class LabelManager { #endif return !list.empty(); } + // detach all labels linked to LabelManager + void resetLabelPtrList() + { + for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) { + (*i)->clear(); + } + labelPtrList_.clear(); + } public: LabelManager() { reset(); } + ~LabelManager() + { + resetLabelPtrList(); + } void reset() { base_ = 0; @@ -1269,6 +1335,7 @@ class LabelManager { stateList_.push_back(SlabelState()); clabelDefList_.clear(); clabelUndefList_.clear(); + resetLabelPtrList(); } void enterLocal() { @@ -1301,10 +1368,11 @@ class LabelManager { SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); define_inner(st.defList, st.undefList, label, base_->getSize()); } - void defineClabel(const Label& label) + void defineClabel(Label& label) { define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize()); label.mgr = this; + labelPtrList_.insert(&label); } void assign(Label& dst, const Label& src) { @@ -1312,6 +1380,7 @@ class LabelManager { if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L); define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset); dst.mgr = this; + labelPtrList_.insert(&dst); } bool getOffset(size_t *offset, std::string& label) const { @@ -1359,19 +1428,19 @@ inline Label::Label(const Label& rhs) { id = rhs.id; mgr = rhs.mgr; - if (mgr) mgr->incRefCount(id); + if (mgr) mgr->incRefCount(id, this); } inline Label& Label::operator=(const Label& rhs) { if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L); id = rhs.id; mgr = rhs.mgr; - if (mgr) mgr->incRefCount(id); + if (mgr) mgr->incRefCount(id, this); return *this; } inline Label::~Label() { - if (id && mgr) mgr->decRefCount(id); + if (id && mgr) mgr->decRefCount(id, this); } inline const uint8* Label::getAddress() const { @@ -1488,6 +1557,8 @@ class CodeGenerator : public CodeArray { T_B32 = 1 << 26, // m32bcst T_B64 = 1 << 27, // m64bcst T_M_K = 1 << 28, // mem{k} + T_VSIB = 1 << 29, + T_MEM_EVEX = 1 << 30, // use evex if mem T_XXX }; void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false) @@ -1525,7 +1596,7 @@ class CodeGenerator : public CodeArray { if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err); return v; } - int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0) + int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false) { if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID); int w = (type & T_EW1) ? 1 : 0; @@ -1568,7 +1639,7 @@ class CodeGenerator : public CodeArray { } } } - bool Vp = !(v ? v->isExtIdx2() : 0); + bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx); bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false); if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET); db(0x62); @@ -1714,8 +1785,9 @@ class CodeGenerator : public CodeArray { // reg is reg field of ModRM // immSize is the size for immediate value // disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement - void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0) + void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false) { + if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING); if (addr.getMode() == Address::M_ModRM) { setSIB(addr.getRegExp(), reg, disp8N); } else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) { @@ -1857,15 +1929,20 @@ class CodeGenerator : public CodeArray { } void opPushPop(const Operand& op, int code, int ext, int alt) { - if (op.isREG()) { - if (op.isBit(16)) db(0x66); - if (op.getReg().getIdx() >= 8) db(0x41); - db(alt | (op.getIdx() & 7)); - } else if (op.isMEM()) { - opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code); - } else { - throw Error(ERR_BAD_COMBINATION); + int bit = op.getBit(); + if (bit == 16 || bit == BIT) { + if (bit == 16) db(0x66); + if (op.isREG()) { + if (op.getReg().getIdx() >= 8) db(0x41); + db(alt | (op.getIdx() & 7)); + return; + } + if (op.isMEM()) { + opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code); + return; + } } + throw Error(ERR_BAD_COMBINATION); } void verifyMemHasSize(const Operand& op) const { @@ -1954,10 +2031,11 @@ class CodeGenerator : public CodeArray { const Address& addr = op2.getAddress(); const RegExp& regExp = addr.getRegExp(); const Reg& base = regExp.getBase(); + const Reg& index = regExp.getIndex(); if (BIT == 64 && addr.is32bit()) db(0x67); int disp8N = 0; - bool x = regExp.getIndex().isExtIdx(); - if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) { + bool x = index.isExtIdx(); + if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) { int aaa = addr.getOpmaskIdx(); if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY); bool b = false; @@ -1965,12 +2043,12 @@ class CodeGenerator : public CodeArray { if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST); b = true; } - int VL = regExp.isVsib() ? regExp.getIndex().getBit() : 0; - disp8N = evex(r, base, p1, type, code, x, b, aaa, VL); + int VL = regExp.isVsib() ? index.getBit() : 0; + disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2()); } else { vex(r, base, p1, type, code, x); } - opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N); + opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0); } else { const Reg& base = op2.getReg(); if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) { @@ -2071,8 +2149,7 @@ class CodeGenerator : public CodeArray { } if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING); } - addr.permitVsib(); - opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type | T_YMM, code); + opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code); } enum { xx_yy_zz = 0, @@ -2096,7 +2173,6 @@ class CodeGenerator : public CodeArray { { if (x.hasZero()) throw Error(ERR_INVALID_ZERO); checkGather2(x, addr.getRegExp().getIndex(), mode); - addr.permitVsib(); opVex(x, 0, addr, type, code); } /* @@ -2116,7 +2192,6 @@ class CodeGenerator : public CodeArray { { if (addr.hasZero()) throw Error(ERR_INVALID_ZERO); if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING); - addr.permitVsib(); opVex(x, 0, addr, type, code); } public: @@ -2169,7 +2244,8 @@ class CodeGenerator : public CodeArray { const Segment es, cs, ss, ds, fs, gs; #endif void L(const std::string& label) { labelMgr_.defineSlabel(label); } - void L(const Label& label) { labelMgr_.defineClabel(label); } + void L(Label& label) { labelMgr_.defineClabel(label); } + Label L() { Label label; L(label); return label; } void inLocalLabel() { labelMgr_.enterLocal(); } void outLocalLabel() { labelMgr_.leaveLocal(); } /* @@ -2200,7 +2276,7 @@ class CodeGenerator : public CodeArray { // call(function pointer) #ifdef XBYAK_VARIADIC_TEMPLATE template - void call(Ret(*func)(Params...)) { call(CastTo(func)); } + void call(Ret(*func)(Params...)) { call(reinterpret_cast(func)); } #endif void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); } @@ -2458,11 +2534,16 @@ class CodeGenerator : public CodeArray { MUST call ready() to complete generating code if you use AutoGrow mode. It is not necessary for the other mode if hasUndefinedLabel() is true. */ - void ready() + void ready(ProtectMode mode = PROTECT_RWE) { if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND); - if (isAutoGrow()) calcJmpAddress(); + if (isAutoGrow()) { + calcJmpAddress(); + if (useProtect()) setProtectMode(mode); + } } + // set read/exec + void readyRE() { return ready(PROTECT_RE); } #ifdef XBYAK_TEST void dump(bool doClear = true) { diff --git a/src/cpu/xbyak/xbyak_mnemonic.h b/src/cpu/xbyak/xbyak_mnemonic.h index 9e3c5351850..917a42a353a 100644 --- a/src/cpu/xbyak/xbyak_mnemonic.h +++ b/src/cpu/xbyak/xbyak_mnemonic.h @@ -43,7 +43,7 @@ * THE POSSIBILITY OF SUCH DAMAGE. *******************************************************************************/ -const char *getVersionString() const { return "5.631"; } +const char *getVersionString() const { return "5.751"; } void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } @@ -167,8 +167,11 @@ void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); } void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); } void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); } void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); } +void cmpsb() { db(0xA6); } +void cmpsd() { db(0xA7); } void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); } void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); } +void cmpsw() { db(0x66); db(0xA7); } void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); } void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); } void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); } @@ -728,6 +731,9 @@ void sar(const Operand& op, int imm) { opShift(op, imm, 7); } void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); } void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); } void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); } +void scasb() { db(0xAE); } +void scasd() { db(0xAF); } +void scasw() { db(0x66); db(0xAF); } void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524 void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524 void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524 @@ -787,6 +793,9 @@ void stc() { db(0xF9); } void std() { db(0xFD); } void sti() { db(0xFB); } void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); } +void stosb() { db(0xAA); } +void stosd() { db(0xAB); } +void stosw() { db(0x66); db(0xAB); } void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); } void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); } void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); } @@ -1046,10 +1055,10 @@ void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_X void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBE); } void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBF); } void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBF); } -void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x92, 0); } -void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x92, 1); } -void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x93, 1); } -void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x93, 2); } +void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0); } +void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); } +void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); } +void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); } void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); } void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); } void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); } @@ -1059,7 +1068,7 @@ void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); } void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); } void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); } -void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); } +void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); } void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); } void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); } void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); } @@ -1180,10 +1189,10 @@ void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16| void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); } void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); } void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } } -void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x90, 1); } -void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x90, 0); } -void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x91, 2); } -void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x91, 1); } +void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); } +void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); } +void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); } +void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x91, 1); } void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x02); } void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x03); } void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x01); } @@ -1242,28 +1251,28 @@ void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); } void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); } void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); } -void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); } +void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); } -void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); } -void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); } +void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } +void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); } void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); } void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); } -void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); } +void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); } -void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); } +void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); } void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); } -void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); } +void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); } -void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); } +void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); } void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); } -void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); } -void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); } +void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); } +void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); } void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); } void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); } void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); } -void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); } +void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); } void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); } void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); } void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); } @@ -1589,7 +1598,10 @@ void jrcxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } void cdqe() { db(0x48); db(0x98); } void cqo() { db(0x48); db(0x99); } +void cmpsq() { db(0x48); db(0xA7); } void movsq() { db(0x48); db(0xA5); } +void scasq() { db(0x48); db(0xAF); } +void stosq() { db(0x48); db(0xAB); } void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); } void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); } void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); } @@ -1762,18 +1774,18 @@ void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(1 void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); } void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); } void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); } -void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x92, 1); } -void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x92, 0); } -void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); } -void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); } -void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); } -void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); } -void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); } -void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); } -void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); } -void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); } -void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x93, 0); } -void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x93, 2); } +void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); } +void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); } +void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); } +void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); } +void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } +void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } +void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); } +void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); } +void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } +void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } +void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); } +void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); } void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); } void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); } void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); } @@ -1860,10 +1872,10 @@ void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1 | T void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); } void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); } void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); } -void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x90, 0); } -void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x90, 1); } -void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x91, 2); } -void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x91, 0); } +void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 0); } +void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 1); } +void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 2); } +void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); } void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); } void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); } void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); } @@ -1914,10 +1926,10 @@ void vprord(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.get void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); } void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); } void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); } -void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 0); } -void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 1); } -void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 2); } -void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 0); } +void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); } +void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); } +void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); } +void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); } void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); } void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); } void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); } @@ -1981,18 +1993,18 @@ void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); } void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); } void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); } -void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 1); } -void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 0); } -void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); } -void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); } -void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); } -void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); } -void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); } -void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); } -void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); } -void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); } -void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 0); } -void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 2); } +void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); } +void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); } +void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); } +void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); } +void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } +void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } +void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); } +void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); } +void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } +void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); } +void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); } +void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); } void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); } void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); } void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); } diff --git a/src/cpu/xbyak/xbyak_util.h b/src/cpu/xbyak/xbyak_util.h index 713c68db2f9..0bad61fd9f4 100644 --- a/src/cpu/xbyak/xbyak_util.h +++ b/src/cpu/xbyak/xbyak_util.h @@ -173,8 +173,9 @@ class Cpu { if (cacheType == NO_CACHE) break; if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) { unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1; - if (n_cores != 0) // true only if leaf 0xB is supported and valid + if (n_cores != 0) { // true only if leaf 0xB is supported and valid nb_logical_cores = (std::min)(nb_logical_cores, n_cores); + } assert(nb_logical_cores != 0); data_cache_size[data_cache_levels] = (extractBit(data[1], 22, 31) + 1) @@ -183,7 +184,7 @@ class Cpu { * (data[2] + 1); if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores; assert(smt_width != 0); - cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width; + cores_sharing_data_cache[data_cache_levels] = (std::max)(nb_logical_cores / smt_width, 1u); data_cache_levels++; } } @@ -460,7 +461,7 @@ const int UseRCX = 1 << 6; const int UseRDX = 1 << 7; class Pack { - static const size_t maxTblNum = 10; + static const size_t maxTblNum = 15; const Xbyak::Reg64 *tbl_[maxTblNum]; size_t n_; public: @@ -520,7 +521,7 @@ class Pack { const Xbyak::Reg64& operator[](size_t n) const { if (n >= n_) { - fprintf(stderr, "ERR Pack bad n=%d\n", (int)n); + fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_); throw Error(ERR_BAD_PARAMETER); } return *tbl_[n]; @@ -562,6 +563,7 @@ class StackFrame { static const int rcxPos = 3; static const int rdxPos = 2; #endif + static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax Xbyak::CodeGenerator *code_; int pNum_; int tNum_; @@ -571,7 +573,7 @@ class StackFrame { int P_; bool makeEpilog_; Xbyak::Reg64 pTbl_[4]; - Xbyak::Reg64 tTbl_[10]; + Xbyak::Reg64 tTbl_[maxRegNum]; Pack p_; Pack t_; StackFrame(const StackFrame&); @@ -583,7 +585,7 @@ class StackFrame { make stack frame @param sf [in] this @param pNum [in] num of function parameter(0 <= pNum <= 4) - @param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX) + @param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14 @param stackSizeByte [in] local stack size @param makeEpilog [in] automatically call close() if true @@ -610,27 +612,17 @@ class StackFrame { using namespace Xbyak; if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM); const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0); - if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM); + if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM); const Reg64& _rsp = code->rsp; - const AddressFrame& _ptr = code->ptr; saveNum_ = (std::max)(0, allRegNum - noSaveNum); const int *tbl = getOrderTbl() + noSaveNum; - P_ = saveNum_ + (stackSizeByte + 7) / 8; - if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment - P_ *= 8; - if (P_ > 0) code->sub(_rsp, P_); -#ifdef XBYAK64_WIN - for (int i = 0; i < (std::min)(saveNum_, 4); i++) { - code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i])); - } - for (int i = 4; i < saveNum_; i++) { - code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); - } -#else for (int i = 0; i < saveNum_; i++) { - code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); + code->push(Reg64(tbl[i])); } -#endif + P_ = (stackSizeByte + 7) / 8; + if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment + P_ *= 8; + if (P_ > 0) code->sub(_rsp, P_); int pos = 0; for (int i = 0; i < pNum; i++) { pTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); @@ -651,21 +643,11 @@ class StackFrame { { using namespace Xbyak; const Reg64& _rsp = code_->rsp; - const AddressFrame& _ptr = code_->ptr; const int *tbl = getOrderTbl() + noSaveNum; -#ifdef XBYAK64_WIN - for (int i = 0; i < (std::min)(saveNum_, 4); i++) { - code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]); - } - for (int i = 4; i < saveNum_; i++) { - code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]); - } -#else + if (P_ > 0) code_->add(_rsp, P_); for (int i = 0; i < saveNum_; i++) { - code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]); + code_->pop(Reg64(tbl[saveNum_ - 1 - i])); } -#endif - if (P_ > 0) code_->add(_rsp, P_); if (callRet) code_->ret(); } @@ -677,9 +659,6 @@ class StackFrame { } catch (std::exception& e) { printf("ERR:StackFrame %s\n", e.what()); exit(1); - } catch (...) { - printf("ERR:StackFrame otherwise\n"); - exit(1); } } private: @@ -698,7 +677,7 @@ class StackFrame { } int getRegIdx(int& pos) const { - assert(pos < 14); + assert(pos < maxRegNum); using namespace Xbyak; const int *tbl = getOrderTbl(); int r = tbl[pos++];