diff --git a/plugins/arm/semantics/aarch64-arithmetic.lisp b/plugins/arm/semantics/aarch64-arithmetic.lisp
index 82a145ca6..6d74984e2 100644
--- a/plugins/arm/semantics/aarch64-arithmetic.lisp
+++ b/plugins/arm/semantics/aarch64-arithmetic.lisp
@@ -18,6 +18,47 @@
 (defun ADDWrs (rd rn rm off) (ADD*r* setw shift-encoded rd rn rm off))
 (defun ADDXrs (rd rn rm off) (ADD*r* set$ shift-encoded rd rn rm off))
+
+; ADCS: add with carry, setting flags
+(defun ADCSXr (rd rn rm)
+  (add-with-carry set$ rd CF rm rn))
+(defun ADCSWr (rd rn rm)
+  (add-with-carry setw rd CF rm rn))
+
+; ADC: add with carry, no flags
+(defun ADCXr (rd rn rm)
+  (set$ rd (+ CF rm rn)))
+(defun ADCWr (rd rn rm)
+  (setw rd (+ CF rm rn)))
+
+; adds immediate
+(defun ADDSXri (rd rn imm off)
+  (add-with-carry set$ rd rn (lshift imm off) 0))
+
+(defun ADDSWri (rd rn imm off)
+  (add-with-carry setw rd rn (lshift imm off) 0))
+
+; adds shifted
+(defun ADDSXrs (rd rn rm shift)
+  (add-with-carry set$ rd rn (shift-encoded rm shift) 0))
+
+(defun ADDSWrs (rd rn rm shift)
+  (add-with-carry setw rd rn (shift-encoded rm shift) 0))
+
+; add extended
+(defun ADDXrx (rd rn rm shift)
+  (set$ rd (+ rn (extended rm shift))))
+
+(defun ADDWrx (rd rn rm shift)
+  (setw rd (+ rn (extended rm shift))))
+
+; add extend SXTX|UXTX
+(defun ADDXrx64 (rd rn rm shift)
+  (set$ rd (+ rn (extended rm shift))))
+
+
+
 (defun ADRP (dst imm)
   (set$ dst (+ (logand (get-program-counter) (lshift -1 12))
@@ -25,7 +66,7 @@
 (defmacro SUB*r* (set shift-function rd rn imm-or-rm off)
   "Implements SUB*ri and SUB*rs by specifying the shift function."
-  (set rd (- rn (shift-function imm-or-rm off))))
+  (set rd (cast-low (word-width rd) (- rn (shift-function imm-or-rm off)))))
 ;; see ADD*ri vs ADD*rs
 (defun SUBWri (rd rn rm off) (SUB*r* setw lshift rd rn rm off))
@@ -33,20 +74,48 @@
 (defun SUBWrs (rd rn rm off) (SUB*r* setw shift-encoded rd rn rm off))
 (defun SUBXrs (rd rn rm off) (SUB*r* set$ shift-encoded rd rn rm off))
+(defun SUBXrx (rd rn rm off)
+  (set$ rd (- rn (extended rm off))))
+
 (defun SUBXrx64 (rd rn rm off)
   (set$ rd (- rn (extended rm off))))
+(defun SUBXrw (rd rn rm off)
+  (setw rd (- rn (extended rm off))))
+
+(defun SBCSXr (rd rn rm)
+  (add-with-carry set$ rd CF (lnot rm) rn))
+
+(defun SBCSWr (rd rn rm)
+  (add-with-carry setw rd CF (lnot rm) rn))
+
+(defun SBCXr (rd rn rm)
+  (set$ rd (+ CF (lnot rm) rn)))
+
+(defun SBCWr (rd rn rm)
+  (setw rd (+ CF (lnot rm) rn)))
+
 (defun SUBSWrs (rd rn rm off)
   (add-with-carry/clear-base rd rn (lnot (shift-encoded rm off)) 1))
+(defun SUBSXrx (rd rn rm off)
+  (add-with-carry set$ rd rn (lnot (extended rm off)) 1))
+
+(defun SUBSXrx64 (rd rn rm off)
+  (add-with-carry set$ rd rn (lnot (extended rm off)) 1))
+
 (defun SUBSXrs (rd rn rm off)
-  (add-with-carry rd rn (lnot (shift-encoded rm off)) 1))
+  (add-with-carry set$ rd rn (lnot (shift-encoded rm off)) 1))
+
+; rx variant: second operand is an extended register, W dest clears the upper base
+(defun SUBSWrx (rd rn rm off)
+  (add-with-carry/clear-base rd rn (lnot (extended rm off)) 1))
 (defun SUBSWri (rd rn imm off)
   (add-with-carry/clear-base rd rn (lnot (lshift imm off)) 1))
 (defun SUBSXri (rd rn imm off)
-  (add-with-carry rd rn (lnot (lshift imm off)) 1))
+  (add-with-carry set$ rd rn (lnot (lshift imm off)) 1))
 (defmacro Mop*rrr (set op rd rn rm ra)
   "(Mop*rrr set op rd rn rm ra) implements multiply-add, multiply-subtract
@@ -58,6 +127,19 @@
 (defun MSUBWrrr (rd rn rm ra) (Mop*rrr setw - rd rn rm ra))
 (defun MSUBXrrr (rd rn rm ra) (Mop*rrr set$ - rd rn rm ra))
+(defun UMADDLrrr (rd rn rm ra) (set$ rd (cast-low 64 (+ ra (* (cast-unsigned 64 rn) (cast-unsigned 64 rm))))))
+
+(defun SMADDLrrr
(rd rn rm ra) (set$ rd (cast-signed 64 (+ ra (* (cast-signed 64 rn) (cast-signed 64 rm))))))
+
+(defun UMSUBLrrr (rd rn rm ra) (set$ rd (cast-low 64 (- ra (* (cast-unsigned 64 rn) (cast-unsigned 64 rm))))))
+
+(defun SMSUBLrrr (rd rn rm ra) (set$ rd (cast-signed 64 (- ra (* (cast-signed 64 rn) (cast-signed 64 rm))))))
+
+(defun UMULHrr (rd rn rm)
+  "multiplies rn and rm together and stores the high 64 bits of the resulting
+   128-bit value to the register rd"
+  (set$ rd (cast-high 64 (* (cast-unsigned 128 rn) (cast-unsigned 128 rm)))))
+
 (defmacro *DIV*r (set div rd rn rm)
   "(*DIV*r set div rd rn rm) implements the SDIV or UDIV instructions
   on W or X registers, with div set to s/ or / respectively."
@@ -69,3 +151,6 @@
 (defun SDIVXr (rd rn rm) (*DIV*r set$ s/ rd rn rm))
 (defun UDIVWr (rd rn rm) (*DIV*r setw / rd rn rm))
 (defun UDIVXr (rd rn rm) (*DIV*r set$ / rd rn rm))
+
+(defun ADR (rd label)
+  (set$ rd (+ (get-program-counter) (cast-signed 64 label))))
diff --git a/plugins/arm/semantics/aarch64-atomic.lisp b/plugins/arm/semantics/aarch64-atomic.lisp
index 3cf8bc82d..30e4770d0 100644
--- a/plugins/arm/semantics/aarch64-atomic.lisp
+++ b/plugins/arm/semantics/aarch64-atomic.lisp
@@ -12,7 +12,7 @@
   load and store are functions to load/store to/from the size of rs and rt.
   acquire and release are booleans indicating whether load-acquire
   and store-release ordering is to be enforced."
- (let ((data (load rn))) + (let ((data (load rn))) (when acquire (intrinsic 'load-acquire)) (when (= data rs) (when release (intrinsic 'store-release)) @@ -62,6 +62,43 @@ (defun CASLH (rs _ rt rn) (CASordH rs rt rn false true)) (defun CASALH (rs _ rt rn) (CASordH rs rt rn true true)) +(defun first (x y) (declare (visibility :private)) x) +(defun second (x y) (declare (visibility :private)) y) + +(defmacro CASPord* (set load rs-pair rt-pair rn register-width acquire release) + "(CASP* set load store rs-pair rt-pair rn register-width acquire release) + implements a compare-and-swap-pair instruction for W and X registers. + set is the functions to set to a register in the pair. + register-width is 64 or 32, depending on the size of register used. + load either loads 128 bits or 64 (the size of the whole pair). + acquire and release are as in the CASord* macro." + (let ((data (load rn)) + (lower (cast-low register-width data)) + (upper (cast-high register-width data))) + (when acquire (intrinsic 'load-acquire)) + (when (= data (register-pair-concat rs-pair)) + (when release (intrinsic 'store-release)) + (store-word rn (register-pair-concat rt-pair))) + (set$ (nth-reg-in-group rs-pair 0) (endian first upper lower)) + (set$ (nth-reg-in-group rs-pair 1) (endian second upper lower)))) + +(defmacro CASPordX (rs-pair rt-pair rn acquire release) + "Specialisation of CASPord* for X registers." + (CASPord* set$ load-dword rs-pair rt-pair rn 64 acquire release)) + +(defmacro CASPordW (rs-pair rt-pair rn acquire release) + "Specialisation of CASPord* for W registers." 
+ (CASPord* setw load-word rs-pair rt-pair rn 32 acquire release)) + +(defun CASPX (rs-pair _ rt-pair rn) (CASPordX rs-pair rt-pair rn false false)) +(defun CASPAX (rs-pair _ rt-pair rn) (CASPordX rs-pair rt-pair rn true false)) +(defun CASPLX (rs-pair _ rt-pair rn) (CASPordX rs-pair rt-pair rn false true)) +(defun CASPALX (rs-pair _ rt-pair rn) (CASPordX rs-pair rt-pair rn true true)) + +(defun CASPW (rs-pair _ rt-pair rn) (CASPordW rs-pair rt-pair rn false false)) +(defun CASPAW (rs-pair _ rt-pair rn) (CASPordW rs-pair rt-pair rn true false)) +(defun CASPLW (rs-pair _ rt-pair rn) (CASPordW rs-pair rt-pair rn false true)) +(defun CASPALW (rs-pair _ rt-pair rn) (CASPordW rs-pair rt-pair rn true true)) (defmacro CSop*r (set op rd rn rm cnd) "(CSop*r set op rd rn rm cnd) implements the conditional select diff --git a/plugins/arm/semantics/aarch64-data-movement.lisp b/plugins/arm/semantics/aarch64-data-movement.lisp index 637d4a24a..f78a4d13b 100644 --- a/plugins/arm/semantics/aarch64-data-movement.lisp +++ b/plugins/arm/semantics/aarch64-data-movement.lisp @@ -8,39 +8,238 @@ ;; LD... -(defun LDRXui (dst reg off) - (set$ dst (load-word (+ reg (lshift off 3))))) +;; LDR (register) -(defun LDRSWui (dst base off) - (set$ dst (cast-signed - (word) - (load-hword (+ base (lshift off 2)))))) +(defmacro LDR*ro* (rt base index signed s scale setf mem-load) + "(LDR*ro* rt base index signed s scale setf mem-load) loads a register from + memory at the address calculated from a base register and optionally shifted + and extended offset value. 
+ NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment()" + (let ((shift (* s scale)) + (off (if (= signed 1) + (cast-signed 64 (lshift index shift)) + (cast-unsigned 64 (lshift index shift))))) + (setf rt (mem-load (+ base off))))) -(defun LDRWui (dst reg off) - (setw dst - (cast-unsigned (word) (load-hword (+ reg (lshift off 2)))))) +(defmacro LDRWro* (wt base index signed s) (LDR*ro* wt base index signed s 2 setw load-hword)) +(defmacro LDRXro* (xt base index signed s) (LDR*ro* xt base index signed s 3 set$ load-word)) + +(defun LDRWroW (wt base index signed s) (LDRWro* wt base index signed s)) +(defun LDRWroX (wt base index signed s) (LDRWro* wt base index signed s)) +(defun LDRXroW (xt base index signed s) (LDRXro* xt base index signed s)) +(defun LDRXroX (xt base index signed s) (LDRXro* xt base index signed s)) + +;; LDR (immediate, post-index) + +(defmacro LDR*post (dst base off setf) + (setf dst (mem-read base (/ (word-width dst) 8)))) + +(defun LDRWpost (_ dst base off) (LDR*post dst base off setw)) +(defun LDRXpost (_ dst base off) (LDR*post dst base off set$)) + +;; LDR (immediate, pre-index) + +(defmacro LDR*pre (dst base off setf) + (let ((address (+ base (cast-signed 64 off)))) + (setf dst (mem-read address (/ (word-width dst) 8))) + (set$ base address))) + +(defun LDRWpre (_ dst base off) (LDR*pre dst base off setw)) +(defun LDRXpre (_ dst base off) (LDR*pre dst base off set$)) + +;; LDR (immediate, unsigned offset) + +(defmacro LDR*ui (dst reg off setf scale) + (setf dst (mem-read (+ reg (lshift off scale)) (/ (word-width dst) 8)))) + +(defun LDRXui (dst reg off) (LDR*ui dst reg off set$ 3)) +(defun LDRWui (dst reg off) (LDR*ui dst reg off setw 2)) + +;; LDRB (immediate, post-index) + +(defun LDRBBpost (_ dst base simm) + "(LDRBBpost _ dst base simm) loads a byte from the base address and stores + it in the 32 bit dst register, and increments the base register by simm. 
+ NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment(), + ConstrainUnpredictable()" + (setw dst (cast-unsigned 32 (load-byte base))) + (set$ base (+ base simm))) + +;; LDRB (immediate, pre-index) + +(defun LDRBBpre (_ dst base simm) + "(LDRBBpre _ dst base simm) loads a byte from the base address and an offset + simm and stores it in the 32 bit dst register. + NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment(), + ConstrainUnpredictable()" + (setw dst (cast-unsigned 32 (load-byte (+ base simm)))) + (set$ base (+ base simm))) + +;; LDRB (immediate, unsigned offset) (defun LDRBBui (dst reg off) + "(LDRBBui _ dst base simm) loads a byte from a preindexed base address + and an unsigned offset and stores it in the 32 bit dst register. + NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment(), + ConstrainUnpredictable()" (setw dst - (cast-unsigned (word) (load-byte (+ reg off))))) + (cast-unsigned 32 (load-byte (+ reg off))))) + +;; LDRB (register) + +(defmacro LDRBBro* (dst base index signed) + "(LDRBBro* dst base index signed) loads a byte from memory from a base address + and index and stores it in a 32 bit destination register. 
+ NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment()" + (let ((off (if (= signed 1) + (cast-signed 64 index) + (cast-unsigned 64 index)))) + (setw dst (cast-unsigned 32 (load-byte (+ base off)))))) + +(defun LDRBBroW (dst base index signed _) (LDRBBro* dst base index signed)) +(defun LDRBBroX (dst base index signed _) (LDRBBro* dst base index signed)) -(defun LDRBBroX (dst reg off _ _) - (set$ dst - (cast-unsigned (word) (load-byte (+ reg off))))) +;; LDP (post-index) -(defun LDPXpost (dst r1 r2 base off) + +(defun LDPXpost (_ r1 r2 base off) (let ((off (lshift off 3))) (set$ r1 (load-word base)) (set$ r2 (load-word (+ base (sizeof word)))) - (set$ dst (+ dst off)))) + (set$ base (+ base off)))) -(defun LDPXi (r1 r2 base off) +(defun LDPXpre (_ r1 r2 base off) (let ((off (lshift off 3))) (set$ r1 (load-word (+ base off))) - (set$ r2 (load-word (+ base off (sizeof word)))))) + (set$ r2 (load-word (+ base off (sizeof word)))) + (set$ base (+ base off)))) + +(defun LDPWpost (_ r1 r2 base off) + (let ((off (lshift off 2))) + (setw r1 (load-hword base)) + (setw r2 (load-hword (+ base (sizeof word)))) + (set$ base (+ base off)))) + +(defun LDPWpre (_ r1 r2 base off) + (let ((off (lshift off 2))) + (setw r1 (load-hword base)) + (setw r2 (load-hword (+ base (sizeof word)))) + (set$ base (+ base off)))) + +;; LDP (signed offset) + +(defmacro LDP*i (r1 r2 base imm scale datasize setf mem-load) + "(LDP*i r1 r2 base imm scale datasize setf mem-load) loads a pair of registers + r1 and r2 from the address calculated from a base register value and immediate offset. 
+ NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment()" + (let ((off (lshift (cast-signed 64 imm) scale))) + (setf r1 (mem-load (+ base off))) + (setf r2 (mem-load (+ base off (/ datasize 8)))))) + +(defun LDPXi (r1 r2 base imm) (LDP*i r1 r2 base imm 3 64 set$ load-word)) +(defun LDPWi (w1 w2 base imm) (LDP*i w1 w2 base imm 2 32 setw load-hword)) + +;; LDRH (register) + +(defmacro LDRHHro* (wt base index signed s) + "(LDRHHro* wt base index signed s) loads 2 bytes from the address calculated from + a base register address and offset. + NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment()" + (let ((off (if (= signed 1) + (cast-signed 64 (lshift index s)) + (cast-unsigned 64 (lshift index s))))) + (setw wt (load-dbyte (+ base off))))) + +(defun LDRHHroX (wt xn xm extend s) (LDRHHro* wt xn xm extend s)) +(defun LDRHHroW (wt xn wm extend s) (LDRHHro* wt xn wm extend s)) + +;; LDRH (immediate, unsigned offset, pre/post indexed) + +(defun LDRHHui (wt xn pimm) + "(LDRHHui wt xn pimm) loads 2 bytes from the address calculated from + a base register and unsigned immediate offset. + NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment()" + (let ((off (lshift (cast-unsigned 64 pimm) 1))) + (setw wt (load-dbyte (+ xn off))))) + +(defun LDRHHpost (_ rd rn off) + (setw rd (load-dbyte rn)) + (set$ rn (+ rn off))) + +(defun LDRHHpre (_ rd rn off) + (setw rd (load-dbyte (+ rn off))) + (set$ rn (+ rn off))) + +;; LDRSW (immediate, unsigned offset) + +(defun LDRSWui (dst base off) + (set$ dst (cast-signed (word) (load-hword (+ base (lshift off 2)))))) + +;; LRDSW (register) + +(defmacro LDRSWro* (xt base index signed s) + "(LDRSWro* xt base index signed s) loads 32 bits from memory from + a base address and offset and stores it in the destination register xt. 
+ NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment()" + (let ((shift (* s 2)) + (off (if (= signed 1) + (cast-signed 64 (lshift index shift)) + (cast-unsigned 64 (lshift index shift))))) + (set$ xt (load-hword (+ base off))))) -(defun LDRXroX (rt rn rm _ shift) - (set$ rt (load-word (+ rn (lshift rm (* shift 3)))))) +(defun LDRSWroX (xt base xm signed s) (LDRSWro* xt base xm signed s)) +(defun LDRSWroW (xt base wm signed s) (LDRSWro* xt base wm signed s)) + +;; LDURB + +(defun LDURBBi (wt base simm) + "(LDURBBi wt base simm) loads a byte from the address calculated from + a base register and signed immediate offset and stores it in the + 32 bit destination register. + NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment()" + (setw wt (load-byte (+ base simm)))) + +;; LDURH + +(defun LDURHHi (rt rn simm) + (setw rt (cast-unsigned 32 (load-dbyte (+ rn simm))))) + +;; LDURSB + +(defun LDURSBWi (rt rn simm) + "LDURSBWi loads a byte from the address (rn + simm) and sign-extends it to write it to rt" + (setw rt (cast-signed 32 (load-byte (+ rn simm))))) + +(defun LDURSBXi (rt rn simm) + "LDURSBXi loads a byte from the address (rn + simm) and sign-extends it to write it to rt" + (set$ rt (cast-signed 64 (load-byte (+ rn simm))))) + +;; LDURSH + +(defun LDURSHWi (rt rn simm) + "LDURSBWi loads a halfword from the address (rn + simm) and sign-extends it to write it to rt" + (setw rt (cast-signed 32 (load-dbyte (+ rn simm))))) + +(defun LDURSHXi (rt rn simm) + "LDURSBXi loads a halfword from the address (rn + simm) and sign-extends it to write it to rt" + (set$ rt (cast-signed 64 (load-dbyte (+ rn simm))))) + +;; LDURSW + +(defun LDURSWi (rt rn simm) + "LDURSBXi loads a word from the address (rn + simm) and sign-extends it to write it to rt" + (set$ rt (cast-signed 64 (load-hword (+ rn simm))))) + +;; LDUR + +(defmacro LDUR*i (rt base simm setf mem-load) + "(LDUR*i rt base simm setf mem-load) loads a register from the address + 
calculated from a base register and signed immediate offset. + NOTE: does not HaveMTE2Ext(), SetTagCheckedInstruction(), CheckSPAlignment()" + (setf rt (mem-load (+ base (cast-signed 64 simm))))) + +(defun LDURWi (wt base simm) (LDUR*i wt base simm setw load-hword)) +(defun LDURXi (xt base simm) (LDUR*i xt base simm set$ load-word)) ;; MOV... @@ -65,39 +264,127 @@ ;; ST... +; STRB (defun STRBBui (src reg off) (store-byte (+ reg off) src)) -(defun STPXpre (dst t1 t2 _ off) - (let ((off (lshift off 3))) - (store-word (+ dst off) t1) - (store-word (+ dst off (sizeof word)) t2) - (set$ dst (+ dst off)))) +; STR (register) +(defun STR*ro* (scale rt rn rm signed shift) + "stores rt to (rn + rm << (shift * scale)) with signed or unsigned extension + of rm, where rt is a register of size (8 << scale). Note that rm can be an X + or W register and it chooses the appropriate extend mode implicitly. rn must + be an X register." + (assert (< signed 2)) + (assert-msg (= (word-width rt) (lshift 8 scale)) + "STR*ro*: scale must match size of rt") + (store-word + (+ rn + (if (= signed 1) + (signed-extend (word-width rm) (lshift rm (* shift scale))) + (unsigned-extend (word-width rm) (lshift rm (* shift scale))))) + rt)) + +(defun STRWroX (rt rn rm option shift) (STR*ro* 2 rt rn rm option shift)) +(defun STRWroW (rt rn rm option shift) (STR*ro* 2 rt rn rm option shift)) +(defun STRXroX (rt rn rm option shift) (STR*ro* 3 rt rn rm option shift)) +(defun STRXroW (rt rn rm option shift) (STR*ro* 3 rt rn rm option shift)) + +(defun STRHHroX (rt rn rm option shift) + (STR*ro* 1 (cast-low 16 rt) rn rm option shift)) + +; STR (immediate) (base registers): +(defun str-post (xreg src off) + "stores all of src to xreg, and post-indexes reg (reg += off)." 
+ (store-word xreg src) + (set$ xreg (+ xreg off))) + +(defun STRWpost (_ rt rn simm) + (str-post rn rt simm)) + +(defun STRXpost (_ rt rn simm) + (str-post rn rt simm)) -(defun STPXi (t1 t2 base off) - (let ((off (lshift off 4))) - (store-word base (+ base off)) - (store-word base (+ base off (sizeof word))))) +(defun str-pre (xreg src off) + "stores all of src to xreg, and pre-indexes reg (reg += off)." + (store-word (+ xreg off) src) + (set$ xreg (+ xreg off))) + +(defun STRWpre (_ rt rn simm) + (str-pre rn rt simm)) + +(defun STRXpre (_ rt rn simm) + (str-pre rn rt simm)) + +(defun STR*ui (scale src reg off) + "Stores a register of size (8 << scale) to the memory address + (reg + (off << scale))." + (assert-msg (= (word-width src) (lshift 8 scale)) + "STR*ui: scale must match size of register") + (store-word (+ reg (lshift off scale)) + (cast-unsigned (lshift 8 scale) src))) (defun STRXui (src reg off) - (let ((off (lshift off 3))) - (store-word (+ reg off) src))) + (STR*ui 3 src reg off)) (defun STRWui (src reg off) - (let ((off (lshift off 2))) - (store-word (+ reg off) (cast-low 32 src)))) + (STR*ui 2 src reg off)) + +; STRH (base reg), signed offset variant +(defun STRHHui (rt rn off) + (store-word (+ rn (lshift off 1)) (cast-low 16 rt))) + +; STRB +(defun STRBBpost (_ rt base simm) + (store-byte base rt) + (set$ base (+ base simm))) + +(defun STRBBpre (_ rt base simm) + (store-byte (+ base simm) rt) + (set$ base (+ base simm))) + +(defun STRBBroW (rt rn rm option shift) + (let ((off + (if (= option 1) + (signed-extend 32 rm) ; SXTW + (unsigned-extend 32 rm)))) ; UXTW + (store-byte (+ rn off) rt))) + +(defun STRBBroX (rt rn rm option shift) + (let ((off + (if (= option 1) + (signed-extend 64 rm) ; SXTX + (unsigned-extend 64 rm)))) ; LSL + (store-byte (+ rn off) rt))) -(defun STRXroX (rt rn rm _ shift) - (store-word (+ rn (lshift rm (* shift 3))) rt)) +; STP +(defun STPWpost (_ t1 t2 dst off) (store-pair 2 'post t1 t2 dst off)) +(defun STPXpost (_ t1 t2 dst 
off) (store-pair 3 'post t1 t2 dst off)) + +(defun STPXpre (_ t1 t2 dst off) (store-pair 3 'pre t1 t2 dst off)) +(defun STPWpre (_ t1 t2 dst off) (store-pair 2 'pre t1 t2 dst off)) + +(defun STPWi (rt rt2 base imm) (store-pair 2 'offset rt rt2 base imm)) +(defun STPXi (rt rt2 base imm) (store-pair 3 'offset rt rt2 base imm)) + +; addr + offset indexed STUR (defmacro STUR*i (src base off size) "Takes `size` bits from src and stores at base + off" (store-word (+ base off) (cast-low size src))) (defun STURXi (src base off) (STUR*i src base off 64)) - (defun STURWi (src base off) (STUR*i src base off 32)) +(defun STURHHi (src base off) (STUR*i src base off 16)) +(defun STURBBi (src base off) (STUR*i src base off 8)) -(defun STURHHi (src base off) (STUR*i src base off 16)) -(defun STURBBi (src base off) (STUR*i src base off 8)) +; EXTR + +(defun EXTRWrri (rd rn rm lsb) + "Extracts a register from a pair of registers, datasize = 32" + (setw rd (extract (+ lsb 31) lsb (concat rn rm)))) + +(defun EXTRXrri (rd rn rm lsb) + "Extracts a register from a pair of registers, datasize = 64" + (set$ rd (extract (+ lsb 63) lsb (concat rn rm)))) + diff --git a/plugins/arm/semantics/aarch64-helper.lisp b/plugins/arm/semantics/aarch64-helper.lisp index 7c1729f71..f06411627 100644 --- a/plugins/arm/semantics/aarch64-helper.lisp +++ b/plugins/arm/semantics/aarch64-helper.lisp @@ -84,19 +84,151 @@ (defun barrier-option-to-symbol (option) "(barrier-option-to-symbol option) converts the - 4-bit optional value to a symbol. - This is to be used with the (special) primitive." + 4-bit value to a symbol. + This is to be used with the (intrinsic) primitive." 
(case option - 0b1111 :sy - 0b1110 :st - 0b1101 :ld - 0b1011 :ish - 0b1010 :ishst - 0b1001 :ishld - 0b0111 :nsh - 0b0110 :nshst - 0b0101 :nshld - 0b0011 :osh - 0b0010 :oshst - 0b0001 :oshld - :unknown)) + 0b1111 'sy + 0b1110 'st + 0b1101 'ld + 0b1011 'ish + 0b1010 'ishst + 0b1001 'ishld + 0b0111 'nsh + 0b0110 'nshst + 0b0101 'nshld + 0b0011 'osh + 0b0010 'oshst + 0b0001 'oshld + 'unknown)) + +(defun bitvec-to-symbol (x sym) + "(bitvec-to-symbol x sym) returns the symbol concatenation of + sym and the hexadecimal representation of x." + (if (> (word-width x) 0) + (bitvec-to-symbol + (cast-low (- (word-width x) 4) x) + (symbol-concat + sym + (case (cast-high 4 x) + 0x0 '0 + 0x1 '1 + 0x2 '2 + 0x3 '3 + 0x4 '4 + 0x5 '5 + 0x6 '6 + 0x7 '7 + 0x8 '8 + 0x9 '9 + 0xa 'a + 0xb 'b + 0xc 'c + 0xd 'd + 0xe 'e + 0xf 'f))) + sym)) + +(defun replace-bit-range (reg hi lo val size) + "(replace-bit-range reg hi lo val) returns reg with bits + hi to lo inclusive set to the value stored in val." + (let ((mask (lshift (cast-unsigned size (ones (+ (- hi lo) 1))) lo)) + (cleared (logand reg (lnot mask))) + (result (logor cleared (logand mask (lshift (cast-unsigned size val) lo))))) + result)) + +(defun insert-element-into-vector (vd index element size) + "(insert-element-into-vector vd index element size) inserts element into vd[index], + where size is in {8,16,32,64}" + (let ((highIndex (-1 (* size (+ index 1)))) + (lowIndex (* size index))) + (set$ vd (replace-bit-range vd highIndex lowIndex element 128)))) + +(defun replicate-and-insert (vd element esize dsize) + "(replicate-and-insert vd element esize dsize) replicates and concatenates + an element of esize to dsize and sets the vector register vd" + (set$ vd (replicate-and-insert-helper element esize dsize 1))) + +(defun replicate-and-insert-helper (element esize dsize index) + (if (< (* index esize) dsize) + (concat element (replicate-and-insert-helper element esize dsize (+ index 1))) + element)) + +(defun get-vector-S-element (index 
vn) + "(get-vector-S-element index vn) returns the 32 bit element from vn[index]" + (case index + 0x0 (extract 31 0 vn) + 0x1 (extract 63 32 vn) + 0x2 (extract 95 64 vn) + 0x3 (extract 127 96 vn) + 0x0)) + +(defun load-dbyte (address) + "(load-dbyte address) loads two bytes from memory." + (load-bits 16 address)) + +(defun mem-read (address size) + "(mem-read address size) loads size bytes from memory at address." + (case size + 1 (load-byte address) + 2 (load-dbyte address) + 4 (load-hword address) + 8 (load-word address) + 16 (concat (load-word address) (load-word (+ address 8))))) + +(defun register-pair-concat (r-pair) + "(register-pair-concat r-pair) returns the concatenated values of + the register pair returned by LLVM, taking into account + the endianness." + (case (symbol r-pair) + 'X0_X1 (endian concat X0 X1) + 'X2_X3 (endian concat X2 X3) + 'X4_X5 (endian concat X4 X5) + 'X6_X7 (endian concat X6 X7) + 'X8_X9 (endian concat X8 X9) + 'X10_X11 (endian concat X10 X11) + 'X12_X13 (endian concat X12 X13) + 'X14_X15 (endian concat X14 X15) + 'X16_X17 (endian concat X16 X17) + 'X18_X19 (endian concat X18 X19) + 'X20_X21 (endian concat X20 X21) + 'X22_X23 (endian concat X22 X23) + 'X24_X25 (endian concat X24 X25) + 'X26_X27 (endian concat X26 X27) + 'X28_X29 (endian concat X28 X29) + 'W0_W1 (endian concat W0 W1) + 'W2_W3 (endian concat W2 W3) + 'W4_W5 (endian concat W4 W5) + 'W6_W7 (endian concat W6 W7) + 'W8_W9 (endian concat W8 W9) + 'W10_W11 (endian concat W10 W11) + 'W12_W13 (endian concat W12 W13) + 'W14_W15 (endian concat W14 W15) + 'W16_W17 (endian concat W16 W17) + 'W18_W19 (endian concat W18 W19) + 'W20_W21 (endian concat W20 W21) + 'W22_W23 (endian concat W22 W23) + 'W24_W25 (endian concat W24 W25) + 'W26_W27 (endian concat W26 W27) + 'W28_W29 (endian concat W28 W29))) + +(defun store-pair (scale indexing t1 t2 dst imm) + "(store-pair scale indexing t1 t2 dst imm) + stores the pair t1,t2 of size (8 << scale) at the register dst plus an offset, + using 
the specified indexing (either 'post, 'pre or 'offset)." + (assert-msg (= (word-width t1) (word-width t2) (lshift 8 scale)) + "store-pair: scale must match size of register ") + (let ((off (lshift (cast-signed 64 imm) scale)) + (datasize (lshift 8 scale)) + (addr + (case indexing + 'post dst + 'pre (+ dst off) + 'offset (+ dst off) + (assert-msg false "store-pair invalid indexing scheme")))) + (store-word addr t1) + (store-word (+ addr (/ datasize 8)) t2) + (case indexing + 'post (set$ dst (+ addr off)) + 'pre (set$ dst addr) + 'offset ) + )) diff --git a/plugins/arm/semantics/aarch64-logical.lisp b/plugins/arm/semantics/aarch64-logical.lisp index d2d450d29..b1abe473d 100644 --- a/plugins/arm/semantics/aarch64-logical.lisp +++ b/plugins/arm/semantics/aarch64-logical.lisp @@ -40,6 +40,71 @@ (defun ORRWri (rd rn imm) (log*ri setw logor rd rn imm 32)) (defun ORRXri (rd rn imm) (log*ri set$ logor rd rn imm 64)) +;; Logical ANDS (flags set) + +(defmacro ANDS*r* (setf rd rn immOp) + "(ANDS*r* set rd rn immOp) implements the logical AND operation on either an X or W register + with immediate/shifted immediate and sets the N, V, Z, C flags based on the result." + (let ((result (logand rn immOp))) + (set-nzcv-after-logic-op result) + (setf rd result))) + +(defmacro ANDS*ri (setf size rd rn imm) + "(ANDS*ri set rd rn imm) implements the logical AND operation on either an X or W register + with immediate and sets the N, V, Z, C flags based on the result." + (let ((immOp (immediate-from-bitmask imm size))) + (ANDS*r* setf rd rn immOp))) + +(defun ANDSWri (rd rn imm) (ANDS*ri setw 32 rd rn imm)) +(defun ANDSXri (rd rn imm) (ANDS*ri set$ 64 rd rn imm)) + +(defmacro ANDS*rs (setf rd rn rm is) + "(ANDS*rs set rd rn imm) implements the logical AND operation on either an X or W register + with shifted immediate and sets the N, V, Z, C flags based on the result." 
+ (let ((immOp (shift-encoded rm is))) + (ANDS*r* setf rd rn immOp))) + +(defun ANDSWrs (rd rn rm is) (ANDS*rs setw rd rn rm is)) +(defun ANDSXrs (rd rn rm is) (ANDS*rs set$ rd rn rm is)) + + +;; BIC + +(defmacro BIC*rs (setr rd rn rm is) + "(BIC*r setr rd rn rm) stores the result of a logical and of rn with the complement of + the contents of optionally shifted rm in rd" + (let ((shift (shift-encoded rm is)) + (comp (lnot shift))) + (setr rd (logand rn comp)))) + +(defun BICWrs (rd rn rm is) (BIC*rs setw rd rn rm is)) +(defun BICXrs (rd rn rm is) (BIC*rs set$ rd rn rm is)) + +(defmacro BICS*rs (setr rd rn rm is) + "(BICS*r setr rd rn rm) sets appropriate flags and stores the result of a logical and of rn + with the complement of the contents of optionally shifted rm in rd" + (let ((shift (shift-encoded rm is)) + (comp (lnot shift)) + (result (logand rn comp))) + (set-nzcv-after-logic-op result) + (setr rd result))) + +(defun BICSWrs (rd rn rm is) (BICS*rs setw rd rn rm is)) +(defun BICSXrs (rd rn rm is) (BICS*rs set$ rd rn rm is)) + +;; REV... + +(defmacro REVn*r (setr container-size rd rn) + "(REVn*r setr container-size rd rn) implements the non-vector REV# + instructions with the given container-size." 
+  (setr rd (reverse-in-containers container-size 8 rn)))
+
+(defun REVWr (rd rn) (REVn*r setw 32 rd rn))
+(defun REVXr (rd rn) (REVn*r set$ 64 rd rn))
+(defun REV16Xr (rd rn) (REVn*r set$ 16 rd rn))
+(defun REV16Wr (rd rn) (REVn*r setw 16 rd rn))
+(defun REV32Xr (rd rn) (REVn*r set$ 32 rd rn))
+
 ;; UBFM and SBFM
 ;; (bitfield moves)
@@ -84,3 +149,24 @@
 (defun BFMWri (_ xd xr ir is)
   (BFM setw xd xr ir is))
+
+;; Shifts
+
+(defmacro SHIFT*r (setr shift datasize rd rn rm)
+  "(ASRV*r setr datasize rd rn rm) does an arithmetic shift right and stores it
+   in the destination register rd"
+  (setr rd (cast-low datasize (shift rn (mod rm datasize)))))
+
+(defun ASRVXr (rd rn rm) (SHIFT*r set$ arshift 64 rd rn rm))
+(defun ASRVWr (rd rn rm) (SHIFT*r setw arshift 32 rd rn rm))
+
+(defun LSRVXr (rd rn rm) (SHIFT*r set$ rshift 64 rd rn rm))
+(defun LSRVWr (rd rn rm) (SHIFT*r setw rshift 32 rd rn rm))
+(defun LSLVXr (rd rn rm) (SHIFT*r set$ lshift 64 rd rn rm))
+(defun LSLVWr (rd rn rm) (SHIFT*r setw lshift 32 rd rn rm))
+
+(defun RORVXr (rd rn rm) (SHIFT*r set$ rotate-right 64 rd rn rm))
+(defun RORVWr (rd rn rm) (SHIFT*r setw rotate-right 32 rd rn rm))
+
+(defun RBITXr (rd rn) (set$ rd (reverse-bits rn)))
+(defun RBITWr (rd rn) (setw rd (reverse-bits rn)))
\ No newline at end of file
diff --git a/plugins/arm/semantics/aarch64-simd-arithmetic.lisp b/plugins/arm/semantics/aarch64-simd-arithmetic.lisp
new file mode 100644
index 000000000..7320c702f
--- /dev/null
+++ b/plugins/arm/semantics/aarch64-simd-arithmetic.lisp
@@ -0,0 +1,66 @@
+(declare (context (target armv8-a+le)))
+
+(in-package aarch64)
+
+(defun sym-to-binop (binop-sym x y)
+  (case binop-sym
+    'add (+ x y)
+    'sub (- x y)
+    'mul (* x y)))
+
+(defun vector-binop (sym ecount esize vn vm)
+  "(vector-binop sym ecount esize vn vm e) returns the result
+   of applying the binary operation specified by sym (see sym-to-binop)
+   to each of the elements in vn and vm.
For example, with addition, + Elem[vn, ecount-1, esize] + Elem[vm, ecount-1, esize] + concat + ... + concat + Elem[vn, 0, esize] + Elem[vm, 0, esize] + ecount and esize are the number and size of the elements." + (vector-binop/helper sym ecount esize vn vm 0)) + +(defun vector-binop/helper (sym ecount esize vn vm e) + ;; i can't make this a macro and take in the binop as + ;; a function, because when i try, BAP gets a stack overflow ._. + (if (>= e (-1 ecount)) + (sym-to-binop sym (extract-elem vn e esize) (extract-elem vm e esize)) + (concat + (vector-binop/helper sym ecount esize vn vm (+1 e)) + (sym-to-binop sym (extract-elem vn e esize) (extract-elem vm e esize ))))) + +(defun ADDv*i* (vd vn vm ecount esize) + (set$ vd (vector-binop 'add ecount esize vn vm))) + +(defun ADDv1i64 (vd vn vm) (ADDv*i* vd vn vm 1 64)) +(defun ADDv2i64 (vd vn vm) (ADDv*i* vd vn vm 2 64)) +(defun ADDv2i32 (vd vn vm) (ADDv*i* vd vn vm 2 32)) +(defun ADDv4i32 (vd vn vm) (ADDv*i* vd vn vm 4 32)) +(defun ADDv4i16 (vd vn vm) (ADDv*i* vd vn vm 4 16)) +(defun ADDv8i16 (vd vn vm) (ADDv*i* vd vn vm 8 16)) +(defun ADDv8i8 (vd vn vm) (ADDv*i* vd vn vm 8 8)) +(defun ADDv16i8 (vd vn vm) (ADDv*i* vd vn vm 16 8)) + +(defun SUBv*i* (vd vn vm ecount esize) + (set$ vd (vector-binop 'sub ecount esize vn vm))) + +(defun SUBv1i64 (vd vn vm) (SUBv*i* vd vn vm 1 64)) +(defun SUBv2i64 (vd vn vm) (SUBv*i* vd vn vm 2 64)) +(defun SUBv2i32 (vd vn vm) (SUBv*i* vd vn vm 2 32)) +(defun SUBv4i32 (vd vn vm) (SUBv*i* vd vn vm 4 32)) +(defun SUBv4i16 (vd vn vm) (SUBv*i* vd vn vm 4 16)) +(defun SUBv8i16 (vd vn vm) (SUBv*i* vd vn vm 8 16)) +(defun SUBv8i8 (vd vn vm) (SUBv*i* vd vn vm 8 8)) +(defun SUBv16i8 (vd vn vm) (SUBv*i* vd vn vm 16 8)) + +(defun MULv*i* (vd vn vm ecount esize) + (set$ vd (vector-binop 'mul ecount esize vn vm))) + +(defun MULv1i64 (vd vn vm) (MULv*i* vd vn vm 1 64)) +(defun MULv2i64 (vd vn vm) (MULv*i* vd vn vm 2 64)) +(defun MULv2i32 (vd vn vm) (MULv*i* vd vn vm 2 32)) +(defun MULv4i32 (vd vn vm) 
(MULv*i* vd vn vm 4 32)) +(defun MULv4i16 (vd vn vm) (MULv*i* vd vn vm 4 16)) +(defun MULv8i16 (vd vn vm) (MULv*i* vd vn vm 8 16)) +(defun MULv8i8 (vd vn vm) (MULv*i* vd vn vm 8 8)) +(defun MULv16i8 (vd vn vm) (MULv*i* vd vn vm 16 8)) diff --git a/plugins/arm/semantics/aarch64-simd-load.lisp b/plugins/arm/semantics/aarch64-simd-load.lisp new file mode 100644 index 000000000..639fa5297 --- /dev/null +++ b/plugins/arm/semantics/aarch64-simd-load.lisp @@ -0,0 +1,665 @@ +(declare (context (target armv8-a+le))) + +(in-package aarch64) + +;;; LDs.. +;;; NOTE: +;;; encodings do not CheckFPAdvSIMDEnabled64(), HaveMTE2Ext(), +;;; SetTagCheckedInstruction(), CheckSPAlignment() + +;; LD1 (multiple structures, post index, four registers) + +(defmacro LD1Fourv._POST (elems bytes va base off) + (LD..v._POST 4 elems 1 bytes va base off)) + +(defun LD1Fourv8b_POST (_ da_db_dc_dd xn xm) (LD1Fourv._POST 8 1 da_db_dc_dd xn xm)) +(defun LD1Fourv16b_POST (_ qa_qb_qc_qd xn xm) (LD1Fourv._POST 16 1 qa_qb_qc_qd xn xm)) +(defun LD1Fourv4h_POST (_ da_db_dc_dd xn xm) (LD1Fourv._POST 4 2 da_db_dc_dd xn xm)) +(defun LD1Fourv8h_POST (_ qa_qb_qc_qd xn xm) (LD1Fourv._POST 8 2 qa_qb_qc_qd xn xm)) +(defun LD1Fourv2s_POST (_ da_db_dc_dd xn xm) (LD1Fourv._POST 2 4 da_db_dc_dd xn xm)) +(defun LD1Fourv4s_POST (_ qa_qb_qc_qd xn xm) (LD1Fourv._POST 4 4 qa_qb_qc_qd xn xm)) +(defun LD1Fourv1d_POST (_ da_db_dc_dd xn xm) (LD1Fourv._POST 1 8 da_db_dc_dd xn xm)) +(defun LD1Fourv2d_POST (_ qa_qb_qc_qd xn xm) (LD1Fourv._POST 2 8 qa_qb_qc_qd xn xm)) + +;; LD1 (multiple structures, post index, three registers) + +(defmacro LD1Threev._POST (elems bytes va base off) + (LD..v._POST 3 elems 1 bytes va base off)) + +(defun LD1Threev8b_POST (_ da_db_dc xn xm) (LD1Threev._POST 8 1 da_db_dc xn xm)) +(defun LD1Threev16b_POST (_ qa_qb_qc xn xm) (LD1Threev._POST 16 1 qa_qb_qc xn xm)) +(defun LD1Threev4h_POST (_ da_db_dc xn xm) (LD1Threev._POST 4 2 da_db_dc xn xm)) +(defun LD1Threev8h_POST (_ qa_qb_qc xn xm) (LD1Threev._POST 
8 2 qa_qb_qc xn xm)) +(defun LD1Threev2s_POST (_ da_db_dc xn xm) (LD1Threev._POST 2 4 da_db_dc xn xm)) +(defun LD1Threev4s_POST (_ qa_qb_qc xn xm) (LD1Threev._POST 4 4 qa_qb_qc xn xm)) +(defun LD1Threev1d_POST (_ da_db_dc xn xm) (LD1Threev._POST 1 8 da_db_dc xn xm)) +(defun LD1Threev2d_POST (_ qa_qb_qc xn xm) (LD1Threev._POST 2 8 qa_qb_qc xn xm)) + +;; LD1 (multiple structures, post index, two registers) + +(defmacro LD1Twov._POST (elems bytes va base off) + (LD..v._POST 2 elems 1 bytes va base off)) + +(defun LD1Twov8b_POST (_ da_db xn xm) (LD1Twov._POST 8 1 da_db xn xm)) +(defun LD1Twov16b_POST (_ qa_qb xn xm) (LD1Twov._POST 16 1 qa_qb xn xm)) +(defun LD1Twov4h_POST (_ da_db xn xm) (LD1Twov._POST 4 2 da_db xn xm)) +(defun LD1Twov8h_POST (_ qa_qb xn xm) (LD1Twov._POST 8 2 qa_qb xn xm)) +(defun LD1Twov2s_POST (_ da_db xn xm) (LD1Twov._POST 2 4 da_db xn xm)) +(defun LD1Twov4s_POST (_ qa_qb xn xm) (LD1Twov._POST 4 4 qa_qb xn xm)) +(defun LD1Twov1d_POST (_ da_db xn xm) (LD1Twov._POST 1 8 da_db xn xm)) +(defun LD1Twov2d_POST (_ qa_qb xn xm) (LD1Twov._POST 2 8 qa_qb xn xm)) + +;; LD1 (multiple structures, post index, one register) + +(defmacro LD1Onev._POST (elems bytes va base off) + (LD..v._POST 1 elems 1 bytes va base off)) + +(defun LD1Onev8b_POST (_ da xn xm) (LD1Onev._POST 8 1 da xn xm)) +(defun LD1Onev16b_POST (_ qa xn xm) (LD1Onev._POST 16 1 qa xn xm)) +(defun LD1Onev4h_POST (_ da xn xm) (LD1Onev._POST 4 2 da xn xm)) +(defun LD1Onev8h_POST (_ qa xn xm) (LD1Onev._POST 8 2 qa xn xm)) +(defun LD1Onev2s_POST (_ da xn xm) (LD1Onev._POST 2 4 da xn xm)) +(defun LD1Onev4s_POST (_ qa xn xm) (LD1Onev._POST 4 4 qa xn xm)) +(defun LD1Onev1d_POST (_ da xn xm) (LD1Onev._POST 1 8 da xn xm)) +(defun LD1Onev2d_POST (_ qa xn xm) (LD1Onev._POST 2 8 qa xn xm)) + +;; LD1 (multiple structures, no offset, four registers) + +(defmacro LD1Fourv. (elems bytes va base) + (LD 4 elems 1 base bytes va)) + +(defun LD1Fourv8b (da_db_dc_dd xn) (LD1Fourv. 
8 1 da_db_dc_dd xn)) +(defun LD1Fourv16b (qa_qb_qc_qd xn) (LD1Fourv. 16 1 qa_qb_qc_qd xn)) +(defun LD1Fourv4h (da_db_dc_dd xn) (LD1Fourv. 4 2 da_db_dc_dd xn)) +(defun LD1Fourv8h (qa_qb_qc_qd xn) (LD1Fourv. 8 2 qa_qb_qc_qd xn)) +(defun LD1Fourv2s (da_db_dc_dd xn) (LD1Fourv. 2 4 da_db_dc_dd xn)) +(defun LD1Fourv4s (qa_qb_qc_qd xn) (LD1Fourv. 4 4 qa_qb_qc_qd xn)) +(defun LD1Fourv1d (da_db_dc_dd xn) (LD1Fourv. 1 8 da_db_dc_dd xn)) +(defun LD1Fourv2d (qa_qb_qc_qd xn) (LD1Fourv. 2 8 qa_qb_qc_qd xn)) + +;; LD1 (multiple structures, no offset, three registers) + +(defmacro LD1Threev. (elems bytes va base) + (LD 3 elems 1 base bytes va)) + +(defun LD1Threev8b (da_db_dc xn) (LD1Threev. 8 1 da_db_dc xn)) +(defun LD1Threev16b (qa_qb_qc xn) (LD1Threev. 16 1 qa_qb_qc xn)) +(defun LD1Threev4h (da_db_dc xn) (LD1Threev. 4 2 da_db_dc xn)) +(defun LD1Threev8h (qa_qb_qc xn) (LD1Threev. 8 2 qa_qb_qc xn)) +(defun LD1Threev2s (da_db_dc xn) (LD1Threev. 2 4 da_db_dc xn)) +(defun LD1Threev4s (qa_qb_qc xn) (LD1Threev. 4 4 qa_qb_qc xn)) +(defun LD1Threev1d (da_db_dc xn) (LD1Threev. 1 8 da_db_dc xn)) +(defun LD1Threev2d (qa_qb_qc xn) (LD1Threev. 2 8 qa_qb_qc xn)) + +;; LD1 (multiple structures, no offset, two registers) + +(defmacro LD1Twov. (elems bytes va base) + (LD 2 elems 1 base bytes va)) + +(defun LD1Twov8b (da_db xn) (LD1Twov. 8 1 da_db xn)) +(defun LD1Twov16b (qa_qb xn) (LD1Twov. 16 1 qa_qb xn)) +(defun LD1Twov4h (da_db xn) (LD1Twov. 4 2 da_db xn)) +(defun LD1Twov8h (qa_qb xn) (LD1Twov. 8 2 qa_qb xn)) +(defun LD1Twov2s (da_db xn) (LD1Twov. 2 4 da_db xn)) +(defun LD1Twov4s (qa_qb xn) (LD1Twov. 4 4 qa_qb xn)) +(defun LD1Twov1d (da_db xn) (LD1Twov. 1 8 da_db xn)) +(defun LD1Twov2d (qa_qb xn) (LD1Twov. 2 8 qa_qb xn)) + +;; LD1 (multiple structures, no offset, one register) + +(defmacro LD1Onev. (elems bytes va base) + (LD 1 elems 1 base bytes va)) + +(defun LD1Onev8b (da xn) (LD1Onev. 8 1 da xn)) +(defun LD1Onev16b (qa xn) (LD1Onev. 16 1 qa xn)) +(defun LD1Onev4h (da xn) (LD1Onev. 
4 2 da xn)) +(defun LD1Onev8h (qa xn) (LD1Onev. 8 2 qa xn)) +(defun LD1Onev2s (da xn) (LD1Onev. 2 4 da xn)) +(defun LD1Onev4s (qa xn) (LD1Onev. 4 4 qa xn)) +(defun LD1Onev1d (da xn) (LD1Onev. 1 8 da xn)) +(defun LD1Onev2d (qa xn) (LD1Onev. 2 8 qa xn)) + +;; LD2 (multiple structures, post index) + +(defun LD2Twov8b_POST (_ da_db xn xm) (LD2Twov._POST da_db xn xm 8 1)) +(defun LD2Twov16b_POST (_ qa_qb xn xm) (LD2Twov._POST qa_qb xn xm 16 1)) +(defun LD2Twov4h_POST (_ da_db xn xm) (LD2Twov._POST da_db xn xm 4 2)) +(defun LD2Twov8h_POST (_ qa_qb xn xm) (LD2Twov._POST qa_qb xn xm 8 2)) +(defun LD2Twov2s_POST (_ da_db xn xm) (LD2Twov._POST da_db xn xm 2 4)) +(defun LD2Twov4s_POST (_ qa_qb xn xm) (LD2Twov._POST qa_qb xn xm 4 4)) +(defun LD2Twov2d_POST (_ qa_qb xn xm) (LD2Twov._POST qa_qb xn xm 2 8)) + +(defmacro LD2Twov._POST (va_vb xn xm elems bytes) + "(LD2Twov._POST va_vb xn elesms bytes) loads multiple 2-element structures from + memory at address xn with offset xm and stores it in va and vb with de-interleaving." + (LD..v._POST 1 elems 2 bytes va_vb xn xm)) + +;; LD2 (multiple structures, no offset) + +(defun LD2Twov8b (da_db xn) (LD2Twov. da_db xn 8 1)) +(defun LD2Twov16b (qa_qb xn) (LD2Twov. qa_qb xn 16 1)) +(defun LD2Twov4h (da_db xn) (LD2Twov. da_db xn 4 2)) +(defun LD2Twov8h (qa_qb xn) (LD2Twov. qa_qb xn 8 2)) +(defun LD2Twov2s (da_db xn) (LD2Twov. da_db xn 2 4)) +(defun LD2Twov4s (qa_qb xn) (LD2Twov. qa_qb xn 4 4)) +(defun LD2Twov2d (qa_qb xn) (LD2Twov. qa_qb xn 2 8)) + +(defmacro LD2Twov. (va_vb xn elems bytes) + "(LD2Twov. va_vb xn elesms bytes) loads multiple 2-element structures from + memory at address xn and stores it in va and vb with de-interleaving." 
+ (LD 1 elems 2 xn bytes va_vb)) + +;; LD3 (multiple structures, post index) + +(defun LD3Threev8b_POST (_ da_db_dc xn xm) (LD3Threev._POST da_db_dc xn xm 8 1)) +(defun LD3Threev16b_POST (_ qa_qb_qc xn xm) (LD3Threev._POST qa_qb_qc xn xm 16 1)) +(defun LD3Threev4h_POST (_ da_db_dc xn xm) (LD3Threev._POST da_db_dc xn xm 4 2)) +(defun LD3Threev8h_POST (_ qa_qb_qc xn xm) (LD3Threev._POST qa_qb_qc xn xm 8 2)) +(defun LD3Threev2s_POST (_ da_db_dc xn xm) (LD3Threev._POST da_db_dc xn xm 2 4)) +(defun LD3Threev4s_POST (_ qa_qb_qc xn xm) (LD3Threev._POST qa_qb_qc xn xm 4 4)) +(defun LD3Threev2d_POST (_ qa_qb_qc xn xm) (LD3Threev._POST qa_qb_qc xn xm 2 8)) + +(defmacro LD3Threev._POST (va_vb_vc xn xm elems bytes) + "(LD3Threev._POST va_vb_vc xn xm elems bytes) loads multiple 3-element structures + from memory at address xn with offset xm and stores it in va, vb and vc with de-interleaving." + (LD..v._POST 1 elems 3 bytes va_vb_vc xn xm)) + +;; LD3 (multiple structures, no offset) + +(defun LD3Threev8b (da_db_dc xn) (LD3Threev. da_db_dc xn 8 1)) +(defun LD3Threev16b (qa_qb_qc xn) (LD3Threev. qa_qb_qc xn 16 1)) +(defun LD3Threev4h (da_db_dc xn) (LD3Threev. da_db_dc xn 4 2)) +(defun LD3Threev8h (qa_qb_qc xn) (LD3Threev. qa_qb_qc xn 8 2)) +(defun LD3Threev2s (da_db_dc xn) (LD3Threev. da_db_dc xn 2 4)) +(defun LD3Threev4s (qa_qb_qc xn) (LD3Threev. qa_qb_qc xn 4 4)) +(defun LD3Threev2d (qa_qb_qc xn) (LD3Threev. qa_qb_qc xn 2 8)) + +(defmacro LD3Threev. (va_vb_vc xn elems bytes) + "(LD3Threev. va_vb_vc xn elems bytes) loads multiple 3-element structures from + memory at address xn and stores it in va, vb and vc with de-interleaving." 
+ (LD 1 elems 3 xn bytes va_vb_vc)) + +;; LD4 (multiple structures, post index) + +(defun LD4Fourv8b_POST (_ da_db_dc_dd xn xm) (LD4Fourv._POST da_db_dc_dd xn xm 8 1)) +(defun LD4Fourv16b_POST (_ qa_qb_qc_qd xn xm) (LD4Fourv._POST qa_qb_qc_qd xn xm 16 1)) +(defun LD4Fourv4h_POST (_ da_db_dc_dd xn xm) (LD4Fourv._POST da_db_dc_dd xn xm 4 2)) +(defun LD4Fourv8h_POST (_ qa_qb_qc_qd xn xm) (LD4Fourv._POST qa_qb_qc_qd xn xm 8 2)) +(defun LD4Fourv2s_POST (_ da_db_dc_dd xn xm) (LD4Fourv._POST da_db_dc_dd xn xm 2 4)) +(defun LD4Fourv4s_POST (_ qa_qb_qc_qd xn xm) (LD4Fourv._POST qa_qb_qc_qd xn xm 4 4)) +(defun LD4Fourv2d_POST (_ qa_qb_qc_qd xn xm) (LD4Fourv._POST qa_qb_qc_qd xn xm 2 8)) + +(defmacro LD4Fourv._POST (va_vb_vc xn xm elems bytes) + "(LD4Fourv._POST va_vb_vc xn xm elems bytes) loads multiple 4-element structures + from memory at address xn with offset xm and stores it in va, vb, vc and vd with de-interleaving." + (LD..v._POST 1 elems 4 bytes va_vb_vc xn xm)) + +;; LD4 (multiple structures, no offset) + +(defun LD4Fourv8b (da_db_dc_dd xn) (LD4Fourv. da_db_dc_dd xn 8 1)) +(defun LD4Fourv16b (qa_qb_qc_qd xn) (LD4Fourv. qa_qb_qc_qd xn 16 1)) +(defun LD4Fourv4h (da_db_dc_dd xn) (LD4Fourv. da_db_dc_dd xn 4 2)) +(defun LD4Fourv8h (qa_qb_qc_qd xn) (LD4Fourv. qa_qb_qc_qd xn 8 2)) +(defun LD4Fourv2s (da_db_dc_dd xn) (LD4Fourv. da_db_dc_dd xn 2 4)) +(defun LD4Fourv4s (qa_qb_qc_qd xn) (LD4Fourv. qa_qb_qc_qd xn 4 4)) +(defun LD4Fourv2d (qa_qb_qc_qd xn) (LD4Fourv. qa_qb_qc_qd xn 2 8)) + +(defmacro LD4Fourv. (va_vb_vc xn elems bytes) + "(LD4Fourv. va_vb_vc xn elems bytes) loads multiple 4-element structures from memory + at address xn and stores it in va, vb, vc and vd with de-interleaving." 
+ (LD 1 elems 4 xn bytes va_vb_vc)) + +;; LD multiple struct algorithm + +(defmacro LD..v._POST (rpt elems selems bytes grp base off) + "(LD..v._POST rpt elems selems bytes grp base off) loads multiple selems-element structs + from memory address base with offset off and stores them in the vector list grp." + (prog + (LD rpt elems selems base bytes grp) + (if (= (symbol off) 'XZR) + (set$ base (+ base (* rpt selems elems bytes))) + (set$ base (+ base off))))) + +(defmacro LD (rpt elems selems base bytes grp) + "(LD rpt elems selems base bytes grp) loads multiple selems-element structs from memory address base." + (insert-with-de-interleaving 0 rpt elems selems base bytes grp)) + +(defun insert-with-de-interleaving (r rpt elems selems base bytes grp) + (if (> rpt 1) + (when (< r rpt) + (let ((nth (nth-reg-in-group grp r))) + (prog + (insert-a 1 0 elems base bytes (symbol nth) 0 0 0 0) + (insert-with-de-interleaving + (+ r 1) rpt elems selems (+ base (* elems bytes)) bytes grp)))) + (insert-a selems 0 elems base bytes grp 0 0 0 0))) + +(defun insert-a (selems e elems addr bytes grp acc-a acc-b acc-c acc-d) + (if (< e elems) + (let ((acc-a (if (= e 0) (mem-read addr bytes) (concat acc-a (mem-read addr bytes))))) + (if (> selems 1) + (insert-b selems e elems (+ addr bytes) bytes grp acc-a acc-b acc-c acc-d) + (insert-a selems (+ e 1) elems (+ addr bytes) bytes grp acc-a acc-b acc-c acc-d))) + (prog + (when (<= 1 selems) (set$ (nth-reg-in-group grp 0) acc-a)) + (when (<= 2 selems) (set$ (nth-reg-in-group grp 1) acc-b)) + (when (<= 3 selems) (set$ (nth-reg-in-group grp 2) acc-c)) + (when (<= 4 selems) (set$ (nth-reg-in-group grp 3) acc-d))))) + +(defun insert-b (selems e elems addr bytes grp acc-a acc-b acc-c acc-d) + (let ((acc-b (if (= e 0) (mem-read addr bytes) (concat acc-b (mem-read addr bytes))))) + (if (> selems 2) + (insert-c selems e elems (+ addr bytes) bytes grp acc-a acc-b acc-c acc-d) + (insert-a selems (+ e 1) elems (+ addr bytes) bytes grp acc-a acc-b 
acc-c acc-d)))) + +(defun insert-c (selems e elems addr bytes grp acc-a acc-b acc-c acc-d) + (let ((acc-c (if (= e 0) (mem-read addr bytes) (concat acc-c (mem-read addr bytes))))) + (if (> selems 3) + (insert-d selems e elems (+ addr bytes) bytes grp acc-a acc-b acc-c acc-d) + (insert-a selems (+ e 1) elems (+ addr bytes) bytes grp acc-a acc-b acc-c acc-d)))) + +(defun insert-d (selems e elems addr bytes grp acc-a acc-b acc-c acc-d) + (let ((acc-d (if (= e 0) (mem-read addr bytes) (concat acc-d (mem-read addr bytes))))) + (insert-a selems (+ e 1) elems (+ addr bytes) bytes grp acc-a acc-b acc-c acc-d))) + +;; LD single struct algorithm + +(defmacro LD.i._POST (selems grp index base size off) + "(LD.i._POST selems grp index base size off) loads multiple single structures from + address base with post index off, and inserts each structure into the index of each + vector register in grp." + (prog + (LD.i. selems grp index base size) + (if (= (symbol off) 'XZR) + (set$ base (+ base (/ size 8))) + (set$ base (+ base off))))) + +(defmacro LD.i. (selems grp index base size) + "(LD.i._POST selems grp index base size off) loads multiple single structures from + address base, and inserts each structure into the index of each vector register in grp." + (insert-single-element 0 selems grp index base size)) + +(defun insert-single-element (s selems grp index base size) + (when (< s selems) + (prog + (insert-element-into-vector (nth-reg-in-group grp s) + index (mem-read base (/ size 8)) size) + (insert-single-element (+ s 1) selems grp index (+ base (/ size 8)) size)))) + +;; LD1 (single struct, no offset) + +(defmacro LD1i. (qa index base size) + (LD.i. 1 qa index base size)) + +(defun LD1i8 (_ qa index xn) (LD1i. qa index xn 8)) +(defun LD1i16 (_ qa index xn) (LD1i. qa index xn 16)) +(defun LD1i32 (_ qa index xn) (LD1i. qa index xn 32)) +(defun LD1i64 (_ qa index xn) (LD1i. 
qa index xn 64)) + +;; LD1 (single struct, post index) + +(defmacro LD1i._POST (qa index base size off) + (LD.i._POST 1 qa index base size off)) + +(defun LD1i8_POST (_ _ qa index xn xm) (LD1i._POST qa index xn 8 xm)) +(defun LD1i16_POST (_ _ qa index xn xm) (LD1i._POST qa index xn 16 xm)) +(defun LD1i32_POST (_ _ qa index xn xm) (LD1i._POST qa index xn 32 xm)) +(defun LD1i64_POST (_ _ qa index xn xm) (LD1i._POST qa index xn 64 xm)) + +;; LD2 (single struct, no offset) + +(defmacro LD2i. (qa_qb index base size) + (LD.i. 2 qa_qb index base size)) + +(defun LD2i8 (_ qa_qb index xn) (LD2i. qa_qb index xn 8)) +(defun LD2i16 (_ qa_qb index xn) (LD2i. qa_qb index xn 16)) +(defun LD2i32 (_ qa_qb index xn) (LD2i. qa_qb index xn 32)) +(defun LD2i64 (_ qa_qb index xn) (LD2i. qa_qb index xn 64)) + +;; LD2 (single struct, post index) + +(defmacro LD2i._POST (qa_qb index base size off) + (LD.i._POST 2 qa_qb index base size off)) + +(defun LD2i8_POST (_ _ qa_qb index xn xm) (LD2i._POST qa_qb index xn 8 xm)) +(defun LD2i16_POST (_ _ qa_qb index xn xm) (LD2i._POST qa_qb index xn 16 xm)) +(defun LD2i32_POST (_ _ qa_qb index xn xm) (LD2i._POST qa_qb index xn 32 xm)) +(defun LD2i64_POST (_ _ qa_qb index xn xm) (LD2i._POST qa_qb index xn 64 xm)) + +;; LD3 (single struct, no offset) + +(defmacro LD3i. (qa_qb_qc index base size) + (LD.i. 3 qa_qb_qc index base size)) + +(defun LD3i8 (_ qa_qb_qc index xn) (LD3i. qa_qb_qc index xn 8)) +(defun LD3i16 (_ qa_qb_qc index xn) (LD3i. qa_qb_qc index xn 16)) +(defun LD3i32 (_ qa_qb_qc index xn) (LD3i. qa_qb_qc index xn 32)) +(defun LD3i64 (_ qa_qb_qc index xn) (LD3i. 
qa_qb_qc index xn 64)) + +;; LD3 (single struct, post index) + +(defmacro LD3i._POST (qa_qb_qc index base size off) + (LD.i._POST 3 qa_qb_qc index base size off)) + +(defun LD3i8_POST (_ _ qa_qb_qc index xn xm) (LD3i._POST qa_qb_qc index xn 8 xm)) +(defun LD3i16_POST (_ _ qa_qb_qc index xn xm) (LD3i._POST qa_qb_qc index xn 16 xm)) +(defun LD3i32_POST (_ _ qa_qb_qc index xn xm) (LD3i._POST qa_qb_qc index xn 32 xm)) +(defun LD3i64_POST (_ _ qa_qb_qc index xn xm) (LD3i._POST qa_qb_qc index xn 64 xm)) + +;; LD4 (single struct, no offset) + +(defmacro LD4i. (qa_qb_qc_qd index base size) + (LD.i. 4 qa_qb_qc_qd index base size)) + +(defun LD4i8 (_ qa_qb_qc_qd index xn) (LD4i. qa_qb_qc_qd index xn 8)) +(defun LD4i16 (_ qa_qb_qc_qd index xn) (LD4i. qa_qb_qc_qd index xn 16)) +(defun LD4i32 (_ qa_qb_qc_qd index xn) (LD4i. qa_qb_qc_qd index xn 32)) +(defun LD4i64 (_ qa_qb_qc_qd index xn) (LD4i. qa_qb_qc_qd index xn 64)) + +;; LD4 (single struct, post index) + +(defmacro LD4i._POST (qa_qb_qc_qd index base size off) + (LD.i._POST 4 qa_qb_qc_qd index base size off)) + +(defun LD4i8_POST (_ _ qa_qb_qc_qd index xn xm) (LD4i._POST qa_qb_qc_qd index xn 8 xm)) +(defun LD4i16_POST (_ _ qa_qb_qc_qd index xn xm) (LD4i._POST qa_qb_qc_qd index xn 16 xm)) +(defun LD4i32_POST (_ _ qa_qb_qc_qd index xn xm) (LD4i._POST qa_qb_qc_qd index xn 32 xm)) +(defun LD4i64_POST (_ _ qa_qb_qc_qd index xn xm) (LD4i._POST qa_qb_qc_qd index xn 64 xm)) + +;; LD.R algorithm + +(defmacro LD.Rv._POST (grp base esize dsize selems off) + "(LD.Rv._POST grp base esize dsize selems off) loads an multiple element from a + base address and off post index, replicates them to the size of dsize and + inserts them into each vector register in group." + (prog + (LD.Rv. grp base esize dsize selems) + (if (= (symbol off) 'XZR) + (set$ base (+ base (* selems (/ dsize 8)))) + (set$ base (+ base off))))) + +(defmacro LD.Rv. (grp base esize dsize selems) + "(LD.Rv. 
grp base esize dsize selems) loads an multiple element from a + base address, replicates them to the size of dsize and + inserts them into each vector register in group." + (insert-single-and-replicate grp base esize dsize selems 0)) + +(defun insert-single-and-replicate (grp base esize dsize selems s) + (when (< s selems) + (let ((element (cast-low esize (mem-read base (/ dsize esize))))) + (prog + (replicate-and-insert (nth-reg-in-group grp s) element esize dsize) + (insert-single-and-replicate grp (+ base (/ dsize 8)) esize dsize selems (+ s 1)))))) + +;; LD1R (no offset) + +(defmacro LD1Rv. (va xn esize dsize) + (LD.Rv. (symbol va) xn esize dsize 1)) + +(defun LD1Rv8b (va xn) (LD1Rv. va xn 8 64)) +(defun LD1Rv16b (va xn) (LD1Rv. va xn 8 128)) +(defun LD1Rv4h (va xn) (LD1Rv. va xn 16 64)) +(defun LD1Rv8h (va xn) (LD1Rv. va xn 16 128)) +(defun LD1Rv2s (va xn) (LD1Rv. va xn 32 64)) +(defun LD1Rv4s (va xn) (LD1Rv. va xn 32 128)) +(defun LD1Rv1d (va xn) (LD1Rv. va xn 64 64)) +(defun LD1Rv2d (va xn) (LD1Rv. va xn 64 128)) + +;; LD1R (post index) + +(defmacro LD1Rv._POST (va xn esize dsize off) + (LD.Rv._POST (symbol va) xn esize dsize 1 off)) + +(defun LD1Rv8b_POST (_ va xn xm) (LD1Rv._POST va xn 8 64 xm)) +(defun LD1Rv16b_POST (_ va xn xm) (LD1Rv._POST va xn 8 128 xm)) +(defun LD1Rv4h_POST (_ va xn xm) (LD1Rv._POST va xn 16 64 xm)) +(defun LD1Rv8h_POST (_ va xn xm) (LD1Rv._POST va xn 16 128 xm)) +(defun LD1Rv2s_POST (_ va xn xm) (LD1Rv._POST va xn 32 64 xm)) +(defun LD1Rv4s_POST (_ va xn xm) (LD1Rv._POST va xn 32 128 xm)) +(defun LD1Rv1d_POST (_ va xn xm) (LD1Rv._POST va xn 64 64 xm)) +(defun LD1Rv2d_POST (_ va xn xm) (LD1Rv._POST va xn 64 128 xm)) + +;; LD2R (no offset) + +(defmacro LD2Rv. (va_vb xn esize dsize) + (LD.Rv. va_vb xn esize dsize 2)) + +(defun LD2Rv8b (va_vb xn) (LD2Rv. va_vb xn 8 64)) +(defun LD2Rv16b (va_vb xn) (LD2Rv. va_vb xn 8 128)) +(defun LD2Rv4h (va_vb xn) (LD2Rv. va_vb xn 16 64)) +(defun LD2Rv8h (va_vb xn) (LD2Rv. 
va_vb xn 16 128)) +(defun LD2Rv2s (va_vb xn) (LD2Rv. va_vb xn 32 64)) +(defun LD2Rv4s (va_vb xn) (LD2Rv. va_vb xn 32 128)) +(defun LD2Rv1d (va_vb xn) (LD2Rv. va_vb xn 64 64)) +(defun LD2Rv2d (va_vb xn) (LD2Rv. va_vb xn 64 128)) + +;; LD2R (post index) + +(defmacro LD2Rv._POST (va_vb xn esize dsize off) + (LD.Rv._POST va_vb xn esize dsize 2 off)) + +(defun LD2Rv8b_POST (_ va_vb xn xm) (LD2Rv._POST va_vb xn 8 64 xm)) +(defun LD2Rv16b_POST (_ va_vb xn xm) (LD2Rv._POST va_vb xn 8 128 xm)) +(defun LD2Rv4h_POST (_ va_vb xn xm) (LD2Rv._POST va_vb xn 16 64 xm)) +(defun LD2Rv8h_POST (_ va_vb xn xm) (LD2Rv._POST va_vb xn 16 128 xm)) +(defun LD2Rv2s_POST (_ va_vb xn xm) (LD2Rv._POST va_vb xn 32 64 xm)) +(defun LD2Rv4s_POST (_ va_vb xn xm) (LD2Rv._POST va_vb xn 32 128 xm)) +(defun LD2Rv1d_POST (_ va_vb xn xm) (LD2Rv._POST va_vb xn 64 64 xm)) +(defun LD2Rv2d_POST (_ va_vb xn xm) (LD2Rv._POST va_vb xn 64 128 xm)) + +;; LD3R (no offset) + +(defmacro LD3Rv. (va_vb_vc xn esize dsize) + (LD.Rv. va_vb_vc xn esize dsize 3)) + +(defun LD3Rv8b (va_vb_vc xn) (LD3Rv. va_vb_vc xn 8 64)) +(defun LD3Rv16b (va_vb_vc xn) (LD3Rv. va_vb_vc xn 8 128)) +(defun LD3Rv4h (va_vb_vc xn) (LD3Rv. va_vb_vc xn 16 64)) +(defun LD3Rv8h (va_vb_vc xn) (LD3Rv. va_vb_vc xn 16 128)) +(defun LD3Rv2s (va_vb_vc xn) (LD3Rv. va_vb_vc xn 32 64)) +(defun LD3Rv4s (va_vb_vc xn) (LD3Rv. va_vb_vc xn 32 128)) +(defun LD3Rv1d (va_vb_vc xn) (LD3Rv. va_vb_vc xn 64 64)) +(defun LD3Rv2d (va_vb_vc xn) (LD3Rv. 
va_vb_vc xn 64 128)) + +;; LD3R (post index) + +(defmacro LD3Rv._POST (va_vb_vc xn esize dsize off) + (LD.Rv._POST va_vb_vc xn esize dsize 3 off)) + +(defun LD3Rv8b_POST (_ va_vb_vc xn xm) (LD3Rv._POST va_vb_vc xn 8 64 xm)) +(defun LD3Rv16b_POST (_ va_vb_vc xn xm) (LD3Rv._POST va_vb_vc xn 8 128 xm)) +(defun LD3Rv4h_POST (_ va_vb_vc xn xm) (LD3Rv._POST va_vb_vc xn 16 64 xm)) +(defun LD3Rv8h_POST (_ va_vb_vc xn xm) (LD3Rv._POST va_vb_vc xn 16 128 xm)) +(defun LD3Rv2s_POST (_ va_vb_vc xn xm) (LD3Rv._POST va_vb_vc xn 32 64 xm)) +(defun LD3Rv4s_POST (_ va_vb_vc xn xm) (LD3Rv._POST va_vb_vc xn 32 128 xm)) +(defun LD3Rv1d_POST (_ va_vb_vc xn xm) (LD3Rv._POST va_vb_vc xn 64 64 xm)) +(defun LD3Rv2d_POST (_ va_vb_vc xn xm) (LD3Rv._POST va_vb_vc xn 64 128 xm)) + +;; LD4R (no offset) + +(defmacro LD4Rv. (va_vb_vc_vd xn esize dsize) + (LD.Rv. va_vb_vc_vd xn esize dsize 4)) + +(defun LD4Rv8b (va_vb_vc_vd xn) (LD4Rv. va_vb_vc_vd xn 8 64)) +(defun LD4Rv16b (va_vb_vc_vd xn) (LD4Rv. va_vb_vc_vd xn 8 128)) +(defun LD4Rv4h (va_vb_vc_vd xn) (LD4Rv. va_vb_vc_vd xn 16 64)) +(defun LD4Rv8h (va_vb_vc_vd xn) (LD4Rv. va_vb_vc_vd xn 16 128)) +(defun LD4Rv2s (va_vb_vc_vd xn) (LD4Rv. va_vb_vc_vd xn 32 64)) +(defun LD4Rv4s (va_vb_vc_vd xn) (LD4Rv. va_vb_vc_vd xn 32 128)) +(defun LD4Rv1d (va_vb_vc_vd xn) (LD4Rv. va_vb_vc_vd xn 64 64)) +(defun LD4Rv2d (va_vb_vc_vd xn) (LD4Rv. 
va_vb_vc_vd xn 64 128)) + +;; LD4R (post index) + +(defmacro LD4Rv._POST (va_vb_vc_vd xn esize dsize off) + (LD.Rv._POST va_vb_vc_vd xn esize dsize 4 off)) + +(defun LD4Rv8b_POST (_ va_vb_vc_vd xn xm) (LD4Rv._POST va_vb_vc_vd xn 8 64 xm)) +(defun LD4Rv16b_POST (_ va_vb_vc_vd xn xm) (LD4Rv._POST va_vb_vc_vd xn 8 128 xm)) +(defun LD4Rv4h_POST (_ va_vb_vc_vd xn xm) (LD4Rv._POST va_vb_vc_vd xn 16 64 xm)) +(defun LD4Rv8h_POST (_ va_vb_vc_vd xn xm) (LD4Rv._POST va_vb_vc_vd xn 16 128 xm)) +(defun LD4Rv2s_POST (_ va_vb_vc_vd xn xm) (LD4Rv._POST va_vb_vc_vd xn 32 64 xm)) +(defun LD4Rv4s_POST (_ va_vb_vc_vd xn xm) (LD4Rv._POST va_vb_vc_vd xn 32 128 xm)) +(defun LD4Rv1d_POST (_ va_vb_vc_vd xn xm) (LD4Rv._POST va_vb_vc_vd xn 64 64 xm)) +(defun LD4Rv2d_POST (_ va_vb_vc_vd xn xm) (LD4Rv._POST va_vb_vc_vd xn 64 128 xm)) + +;; LDNP + +(defmacro LDNP.i (vn vm base imm size scale) + "(LDNP.i vn vm base imm) loads a pair of SIMD registers from memory at + at address base with optional offset imm and stores them in vn and vm. + Issues a non-temporal hint, in the form of an intrinsic for each memory + access." + (let ((off (lshift (cast-signed 64 imm) scale)) + (dbytes (/ size 8))) + (intrinsic 'non-temporal-hint (+ base off)) + (set$ vn (mem-read (+ base off) dbytes)) + (intrinsic 'non-temporal-hint (+ base off dbytes)) + (set$ vm (mem-read (+ base off dbytes) dbytes)))) + +(defun LDNPSi (sn sm base imm) (LDNP.i sn sm base imm 32 2)) +(defun LDNPDi (dn dm base imm) (LDNP.i dn dm base imm 64 3)) +(defun LDNPQi (qn qm base imm) (LDNP.i qn qm base imm 128 4)) + +;; LDP (pre-index) + +(defmacro LDP.pre (vn vm base imm size scale) + "(LDP.i qn qm imm size mem-load scale) loads a pair of SIMD&FP registers from + memory using the address base and an optional signed immediate offset." 
+ (let ((off (lshift (cast-signed 64 imm) scale)) + (dbytes (/ size 8)) + (addr (+ base off))) + (set$ vn (mem-read addr (/ size 8))) + (set$ vm (mem-read (+ addr dbytes) (/ size 8))) + (set$ base addr))) + +(defun LDPQpre (_ qn qm base imm) (LDP.pre qn qm base imm 128 4)) +(defun LDPDpre (_ qn qm base imm) (LDP.pre qn qm base imm 64 3)) +(defun LDPSpre (_ qn qm base imm) (LDP.pre qn qm base imm 32 2)) + +;; LDP (post-index) + +(defmacro LDP.post (vn vm base imm size scale) + "(LDP.i qn qm imm size mem-load scale) loads a pair of SIMD&FP registers from + memory using the address base and an optional signed immediate offset." + (let ((off (lshift (cast-signed 64 imm) scale)) + (dbytes (/ size 8))) + (set$ vn (mem-read base (/ size 8))) + (set$ vm (mem-read (+ base dbytes) (/ size 8))) + (set$ base (+ base off)))) + +(defun LDPQpost (_ qn qm base imm) (LDP.post qn qm base imm 128 4)) +(defun LDPDpost (_ qn qm base imm) (LDP.post qn qm base imm 64 3)) +(defun LDPSpost (_ qn qm base imm) (LDP.post qn qm base imm 32 2)) + +;; LDP (signed offset) + +(defmacro LDP.i (vn vm base imm size scale) + "(LDP.i qn qm imm size mem-load scale) loads a pair of SIMD&FP registers from + memory using the address base and an optional signed immediate offset." + (let ((off (lshift (cast-signed 64 imm) scale)) + (dbytes (/ size 8))) + (set$ vn (mem-read (+ base off) dbytes)) + (set$ vm (mem-read (+ base off dbytes) dbytes)))) + +(defun LDPQi (qn qm base imm) (LDP.i qn qm base imm 128 4)) +(defun LDPDi (qn qm base imm) (LDP.i qn qm base imm 64 3)) +(defun LDPSi (qn qm base imm) (LDP.i qn qm base imm 32 2)) + +;; LDR (immediate, post-index) + +(defmacro LDR.post (vt base off size) + "(LDR.post vt base imm mem-load scale) loads an element from memory from + the post-index base address and unsigned immediate offset off and stores the result + in vt." 
+ (prog + (set$ vt (mem-read base (/ size 8))) + (set$ base (+ base off)))) + +(defun LDRBpost (_ bt base imm) (LDR.post bt base imm 8)) +(defun LDRHpost (_ ht base imm) (LDR.post ht base imm 16)) +(defun LDRSpost (_ st base imm) (LDR.post st base imm 32)) +(defun LDRDpost (_ dt base imm) (LDR.post dt base imm 64)) +(defun LDRQpost (_ qt base imm) (LDR.post qt base imm 128)) + +;; LDR (immediate, pre-index) + +(defmacro LDR.pre (vt base off size) + "(LDR.ui vt base imm mem-load scale) loads an element from memory from + the pre-index base address and unsigned immediate offset off and stores the result + in vt." + (let ((addr (+ base off))) + (set$ vt (mem-read addr (/ size 8))) + (set$ base addr))) + +(defun LDRBpre (_ bt base imm) (LDR.pre bt base imm 8)) +(defun LDRHpre (_ ht base imm) (LDR.pre ht base imm 16)) +(defun LDRSpre (_ st base imm) (LDR.pre st base imm 32)) +(defun LDRDpre (_ dt base imm) (LDR.pre dt base imm 64)) +(defun LDRQpre (_ qt base imm) (LDR.pre qt base imm 128)) + +;; LDR (immediate, unsigned offset) + +(defmacro LDR.ui (vt base imm size scale) + "(LDR.ui vt base imm mem-load scale) loads an element from memory from + the base address and unsigned immediate offset imm and stores the result + in vt." + (let ((off (lshift (cast-unsigned 64 imm) scale))) + (set$ vt (mem-read (+ base off) (/ size 8))))) + +(defun LDRBui (bt base imm) (LDR.ui bt base imm 8 0)) +(defun LDRHui (ht base imm) (LDR.ui ht base imm 16 1)) +(defun LDRSui (st base imm) (LDR.ui st base imm 32 2)) +(defun LDRDui (dt base imm) (LDR.ui dt base imm 64 3)) +(defun LDRQui (qt base imm) (LDR.ui qt base imm 128 4)) + +;; LDR (literal) + +(defmacro LDR.l (vn label bytes) + "(LDR.l vn label bytes) loads a register from memory at an address + relative to the program counter and a program label." 
+ (let ((off (cast-signed 64 (lshift label 2)))) + (set$ vn (mem-read (+ off (get-program-counter)) bytes)))) + +(defun LDRSl (sn label) (LDR.l sn label 4)) +(defun LDRDl (dn label) (LDR.l dn label 8)) +(defun LDRQl (qn label) (LDR.l qn label 16)) + +;; LDR (register) + +(defmacro LDR.ro. (vt base index signed s scale size) + "(LDR.ro. vt base index signed s scale mem-load) loads a SIMD&FP register + from address base and an optionally shifted and extended index." + (let ((shift (if (= s 1) + (+ scale 0) + (+ 0 0))) + (off (if (= signed 1) + (cast-signed 64 (lshift index shift)) + (cast-unsigned 64 (lshift index shift))))) + (set$ vt (mem-read (+ base off) (/ size 8))))) + +(defun LDRBroX (bt base index signed s) (LDR.ro. bt base index signed s 0 8)) +(defun LDRHroX (ht base index signed s) (LDR.ro. ht base index signed s 1 16)) +(defun LDRSroX (st base index signed s) (LDR.ro. st base index signed s 2 32)) +(defun LDRDroX (dt base index signed s) (LDR.ro. dt base index signed s 3 64)) +(defun LDRQroX (qt base index signed s) (LDR.ro. qt base index signed s 4 128)) + +(defun LDRBroW (bt base index signed s) (LDR.ro. bt base index signed s 0 8)) +(defun LDRHroW (ht base index signed s) (LDR.ro. ht base index signed s 1 16)) +(defun LDRSroW (st base index signed s) (LDR.ro. st base index signed s 2 32)) +(defun LDRDroW (dt base index signed s) (LDR.ro. dt base index signed s 3 64)) +(defun LDRQroW (qt base index signed s) (LDR.ro. qt base index signed s 4 128)) + +;; LDUR + +(defmacro LDUR.i (vt base simm size) + "(LDUR.i vt base simm mem-load) loads a SIMD&FP register from memory at + the address calculated from a base register and optional immediate offset." 
+ (set$ vt (mem-read (+ base simm) (/ size 8)))) + +(defun LDURBi (bt base simm) (LDUR.i bt base simm 8)) +(defun LDURHi (ht base simm) (LDUR.i ht base simm 16)) +(defun LDURSi (st base simm) (LDUR.i st base simm 32)) +(defun LDURDi (dt base simm) (LDUR.i dt base simm 64)) +(defun LDURQi (qt base simm) (LDUR.i qt base simm 128)) + diff --git a/plugins/arm/semantics/aarch64-simd-logical.lisp b/plugins/arm/semantics/aarch64-simd-logical.lisp new file mode 100644 index 000000000..b1905ec0a --- /dev/null +++ b/plugins/arm/semantics/aarch64-simd-logical.lisp @@ -0,0 +1,25 @@ +(declare (context (target armv8-a+le))) + +(in-package aarch64) + +;;; LOGICAL + +(defun ANDv8i8 (vd vn vm) (set$ vd (logand vn vm))) +(defun ANDv16i8 (vd vn vm) (set$ vd (logand vn vm))) + +;; the ISA expresses (logxor vn vm) as +;; (logxor vm (logand (logor (zeros (word-width vn)) vn) (ones (word-width vn)))) +;; I've simplified it to just this. +(defun EORv8i8 (vd vn vm) (set$ vd (logxor vn vm))) +(defun EORv16i8 (vd vn vm) (set$ vd (logxor vn vm))) + +;; the ISA says NOT acts element-wise, but this is +;; equivalent to just (lnot vn). Not sure why it does this. +(defun NOTv8i8 (vd vn) (set$ vd (lnot vn))) +(defun NOTv16i8 (vd vn) (set$ vd (lnot vn))) + +(defun ORRv8i8 (vd vn vm) (set$ vd (logor vn vm))) +(defun ORRv16i8 (vd vn vm) (set$ vd (logor vn vm))) + +(defun ORNv8i8 (vd vn vm) (set$ vd (logor vn (lnot vm)))) +(defun ORNv16i8 (vd vn vm) (set$ vd (logor vn (lnot vm)))) diff --git a/plugins/arm/semantics/aarch64-simd-mov-ins-ext.lisp b/plugins/arm/semantics/aarch64-simd-mov-ins-ext.lisp new file mode 100644 index 000000000..8d27622d1 --- /dev/null +++ b/plugins/arm/semantics/aarch64-simd-mov-ins-ext.lisp @@ -0,0 +1,56 @@ +(declare (context (target armv8-a+le))) + +(in-package aarch64) + +;;; INS + +(defun INSvi32gpr (vd _ index gpr) + "(INSvi32gpr vd ts index gpr) inserts an element in the general purpose register gpr + into vecter register vd at index. 
+ NOTE: does not encode Security state & Exception level" + (insert-element-into-vector vd index gpr 32)) + +(defun INSvi32lane (vd _ index vn index2) + "NOTE: does not encode Security state & Exception level" + (let ((element (get-vector-S-element index2 vn))) + (insert-element-into-vector vd index element 32))) + + +(defun MOVI* (datasize channelsize vd val shift) + "Sets every channel of vd to have value. the size of val should be equal to + the channel width." + (let ((val (cast-low channelsize (lshift val shift))) + (result (replicate-to-fill val datasize))) + (set$ vd result))) + +(defun MOVIv8b_ns (vd imm) + (MOVI* 64 8 vd imm 0)) + +(defun MOVIv16b_ns (vd imm) + (MOVI* 128 8 vd imm 0)) + +(defun MOVIv4i16 (vd imm shift) + (MOVI* 64 16 vd imm shift)) + +(defun MOVIv8i16 (vd imm shift) + (MOVI* 128 16 vd imm shift)) + +(defun MOVIv2i32 (vd imm shift) + (MOVI* 64 32 vd imm shift)) + +(defun MOVIv4i32 (vd imm shift) + (MOVI* 128 32 vd imm shift)) + +; EXT + +(defmacro EXTv* (datasize vd vn vm pos) + "Extracts a vector from a pair of vectors. pos is the bit offset that will + become the least significant bit of vd." + (let ((pos (lshift pos 3))) + (set$ vd (extract (+ pos (- datasize 1)) pos (concat vm vn))))) + +(defun EXTv16i8 (vd vn vm pos) + (EXTv* 128 vd vn vm pos)) + +(defun EXTv8i8 (vd vn vm pos) + (EXTv* 64 vd vn vm pos)) diff --git a/plugins/arm/semantics/aarch64-simd-store.lisp b/plugins/arm/semantics/aarch64-simd-store.lisp new file mode 100644 index 000000000..0396dd291 --- /dev/null +++ b/plugins/arm/semantics/aarch64-simd-store.lisp @@ -0,0 +1,92 @@ +(declare (context (target armv8-a+le))) + +(in-package aarch64) + +;;; STR + +(defun STR.ro* (scale rt rn rm signed shift) + "stores rt to (rn + rm << (shift * scale)) with signed or unsigned extension + of rm, where rt is a register of size (8 << scale). Note that rm can be an X + or W register and it chooses the appropriate extend mode implicitly. rn must + be an X register." 
+ (assert (< signed 2)) + (assert-msg (= (word-width rt) (lshift 8 scale)) + "STR.ro*: scale must match size of rt") + (store-word + (+ rn + (if (= signed 1) + (signed-extend (word-width rm) (lshift rm (* shift scale))) + (unsigned-extend (word-width rm) (lshift rm (* shift scale))))) + rt)) + +;; no differences in X or W address variants +(defun STRBroX (rt rn rm option shift) (STR.ro* 0 rt rn rm option shift)) +(defun STRBroW (rt rn rm option shift) (STR.ro* 0 rt rn rm option shift)) +(defun STRHroX (rt rn rm option shift) (STR.ro* 1 (cast-low 16 rt) rn rm option shift)) +(defun STRHroW (rt rn rm option shift) (STR.ro* 1 (cast-low 16 rt) rn rm option shift)) +(defun STRSroX (rt rn rm option shift) (STR.ro* 2 rt rn rm option shift)) +(defun STRSroW (rt rn rm option shift) (STR.ro* 2 rt rn rm option shift)) +(defun STRDroX (rt rn rm option shift) (STR.ro* 3 rt rn rm option shift)) +(defun STRDroW (rt rn rm option shift) (STR.ro* 3 rt rn rm option shift)) +(defun STRQroX (rt rn rm option shift) (STR.ro* 4 rt rn rm option shift)) +(defun STRQroW (rt rn rm option shift) (STR.ro* 4 rt rn rm option shift)) + +(defun STR.post (xreg src off) + "stores all of src to xreg, and post-indexes reg (reg += off)." + (store-word xreg src) + (set$ xreg (+ xreg off))) + +(defun STRQpost (_ rt rn simm) (STR.post rn rt simm)) +(defun STRDpost (_ rt rn simm) (STR.post rn rt simm)) +(defun STRSpost (_ rt rn simm) (STR.post rn (cast-low 32 rt) simm)) +(defun STRHpost (_ rt rn simm) (STR.post rn (cast-low 16 rt) simm)) +(defun STRBpost (_ rt rn simm) (STR.post rn (cast-low 8 rt) simm)) + +(defun STR.pre (xreg src off) + "stores all of src to xreg, and pre-indexes reg (reg += off)." 
+  (store-word (+ xreg off) src)
+  (set$ xreg (+ xreg off)))
+
+(defun STRQpre (_ rt rn simm) (STR.pre rn rt simm))
+(defun STRDpre (_ rt rn simm) (STR.pre rn rt simm))
+(defun STRSpre (_ rt rn simm) (STR.pre rn (cast-low 32 rt) simm))
+(defun STRHpre (_ rt rn simm) (STR.pre rn (cast-low 16 rt) simm))
+(defun STRBpre (_ rt rn simm) (STR.pre rn (cast-low 8 rt) simm))
+
+(defun STR.ui (scale src reg off)
+  "Stores a register of size (8 << scale) to the memory address
+   (reg + (off << scale))."
+  (assert-msg (= (word-width src) (lshift 8 scale))
+              "STR.ui: scale must match size of register")
+  (store-word (+ reg (lshift off scale))
+              (cast-unsigned (lshift 8 scale) src)))
+
+(defun STRQui (src reg off) (STR.ui 4 src reg off))
+(defun STRDui (src reg off) (STR.ui 3 src reg off))
+(defun STRSui (src reg off) (STR.ui 2 src reg off))
+(defun STRHui (src reg off) (STR.ui 1 src reg off))
+(defun STRBui (src reg off) (STR.ui 0 src reg off))
+
+;;; STP
+
+;; these use store-pair from aarch64-helper.lisp
+
+(defun STPQpost (_ t1 t2 dst off) (store-pair 4 'post t1 t2 dst off))
+(defun STPDpost (_ t1 t2 dst off) (store-pair 3 'post t1 t2 dst off))
+(defun STPSpost (_ t1 t2 dst off) (store-pair 2 'post t1 t2 dst off))
+
+(defun STPQpre (_ t1 t2 dst off) (store-pair 4 'pre t1 t2 dst off))
+(defun STPDpre (_ t1 t2 dst off) (store-pair 3 'pre t1 t2 dst off))
+(defun STPSpre (_ t1 t2 dst off) (store-pair 2 'pre t1 t2 dst off))
+
+(defun STPQi (rt rt2 base imm) (store-pair 4 'offset rt rt2 base imm))
+(defun STPDi (rt rt2 base imm) (store-pair 3 'offset rt rt2 base imm))
+(defun STPSi (rt rt2 base imm) (store-pair 2 'offset rt rt2 base imm))
+
+;;; STUR
+
+(defun STURQi (rn rt imm) (store-word (+ rt imm) rn))
+(defun STURDi (rn rt imm) (store-word (+ rt imm) rn))
+(defun STURSi (rn rt imm) (store-word (+ rt imm) rn))
+(defun STURHi (rn rt imm) (store-word (+ rt imm) rn))
+(defun STURBi (rn rt imm) (store-word (+ rt imm) rn))
\ No newline at end of file
diff --git 
a/plugins/arm/semantics/aarch64-special.lisp b/plugins/arm/semantics/aarch64-special.lisp index 7561975d0..a9736b04e 100644 --- a/plugins/arm/semantics/aarch64-special.lisp +++ b/plugins/arm/semantics/aarch64-special.lisp @@ -8,7 +8,8 @@ (defun make-barrier (barrier-type option) (intrinsic (symbol-concat 'barrier barrier-type - (barrier-option-to-symbol option)))) + (barrier-option-to-symbol option) + :sep '_))) (defun DMB (option) (make-barrier 'dmb option)) @@ -24,3 +25,6 @@ (defun UDF (exn) (intrinsic 'undefined-instruction)) + +(defun BRK (option) + (intrinsic 'software-breakpoint option)) diff --git a/plugins/arm/semantics/aarch64.lisp b/plugins/arm/semantics/aarch64.lisp index 0820d0771..fe5e3b1ce 100644 --- a/plugins/arm/semantics/aarch64.lisp +++ b/plugins/arm/semantics/aarch64.lisp @@ -16,6 +16,13 @@ (require aarch64-arithmetic) (require aarch64-atomic) (require aarch64-branch) -(require aarch64-logical) (require aarch64-data-movement) +(require aarch64-logical) +(require aarch64-pstate) (require aarch64-special) + +(require aarch64-simd-arithmetic) +(require aarch64-simd-load) +(require aarch64-simd-logical) +(require aarch64-simd-mov-ins-ext) +(require aarch64-simd-store) diff --git a/plugins/arm/semantics/arm-bits.lisp b/plugins/arm/semantics/arm-bits.lisp index c25a16947..4418327e7 100644 --- a/plugins/arm/semantics/arm-bits.lisp +++ b/plugins/arm/semantics/arm-bits.lisp @@ -24,12 +24,20 @@ (set CF (select 1 nzcv)) (set VF (select 0 nzcv))) -(defun add-with-carry (rd x y c) +(defun set-nzcv-after-logic-op (result) + "sets the flags after an AND operation i.e. sets the carry and overflow flags to zero and the negative and zero flags based on the result" + (set NF (msb result)) + (set ZF (is-zero result)) + (set CF 0) + (set VF 0)) + + +(defmacro add-with-carry (set rd x y c) "(add-with-carry rd x y c) sets rd to the result of adding x and y with carry bit c, and sets processor flags." 
(let ((r (+ c y x))) (set-nzcv-from-registers r x y) - (set$ rd r))) + (set rd r))) (defun add-with-carry/clear-base (rd x y c) "(add-with-carry/clear-base rd x y c) sets rd to the result of adding x and y diff --git a/plugins/arm/semantics/arm.lisp b/plugins/arm/semantics/arm.lisp index 9b993187d..240cef226 100644 --- a/plugins/arm/semantics/arm.lisp +++ b/plugins/arm/semantics/arm.lisp @@ -7,3 +7,9 @@ (defun CLZ (rd rn pre _) (when (condition-holds pre) (set$ rd (clz rn)))) + +(defun CLZWr (rd rn) + (setw rd (clz rn))) + +(defun CLZXr (rd rn) + (set$ rd (clz rn))) \ No newline at end of file diff --git a/plugins/primus_lisp/primus_lisp_semantic_primitives.ml b/plugins/primus_lisp/primus_lisp_semantic_primitives.ml index 0bf5800f7..0342db6ea 100644 --- a/plugins/primus_lisp/primus_lisp_semantic_primitives.ml +++ b/plugins/primus_lisp/primus_lisp_semantic_primitives.ml @@ -276,6 +276,12 @@ let export = Primus.Lisp.Type.Spec.[ "(alias-base-register x) if X has a symbolic value that is an aliased register returns the base register"; + "nth-reg-in-group", tuple [sym; int] @-> int, + "(nth-reg-in-group reg-group n) returns the nth register in the + symbolic register group reg-group. 
For example, + (nth-reg-in-group 'X0_X1 1) returns X1, + (nth-reg-in-group 'Q0_Q1_Q2 0) returns Q0."; + "cast-low", tuple [int; a] @-> b, "(cast-low S X) extracts low S bits from X."; @@ -752,6 +758,26 @@ module Primitives(CT : Theory.Core)(T : Target) = struct CT.var reg >>| fun v -> KB.Value.put Primus.Lisp.Semantics.symbol v (Some name) + let nth_reg_in_group target args = + binary args @@ fun sym n -> + to_int n >>= fun n -> + match n with + | None -> illformed "index must be statically known" + | Some n -> + match symbol sym with + | None -> illformed "sym must be symbol" + | Some sym -> + let components = String.split sym ~on:'_' in + match List.nth components n with + | None -> illformed "symbol does not have component at index %d" n + | Some name -> + match Theory.Target.var target name with + | None -> illformed "%s is not a register" name + | Some var -> + forget @@ + CT.var var >>| fun v -> + KB.Value.put Primus.Lisp.Semantics.symbol v (Some name) + module Intrinsic = struct type param = | Inputs @@ -1320,6 +1346,7 @@ module Primitives(CT : Theory.Core)(T : Target) = struct | "symbol",[x] -> pure@@symbol s x | "is-symbol", [x] -> pure@@is_symbol x | "alias-base-register", [x] -> pure@@alias_base_register t x + | "nth-reg-in-group",_ -> pure@@nth_reg_in_group t args | "cast-low",xs -> pure@@low xs | "cast-high",xs -> pure@@high xs | "cast-signed",xs -> pure@@signed xs diff --git a/plugins/primus_lisp/semantics/bits.lisp b/plugins/primus_lisp/semantics/bits.lisp index d0b9514f6..eda8a0135 100644 --- a/plugins/primus_lisp/semantics/bits.lisp +++ b/plugins/primus_lisp/semantics/bits.lisp @@ -66,6 +66,56 @@ (lshift bitv m) (rshift bitv (- bitv-length m))))))) +(defun extract-elem (x e esize off) + "(extract-elem x e esize off) extracts the e-th bit range + of size esize of x, after adding the bit offset off." 
+ (extract + (+ off (-1 (* esize (+1 e)))) + (+ off (* esize e)) + x)) + +(defun extract-elem (x e esize) + "(extract-elem x e esize) extracts the e-th bit range + of size esize of x." + (extract-elem x e esize 0)) + +(defun reverse-in-containers (csize esize x) + "(reverse-in-containers csize esize x) returns the result + of reversing the order of elements of elem-size bits + within each container of container-size bits. + It returns this as a concatenation of extracts of x." + (assert (= 0 (mod csize esize))) + (assert (= 0 (mod (word-width x) csize))) + (reverse-in-containers/helper csize esize x 0)) + +(defun reverse-in-containers/helper-elem (csize esize x off e) + "Returns the result of reversing the elements in one container." + (declare (visibility :private)) + (if (= e (-1 (/ csize esize))) + (extract-elem x e esize off) + (concat + (extract-elem x e esize off) + (reverse-in-containers/helper-elem csize esize x off (+1 e))))) + +(defun reverse-in-containers/helper (csize esize x c) + "Maps reverse-in-containers/helper-elem over all containers + and concatenates the results." + (declare (visibility :private)) + (if (= c (-1 (/ (word-width x) csize))) + (reverse-in-containers/helper-elem csize esize x (* csize c) 0) + (concat + (reverse-in-containers/helper csize esize x (+1 c)) + (reverse-in-containers/helper-elem csize esize x (* csize c) 0)))) + +(defun reverse-bits (x) + "(reverse-bits x) returns a bitvector with the bit order of x reversed." + (reverse-bits/helper x (-1 (word-width x)))) + +(defun reverse-bits/helper (x i) + (if (> i 0) + (concat (reverse-bits/helper x (-1 i)) (select i x)) + (select i x))) + (defun clz (x) "(clz X) counts leading zeros in X. The returned value is the number of consecutive zeros starting