Skip to content

Commit

Permalink
[SOL] Reworked the SBF textual assembly syntax to match the rbpf-styl…
Browse files Browse the repository at this point in the history
…e syntax. (#54)

- Update the syntax of every instruction in SBFInstrInfo.td (currently
  using the asm variants feature to temporarily support both).
- Update AsmParser for the new syntax (add new operand, memory,
  instruction, and directive parse routines).
- Add error checking for unresolved 16-bit branch relocations: emit an
  error message and exit gracefully (the old BPF back-end crashes), and
  add a corresponding lit unit test.
- Add new lit unit tests in MC/SBF and MC/Disassembler/SBF to cover
  disassembly, object emission, and parsing of every single instruction.
  This is more extensive coverage than existed previously.
- Remaster all CodeGen/SBF unit tests accordingly.
- A minor TableGen patch was needed to support asm strings containing
  '|' within variant strings ('|' happens to be the variant separator).

The patch is a bit more complex than it otherwise might be in that we
currently support both syntaxes to ease the verification (e.g., being
able to see and compare each instruction and object code side-by-side
within each unit test). After some 'soak time' for the new functionality,
I intend to remove the old syntax altogether and otherwise clean up. We'll
also remove the TableGen patch at that time.
  • Loading branch information
nvjle authored and LucasSte committed Feb 16, 2024
1 parent 6d64c8e commit c3944e1
Show file tree
Hide file tree
Showing 103 changed files with 2,873 additions and 753 deletions.
163 changes: 152 additions & 11 deletions llvm/lib/Target/SBF/AsmParser/SBFAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/SBFMCTargetDesc.h"
#include "MCTargetDesc/SBFInstPrinter.h"
#include "TargetInfo/SBFTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
Expand All @@ -31,6 +32,10 @@ class SBFAsmParser : public MCTargetAsmParser {

SMLoc getLoc() const { return getParser().getTok().getLoc(); }

/// Returns true when the parser is operating on the new syntax, i.e. when the
/// active assembler dialect is 0. Dialect 1 selects the old syntax (see
/// ParseDirective, which switches between the two).
bool isNewSyntax() {
  const unsigned ActiveDialect = getParser().getAssemblerDialect();
  return ActiveDialect == 0;
}

bool PreMatchCheck(OperandVector &Operands);

bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Expand All @@ -45,21 +50,26 @@ class SBFAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;

bool parseOldInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands);

bool ParseDirective(AsmToken DirectiveID) override;

// "=" is used as assignment operator for assembly statment, so can't be used
// for symbol assignment.
bool equalIsAsmAssignment() override { return false; }
// "=" is used as assignment operator for assembly statement, so can't be used
// for symbol assignment (old syntax only).
bool equalIsAsmAssignment() override { return isNewSyntax(); }
// "*" is used for dereferencing memory that it will be the start of
// statement.
bool starIsStartOfStatement() override { return true; }
// statement (old syntax only).
bool starIsStartOfStatement() override { return !isNewSyntax(); }

#define GET_ASSEMBLER_HEADER
#include "SBFGenAsmMatcher.inc"

bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
OperandMatchResultTy parseImmediate(OperandVector &Operands);
OperandMatchResultTy parseRegister(OperandVector &Operands);
OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
OperandMatchResultTy parseMemOperand(OperandVector &Operands);

public:
enum SBFMatchResultTy {
Expand Down Expand Up @@ -161,13 +171,20 @@ struct SBFOperand : public MCParsedAsmOperand {
}

void print(raw_ostream &OS) const override {
auto RegName = [](unsigned Reg) {
if (Reg)
return SBFInstPrinter::getRegisterName(Reg);
else
return "noreg";
};

switch (Kind) {
case Immediate:
OS << *getImm();
break;
case Register:
OS << "<register x";
OS << getReg() << ">";
OS << "<register ";
OS << RegName(getReg()) << ">";
break;
case Token:
OS << "'" << getToken() << "'";
Expand Down Expand Up @@ -263,6 +280,10 @@ struct SBFOperand : public MCParsedAsmOperand {

bool SBFAsmParser::PreMatchCheck(OperandVector &Operands) {

// These checks not needed for the new syntax.
if (isNewSyntax())
return false;

if (Operands.size() == 4) {
// check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
// reg1 must be the same as reg2
Expand Down Expand Up @@ -293,7 +314,9 @@ bool SBFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (PreMatchCheck(Operands))
return Error(IDLoc, "additional inst constraint not met");

switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
unsigned Dialect = getParser().getAssemblerDialect();
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm,
Dialect)) {
default:
break;
case Match_Success:
Expand Down Expand Up @@ -349,6 +372,9 @@ OperandMatchResultTy SBFAsmParser::tryParseRegister(unsigned &RegNo,

OperandMatchResultTy
SBFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
if (isNewSyntax())
llvm_unreachable("parseOperandAsOperator called for new syntax");

SMLoc S = getLoc();

if (getLexer().getKind() == AsmToken::Identifier) {
Expand Down Expand Up @@ -458,10 +484,108 @@ OperandMatchResultTy SBFAsmParser::parseImmediate(OperandVector &Operands) {
return MatchOperand_Success;
}

/// ParseInstruction - Parse an SBF instruction which is in SBF verifier
/// format.
/// Parse a memory operand of the form "[reg+/-offset]".
///
/// On success four operands are appended to \p Operands, in order: the "["
/// token, the base register, the immediate offset, and the "]" token.
///
/// \returns MatchOperand_Success if the whole operand was parsed, otherwise
/// MatchOperand_ParseFail. If the current token is not '[' nothing is consumed
/// and no diagnostic is emitted; every later failure reports an error.
/// Operands pushed before a failure are left in \p Operands.
OperandMatchResultTy SBFAsmParser::parseMemOperand(OperandVector &Operands) {
  if (getLexer().isNot(AsmToken::LBrac)) {
    return MatchOperand_ParseFail;
  }

  // Create the token while '[' is still the current token so that it carries
  // the bracket's own location; after Lex() getLoc() refers to the next token.
  Operands.push_back(SBFOperand::createToken("[", getLoc()));
  getParser().Lex(); // Eat '['.

  if (parseRegister(Operands) != MatchOperand_Success) {
    Error(getLoc(), "expected register");
    return MatchOperand_ParseFail;
  }

  if (parseImmediate(Operands) != MatchOperand_Success) {
    Error(getLoc(), "expected immediate offset");
    return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac)) {
    Error(getLoc(), "expected ']'");
    return MatchOperand_ParseFail;
  }

  // As above: record the location of ']' itself before consuming it.
  Operands.push_back(SBFOperand::createToken("]", getLoc()));
  getParser().Lex(); // Eat ']'.

  return MatchOperand_Success;
}

/// Parse a single new-syntax operand and append it to \p Operands.
///
/// The current token is tried, in order, as a register, as an immediate, and
/// (when it is '[') as a memory operand ("[reg+/-offset]").
///
/// \param Operands  Receives the parsed operand(s).
/// \param Mnemonic  Instruction mnemonic; not consulted by this routine.
/// \returns false if an operand was parsed, true otherwise. Must only be
/// called for the new syntax.
bool SBFAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  if (!isNewSyntax())
    llvm_unreachable("parseOperand called for old syntax");

  // Attempt to parse token as a register.
  if (parseRegister(Operands) == MatchOperand_Success)
    return false;

  // Attempt to parse token as an immediate.
  if (parseImmediate(Operands) == MatchOperand_Success)
    return false;

  // A '[' can only start a memory operand. parseMemOperand reports its own,
  // more specific diagnostic for every failure past the bracket, so return
  // its verdict directly instead of stacking "unknown operand" on top of it.
  if (getLexer().is(AsmToken::LBrac))
    return parseMemOperand(Operands) != MatchOperand_Success;

  // Finally we have exhausted all options and must declare defeat.
  Error(getLoc(), "unknown operand");
  return true;
}

/// Parse an SBF instruction.
///
/// Old-syntax input is handed to parseOldInstruction unchanged. For the new
/// syntax the mnemonic is pushed as the first (token) operand, followed by
/// zero or more comma-separated operands; the end-of-statement token is
/// consumed on success.
///
/// \returns false on success, true if parsing failed.
bool SBFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                    SMLoc NameLoc, OperandVector &Operands) {
  if (!isNewSyntax())
    return parseOldInstruction(Info, Name, NameLoc, Operands);

  // The mnemonic always occupies the first operand slot.
  Operands.push_back(SBFOperand::createToken(Name, NameLoc));

  // Anything other than an immediate end of statement means operands follow.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    // Parse one operand, then keep going only while a comma follows it.
    for (;;) {
      if (parseOperand(Operands, Name))
        return true;

      if (getLexer().isNot(AsmToken::Comma))
        break;

      getLexer().Lex(); // Consume the comma.
    }

    // Whatever follows the operand list must terminate the statement.
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      const SMLoc BadTokenLoc = getLexer().getLoc();
      getParser().eatToEndOfStatement();
      return Error(BadTokenLoc, "unexpected token");
    }
  }

  getParser().Lex(); // Consume the EndOfStatement.
  return false;
}

/// Parse an SBF instruction which is in SBF verifier format (old syntax).
bool SBFAsmParser::parseOldInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
if (isNewSyntax())
llvm_unreachable("parseOldInstruction called for new syntax");

// The first operand could be either register or actually an operator.
unsigned RegNo = MatchRegisterName(Name);

Expand Down Expand Up @@ -502,7 +626,24 @@ bool SBFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return false;
}

bool SBFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
/// Handle the SBF syntax-selection directives.
///
/// ".syntax_old" sets the assembler dialect to 1 and ".syntax_new" sets it
/// to 0.
///
/// \returns false if the directive is recognized here (regardless of whether
/// it is handled successfully or an error is reported); true otherwise, to
/// give the generic parser a chance at recognizing it.
bool SBFAsmParser::ParseDirective(AsmToken DirectiveID) {
  const StringRef Directive = DirectiveID.getString();

  const bool SelectsOld = Directive == ".syntax_old";
  const bool SelectsNew = Directive == ".syntax_new";

  // Not one of ours: defer to the generic parser.
  if (!SelectsOld && !SelectsNew)
    return true;

  getParser().setAssemblerDialect(SelectsOld ? 1 : 0);
  return false;
}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSBFAsmParser() {
RegisterMCAsmParser<SBFAsmParser> XX(getTheSBFXTarget());
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/SBF/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ set(LLVM_TARGET_DEFINITIONS SBF.td)

tablegen(LLVM SBFGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM SBFGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM SBFGenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
tablegen(LLVM SBFGenCallingConv.inc -gen-callingconv)
tablegen(LLVM SBFGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM SBFGenDisassemblerTables.inc -gen-disassembler)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/SBF/MCTargetDesc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ add_llvm_component_library(LLVMSBFDesc
SBFMCTargetDesc.cpp
SBFAsmBackend.cpp
SBFInstPrinter.cpp
SBFMCAsmInfo.cpp
SBFMCCodeEmitter.cpp
SBFELFObjectWriter.cpp

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/SBF/MCTargetDesc/SBFELFObjectWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "MCTargetDesc/SBFMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
Expand Down Expand Up @@ -65,6 +66,10 @@ unsigned SBFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case FK_PCRel_4:
// CALL instruction.
return ELF::R_SBF_64_32;
case FK_PCRel_2:
// Branch instruction.
Ctx.reportError(Fixup.getLoc(), "2-byte relocations not supported");
return ELF::R_SBF_NONE;
case FK_Data_8:
return (isSolana && !relocAbs64) ? ELF::R_SBF_64_64 : ELF::R_SBF_64_ABS64;
case FK_Data_4:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/SBF/MCTargetDesc/SBFInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ using namespace llvm;

// Include the auto-generated portion of the assembly writer.
#include "SBFGenAsmWriter.inc"
#include "SBFGenAsmWriter1.inc"

void SBFInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/SBF/MCTargetDesc/SBFInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,27 @@ class SBFInstPrinter : public MCInstPrinter {
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
};

class MachineInstr;

// Instruction printer derived from SBFInstPrinter that supplies its own
// getMnemonic / printInstruction / getRegisterName declarations. Because
// printInstruction and getRegisterName are declared again here, calls made
// from within this class resolve to this class's versions rather than the
// ones declared in SBFInstPrinter.
// NOTE(review): presumably the bodies come from SBFGenAsmWriter1.inc (asm
// writer variant 1, the "legacy" syntax) — confirm against SBFInstPrinter.cpp.
class SBFLegacyInstPrinter : public SBFInstPrinter {
public:
// Forwards all arguments to the SBFInstPrinter constructor.
SBFLegacyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
: SBFInstPrinter(MAI, MII, MRI) {}

// Prints MI to O via this class's printInstruction, then appends Annot via
// printAnnotation. STI is not used here.
void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
const MCSubtargetInfo &STI, raw_ostream &O) override {
printInstruction(MI, Address, O);
printAnnotation(O, Annot);
}

// Autogenerated by tblgen.
std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
};

}

#endif
46 changes: 46 additions & 0 deletions llvm/lib/Target/SBF/MCTargetDesc/SBFMCAsmInfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//===-- SBFMCAsmInfo.cpp - SBF Asm properties -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the declarations of the SBFMCAsmInfo properties.
//
//===----------------------------------------------------------------------===//

#include "SBFMCAsmInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

// Hidden command-line option "-sbf-output-asm-variant" (default 0). Its value
// is copied into AssemblerDialect by the SBFMCAsmInfo constructor below.
cl::opt<unsigned> SBFAsmWriterVariant(
"sbf-output-asm-variant", cl::Hidden, cl::init(0),
cl::desc("Choose output assembly variant (0 = sbf[default], 1 = legacy)"));

// Sets the SBF assembly properties. The assembler dialect is taken from the
// -sbf-output-asm-variant option (0 = sbf, 1 = legacy); every other field is
// assigned a fixed value. Neither TT nor Options is consulted here.
SBFMCAsmInfo::SBFMCAsmInfo(const Triple &TT, const MCTargetOptions &Options) {
// Output dialect follows the command-line selection.
AssemblerDialect = SBFAsmWriterVariant;

PrivateGlobalPrefix = ".L";
WeakRefDirective = "\t.weak\t";

UsesELFSectionDirectiveForBSS = true;
HasSingleParameterDotFile = true;
HasDotTypeDotSizeDirective = true;

SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
MinInstAlignment = 8;

// The default is 4 and it only affects dwarf elf output.
// If not set correctly, the dwarf data will be
// messed up in random places by 4 bytes. .debug_line
// section will be parsable, but with odd offsets and
// line numbers, etc.
CodePointerSize = 8;

UseIntegratedAssembler = false;
}
Loading

0 comments on commit c3944e1

Please sign in to comment.