diff --git a/include/hermes/BCGen/HBC/BytecodeGenerator.h b/include/hermes/BCGen/HBC/BytecodeGenerator.h index 4ced54a4c44..b82fd604553 100644 --- a/include/hermes/BCGen/HBC/BytecodeGenerator.h +++ b/include/hermes/BCGen/HBC/BytecodeGenerator.h @@ -270,6 +270,11 @@ class BytecodeFunctionGenerator : public BytecodeInstructionGenerator { /// This class is used by the hermes backend. /// It wraps all data required to generate the module. class BytecodeModuleGenerator { + public: + using LiteralOffset = std::pair; + using LiteralOffsetMapTy = llvh::DenseMap; + + private: /// Mapping from Function * to a sequential ID. AllocationTable functionIDMap_{}; @@ -277,9 +282,6 @@ class BytecodeModuleGenerator { DenseMap> functionGenerators_{}; - /// Generate literals buffer for object/array. - SerializedLiteralGenerator literalGenerator_; - /// The mapping from strings to ID for strings in the resulting bytecode /// module. StringLiteralTable stringTable_{}; @@ -321,6 +323,11 @@ class BytecodeModuleGenerator { /// They are stored as chars in order to shorten bytecode size std::vector objValBuffer_{}; + /// A map from instruction to literal offset in the corresponding buffers. + /// \c arrayBuffer_, \c objKeyBuffer_, \c objValBuffer_. + /// This map is populated before instruction selection. + LiteralOffsetMapTy literalOffsetMap_{}; + /// Options controlling bytecode generation. BytecodeGenerationOptions options_; @@ -343,8 +350,7 @@ class BytecodeModuleGenerator { /// Constructor which enables optimizations if \p optimizationEnabled is set. BytecodeModuleGenerator( BytecodeGenerationOptions options = BytecodeGenerationOptions::defaults()) - : literalGenerator_(*this, options.optimizationEnabled), - options_(options) {} + : options_(options) {} /// Add a function to functionIDMap_ if not already exist. Returns the ID. unsigned addFunction(Function *F); @@ -390,6 +396,18 @@ class BytecodeModuleGenerator { /// \return the index of the bigint in the table. uint32_t addBigInt(bigint::ParsedBigInt bigint); + /// Set the serialized literal tables that this generator will use. Once set, + /// no further modifications are possible. + /// \param arrayBuffer buffer containing the serialized array literals. + /// \param objBuffer buffer containing the keys of serialized object literals. + /// \param valBuffer buffer containing the values of serialized object + /// literals. + void initializeSerializedLiterals( + std::vector &&arrayBuffer, + std::vector &&keyBuffer, + std::vector &&valBuffer, + LiteralOffsetMapTy &&offsetMap); + /// Adds a compiled regexp to the module table. /// \return the index of the regexp in the table. uint32_t addRegExp(CompiledRegExp *regexp); @@ -415,17 +433,6 @@ class BytecodeModuleGenerator { /// \param stringID the index of the corresponding source in the string table. void addFunctionSource(uint32_t functionID, uint32_t stringID); - /// Returns the starting offset of the elements. - uint32_t addArrayBuffer(ArrayRef elements); - - /// Add to the the object buffer using \keys as the array of keys, and - /// \vals as the array of values. - /// Returns a pair where the first value is the object's offset into the - /// key buffer, and the second value is its offset into the value buffer. - std::pair addObjectBuffer( - ArrayRef keys, - ArrayRef vals); - /// Serializes the array of literals given into a compact char buffer. /// The serialization format can be found in: /// include/hermes/VM/SerializedLiteralParser.h @@ -444,6 +451,17 @@ class BytecodeModuleGenerator { std::vector &buff, bool isKeyBuffer); + /// For a given instruction \p inst that has an associated serialized literal, + /// obtain the offset of the literal in the associated buffer. In case of + /// an object literal, it is a pair of offsets (key and value). In case of + /// array literal, only the first offset is used. + LiteralOffset serializedLiteralOffsetFor(const Instruction *inst) { + assert( + literalOffsetMap_.count(inst) && + "instruction has no serialized literal"); + return literalOffsetMap_[inst]; + } + /// \return a BytecodeModule. std::unique_ptr generate(); }; diff --git a/include/hermes/BCGen/HBC/ConsecutiveStringStorage.h b/include/hermes/BCGen/HBC/ConsecutiveStringStorage.h index cad1c3c2d73..274225b13e5 100644 --- a/include/hermes/BCGen/HBC/ConsecutiveStringStorage.h +++ b/include/hermes/BCGen/HBC/ConsecutiveStringStorage.h @@ -74,14 +74,21 @@ class ConsecutiveStringStorage { /// Construct from a list of unique strings. Note that this is only /// instantiated for a small number of different \p I types. - template - ConsecutiveStringStorage(I begin, I end, bool optimize = false); + /// \param Force8Bit if set to std::true_type, indicates that the input + /// is *not* utf-8 encoded and consists of 8-bit bytes. If set to + /// std::false_type, the input is utf-8 encoded. + template + ConsecutiveStringStorage(I begin, I end, Force8Bit, bool optimize); /// Construct from a list of unique strings. explicit ConsecutiveStringStorage( llvh::ArrayRef strings, bool optimize = false) - : ConsecutiveStringStorage(strings.begin(), strings.end(), optimize) {} + : ConsecutiveStringStorage( + strings.begin(), + strings.end(), + std::false_type{}, + optimize) {} /// Construct from a table and storage. ConsecutiveStringStorage( diff --git a/include/hermes/BCGen/HBC/SerializedLiteralGenerator.h b/include/hermes/BCGen/HBC/SerializedLiteralGenerator.h index b683fd20d66..7b02e122fa8 100644 --- a/include/hermes/BCGen/HBC/SerializedLiteralGenerator.h +++ b/include/hermes/BCGen/HBC/SerializedLiteralGenerator.h @@ -55,12 +55,9 @@ class SerializedLiteralGenerator { private: /// The bytecode module generator. BytecodeModuleGenerator &BMGen_; - /// Whether to perform de-duplication optimization or not. - bool deDuplicate_; public: - SerializedLiteralGenerator(BytecodeModuleGenerator &BMGen, bool deDuplicate) - : BMGen_(BMGen), deDuplicate_(deDuplicate) {} + SerializedLiteralGenerator(BytecodeModuleGenerator &BMGen) : BMGen_(BMGen) {} using TagType = unsigned char; @@ -78,10 +75,10 @@ class SerializedLiteralGenerator { static constexpr unsigned SequenceMax = (1 << 12) - 1; - /// Serialize input \p literals into \p buff. + /// Serialize input \p literals and append into \p buff. /// \p isKeyBuffer: whether this is generating object literal key buffer or /// not. - uint32_t serializeBuffer( + void serializeBuffer( llvh::ArrayRef literals, std::vector &buff, bool isKeyBuffer); diff --git a/lib/BCGen/HBC/BytecodeGenerator.cpp b/lib/BCGen/HBC/BytecodeGenerator.cpp index 40916cb6355..b441adac8ef 100644 --- a/lib/BCGen/HBC/BytecodeGenerator.cpp +++ b/lib/BCGen/HBC/BytecodeGenerator.cpp @@ -80,18 +80,6 @@ void BytecodeFunctionGenerator::setJumpTable( jumpTable_ = std::move(jumpTable); } -uint32_t BytecodeModuleGenerator::addArrayBuffer(ArrayRef elements) { - return literalGenerator_.serializeBuffer(elements, arrayBuffer_, false); -} - -std::pair BytecodeModuleGenerator::addObjectBuffer( - ArrayRef keys, - ArrayRef vals) { - return std::pair{ - literalGenerator_.serializeBuffer(keys, objKeyBuffer_, true), - literalGenerator_.serializeBuffer(vals, objValBuffer_, false)}; -} - std::unique_ptr BytecodeFunctionGenerator::generateBytecodeFunction( Function::DefinitionKind definitionKind, @@ -240,6 +228,20 @@ uint32_t BytecodeModuleGenerator::addBigInt(bigint::ParsedBigInt bigint) { return bigIntTable_.addBigInt(std::move(bigint)); } +void BytecodeModuleGenerator::initializeSerializedLiterals( + std::vector &&arrayBuffer, + std::vector &&keyBuffer, + std::vector &&valBuffer, + hermes::hbc::BytecodeModuleGenerator::LiteralOffsetMapTy &&offsetMap) { + assert( + arrayBuffer_.empty() && objKeyBuffer_.empty() && objValBuffer_.empty() && + literalOffsetMap_.empty() && "serialized literals already initialized"); + arrayBuffer_ = std::move(arrayBuffer); + objKeyBuffer_ = std::move(keyBuffer); + objValBuffer_ = std::move(valBuffer); + literalOffsetMap_ = std::move(offsetMap); +} + uint32_t BytecodeModuleGenerator::addRegExp(CompiledRegExp *regexp) { return regExpTable_.addRegExp(regexp); } diff --git a/lib/BCGen/HBC/ConsecutiveStringStorage.cpp b/lib/BCGen/HBC/ConsecutiveStringStorage.cpp index c599ebf810e..107b568d4b2 100644 --- a/lib/BCGen/HBC/ConsecutiveStringStorage.cpp +++ b/lib/BCGen/HBC/ConsecutiveStringStorage.cpp @@ -571,8 +571,8 @@ class StringTableBuilder { /// and end. Note that we do not always copy the underlying string data so /// the resulting builder must not outlive these strings. In delta /// optimizing mode, only new strings are added here and packed. - template - StringTableBuilder(I begin, I end) { + template + StringTableBuilder(I begin, I end, Force8Bit) { // Generate and store a StringEntry for each string. // Remember the index of each string in our StringEntry, so that we can // later output the table in the correct order. @@ -586,7 +586,7 @@ class StringTableBuilder { static_assert(sizeof(str.data()[0]) == 1, "strings must be UTF8"); const unsigned char *begin = (const unsigned char *)str.data(); const unsigned char *end = begin + str.size(); - if (isAllASCII(begin, end)) { + if (Force8Bit::value || isAllASCII(begin, end)) { ArrayRef astr(begin, end); asciiStrings_.emplace_back(index, astr); } else { @@ -713,14 +713,15 @@ class StringTableBuilder { namespace hermes { namespace hbc { -template +template ConsecutiveStringStorage::ConsecutiveStringStorage( I begin, I end, + Force8Bit, bool optimize) { // Prepare to build our string table. // Generate storage for our ASCII and u16 strings. - StringTableBuilder builder(begin, end); + StringTableBuilder builder(begin, end, Force8Bit{}); std::vector asciiStorage; std::vector u16Storage; builder.packIntoStorage(&asciiStorage, &u16Storage, optimize); @@ -741,16 +742,25 @@ ConsecutiveStringStorage::ConsecutiveStringStorage( template ConsecutiveStringStorage::ConsecutiveStringStorage( StringSetVector::const_iterator begin, StringSetVector::const_iterator end, + std::false_type, bool optimize); template ConsecutiveStringStorage::ConsecutiveStringStorage( StringSetVector::iterator begin, StringSetVector::iterator end, + std::false_type, bool optimize); template ConsecutiveStringStorage::ConsecutiveStringStorage( ArrayRef::const_iterator begin, ArrayRef::const_iterator end, + std::false_type, + bool optimize); + +template ConsecutiveStringStorage::ConsecutiveStringStorage( + StringSetVector::const_iterator begin, + StringSetVector::const_iterator end, + std::true_type, bool optimize); uint32_t ConsecutiveStringStorage::getEntryHash(size_t i) const { diff --git a/lib/BCGen/HBC/HBC.cpp b/lib/BCGen/HBC/HBC.cpp index 0c75f8c26d3..fbf3fd1e8fe 100644 --- a/lib/BCGen/HBC/HBC.cpp +++ b/lib/BCGen/HBC/HBC.cpp @@ -138,7 +138,244 @@ UniquingStringLiteralAccumulator stringAccumulatorFromBCProvider( std::move(css), std::move(isIdentifier)}; } -} // namespace +/// A container that deduplicates byte sequences, while keeping the original +/// insertion order with the duplicates. Note: strings are used as a +/// representation for code reuse and simplicity, but the contents are meant to +/// be interpreted as unsigned bytes. +/// It maintains: +/// - a StringSetVector containing the uniqued strings. +/// - a vector mapping each originally inserted string in order to an index in +/// the StringSetVector. +/// This is a specialized class used only by \c LiteralBufferBuilder. +/// NOTE: We use std::string instead of std::vector for code reuse. +class UniquedStringVector { + public: + /// Append a string. + void push_back(llvh::StringRef str) { + indexInSet_.push_back(set_.insert(str)); + } + + /// \return how many strings the vector contains, in other words, how many + /// times \c push_back() was called. + size_t size() const { + return indexInSet_.size(); + } + + /// \return the begin iterator over the uniqued set of strings in insertion + /// order. + StringSetVector::const_iterator beginSet() const { + return set_.begin(); + } + /// \return the end iterator over the uniqued set of strings in insertion + /// order. + StringSetVector::const_iterator endSet() const { + return set_.end(); + } + + /// \return the index in the uniqued set corresponding to the insertion index + /// \p insertionIndex. + uint32_t indexInSet(size_t insertionIndex) const { + return indexInSet_[insertionIndex]; + } + + private: + /// The uniqued string set in insertion order. + StringSetVector set_{}; + /// Index into the set of each original non-deduplicated string in insertion + /// order. + std::vector indexInSet_{}; +}; + +/// A utility class which collects all serialized literals from a module, +/// optionally deduplicates them, and installs them in the +/// BytecodeModuleGenerator. +class LiteralBufferBuilder { + public: + /// Constructor. + /// \param m the IR module to process. + /// \param shouldVisitFunction a predicate indicating whether a function + /// should be processed or not. (In some cases like segment splitting we + /// want to exclude part of the module.) + /// \param bmGen the BytecodeModuleGenerator to use. + /// \param optimize whether to deduplicate the serialized literals. + LiteralBufferBuilder( + Module *m, + const std::function &shouldVisitFunction, + BytecodeModuleGenerator &bmGen, + bool optimize) + : M_(m), + shouldVisitFunction_(shouldVisitFunction), + bmGen_(bmGen), + optimize_(optimize), + literalGenerator_(bmGen) {} + + /// Do everything: collect the literals, optionally deduplicate them, install + /// them in the BytecodeModuleGenerator. + void generate(); + + private: + /// Traverse the module, skipping functions that should not be visited, + /// and collect all serialized array and object literals and the corresponding + /// instruction. + void traverse(); + + // Serialization handlers for different instructions. + + void serializeLiteralFor(AllocArrayInst *AAI); + void serializeLiteralFor(HBCAllocObjectFromBufferInst *AOFB); + + /// Serialize the the input literals \p elements into the UniquedStringVector + /// \p dest. + /// \p isKeyBuffer: whether this is generating object literal key buffer or + /// not. + void serializeInto( + UniquedStringVector &dest, + llvh::ArrayRef elements, + bool isKeyBuffer); + + private: + /// The IR module to process. + Module *const M_; + /// A predicate indicating whether a function should be processed or not. (In + /// some cases like segment splitting we want to exclude part of the module.) + const std::function &shouldVisitFunction_; + /// The BytecodeModuleGenerator to use. + BytecodeModuleGenerator &bmGen_; + /// Whether to deduplicate the serialized literals. + bool const optimize_; + + /// The stateless generator object. + SerializedLiteralGenerator literalGenerator_; + + /// Temporary buffer to serialize literals into. We keep it around instead + /// of allocating a new one every time. + std::vector tempBuffer_{}; + + /// Each element is a serialized array literal. + UniquedStringVector arrays_{}; + + /// Each element records the instruction whose literal was serialized at the + /// corresponding index in \c arrays_. + std::vector arraysInst_{}; + + /// Each element is the keys portion of a serialized object literal. + UniquedStringVector objKeys_{}; + + /// Each element is the values portion of a serialized object literal. + UniquedStringVector objVals_{}; + + /// Each element records the instruction whose literal was serialized at the + /// corresponding index in \c objKeys_/objVals_. + std::vector objInst_{}; +}; + +void LiteralBufferBuilder::generate() { + traverse(); + + // Construct the serialized storage, optionally optimizing it. + ConsecutiveStringStorage arrayStorage{ + arrays_.beginSet(), arrays_.endSet(), std::true_type{}, optimize_}; + ConsecutiveStringStorage keyStorage{ + objKeys_.beginSet(), objKeys_.endSet(), std::true_type{}, optimize_}; + ConsecutiveStringStorage valStorage{ + objVals_.beginSet(), objVals_.endSet(), std::true_type{}, optimize_}; + + // Populate the offset map. + BytecodeModuleGenerator::LiteralOffsetMapTy literalOffsetMap{}; + + // Visit all array literals. + auto arrayView = const_cast(arrayStorage) + .getStringTableView(); + for (size_t i = 0, e = arraysInst_.size(); i != e; ++i) { + assert( + literalOffsetMap.count(arraysInst_[i]) == 0 && + "instruction literal can't be serialized twice"); + uint32_t arrayIndexInSet = arrays_.indexInSet(i); + literalOffsetMap[arraysInst_[i]] = BytecodeModuleGenerator::LiteralOffset{ + arrayView[arrayIndexInSet].getOffset(), UINT32_MAX}; + } + + // Visit all object literals - they are split in two buffers. + auto keyView = const_cast(keyStorage) + .getStringTableView(); + auto valView = const_cast(valStorage) + .getStringTableView(); + for (size_t i = 0, e = objInst_.size(); i != e; ++i) { + assert( + literalOffsetMap.count(objInst_[i]) == 0 && + "instruction literal can't be serialized twice"); + uint32_t keyIndexInSet = objKeys_.indexInSet(i); + uint32_t valIndexInSet = objVals_.indexInSet(i); + literalOffsetMap[objInst_[i]] = BytecodeModuleGenerator::LiteralOffset{ + keyView[keyIndexInSet].getOffset(), valView[valIndexInSet].getOffset()}; + } + + bmGen_.initializeSerializedLiterals( + arrayStorage.acquireStringStorage(), + keyStorage.acquireStringStorage(), + valStorage.acquireStringStorage(), + std::move(literalOffsetMap)); +} + +void LiteralBufferBuilder::traverse() { + for (auto &F : *M_) { + if (!shouldVisitFunction_(&F)) + continue; + + for (auto &BB : F) { + for (auto &I : BB) { + if (auto *AAI = dyn_cast(&I)) { + serializeLiteralFor(AAI); + } else if (auto *AOFB = dyn_cast(&I)) { + serializeLiteralFor(AOFB); + } + } + } + } +} + +void LiteralBufferBuilder::serializeInto( + UniquedStringVector &dest, + llvh::ArrayRef elements, + bool isKeyBuffer) { + tempBuffer_.clear(); + literalGenerator_.serializeBuffer(elements, tempBuffer_, isKeyBuffer); + dest.push_back( + llvh::StringRef((const char *)tempBuffer_.data(), tempBuffer_.size())); +} + +void LiteralBufferBuilder::serializeLiteralFor(AllocArrayInst *AAI) { + SmallVector elements; + for (unsigned i = 0, e = AAI->getElementCount(); i < e; ++i) { + elements.push_back(cast(AAI->getArrayElement(i))); + } + + serializeInto(arrays_, elements, false); + arraysInst_.push_back(AAI); +} + +void LiteralBufferBuilder::serializeLiteralFor( + HBCAllocObjectFromBufferInst *AOFB) { + unsigned e = AOFB->getKeyValuePairCount(); + if (!e) + return; + + SmallVector objKeys; + SmallVector objVals; + for (unsigned ind = 0; ind != e; ++ind) { + auto keyValuePair = AOFB->getKeyValuePair(ind); + objKeys.push_back(cast(keyValuePair.first)); + objVals.push_back(cast(keyValuePair.second)); + } + + serializeInto(objKeys_, objKeys, true); + serializeInto(objVals_, objVals, false); + assert( + objKeys_.size() == objVals_.size() && + "objKeys_ and objVals_ must be the same size"); + objInst_.push_back(AOFB); +} +}; // namespace std::unique_ptr hbc::generateBytecodeModule( Module *M, @@ -222,6 +459,13 @@ std::unique_ptr hbc::generateBytecodeModule( std::move(strings), options.optimizationEnabled)); } + // Generate the serialized literal buffers. + { + LiteralBufferBuilder litBuilder{ + M, shouldGenerate, BMGen, options.optimizationEnabled}; + litBuilder.generate(); + } + // Add each function to BMGen so that each function has a unique ID. for (auto &F : *M) { if (!shouldGenerate(&F)) { diff --git a/lib/BCGen/HBC/ISel.cpp b/lib/BCGen/HBC/ISel.cpp index 122a75a2b09..425fc682697 100644 --- a/lib/BCGen/HBC/ISel.cpp +++ b/lib/BCGen/HBC/ISel.cpp @@ -783,18 +783,13 @@ void HBCISel::generateAllocArrayInst(AllocArrayInst *Inst, BasicBlock *next) { if (elementCount == 0) { BCFGen_->emitNewArray(dstReg, sizeHint); } else { - SmallVector elements; - for (unsigned i = 0, e = Inst->getElementCount(); i < e; ++i) { - elements.push_back(cast(Inst->getArrayElement(i))); - } - auto bufIndex = - BCFGen_->BMGen_.addArrayBuffer(ArrayRef{elements}); - if (bufIndex <= UINT16_MAX) { + auto bufIndex = BCFGen_->BMGen_.serializedLiteralOffsetFor(Inst); + if (bufIndex.first <= UINT16_MAX) { BCFGen_->emitNewArrayWithBuffer( - encodeValue(Inst), sizeHint, elementCount, bufIndex); + encodeValue(Inst), sizeHint, elementCount, bufIndex.first); } else { BCFGen_->emitNewArrayWithBufferLong( - encodeValue(Inst), sizeHint, elementCount, bufIndex); + encodeValue(Inst), sizeHint, elementCount, bufIndex.first); } } } @@ -843,21 +838,13 @@ void HBCISel::generateHBCAllocObjectFromBufferInst( HBCAllocObjectFromBufferInst *Inst, BasicBlock *next) { auto result = encodeValue(Inst); - int e = Inst->getKeyValuePairCount(); - SmallVector objKeys; - SmallVector objVals; - for (int ind = 0; ind < e; ind++) { - auto keyValuePair = Inst->getKeyValuePair(ind); - objKeys.push_back(cast(keyValuePair.first)); - objVals.push_back(cast(keyValuePair.second)); - } + unsigned e = Inst->getKeyValuePairCount(); // size hint operand of NewObjectWithBuffer opcode is 16-bit. uint32_t sizeHint = std::min((uint32_t)UINT16_MAX, Inst->getSizeHint()->asUInt32()); - auto buffIdxs = BCFGen_->BMGen_.addObjectBuffer( - llvh::ArrayRef{objKeys}, llvh::ArrayRef{objVals}); + auto buffIdxs = BCFGen_->BMGen_.serializedLiteralOffsetFor(Inst); if (buffIdxs.first <= UINT16_MAX && buffIdxs.second <= UINT16_MAX) { BCFGen_->emitNewObjectWithBuffer( result, sizeHint, e, buffIdxs.first, buffIdxs.second); diff --git a/lib/BCGen/HBC/SerializedLiteralGenerator.cpp b/lib/BCGen/HBC/SerializedLiteralGenerator.cpp index cbb5d93b4ea..78a030350d6 100644 --- a/lib/BCGen/HBC/SerializedLiteralGenerator.cpp +++ b/lib/BCGen/HBC/SerializedLiteralGenerator.cpp @@ -43,7 +43,7 @@ void serializeValueToBuffer( } } // namespace -uint32_t SerializedLiteralGenerator::serializeBuffer( +void SerializedLiteralGenerator::serializeBuffer( llvh::ArrayRef literals, std::vector &buff, bool isKeyBuffer) { @@ -57,12 +57,6 @@ uint32_t SerializedLiteralGenerator::serializeBuffer( // they can be added to tempBuff after the tag is finalized std::vector tmpSeqBuffer; - // Store the constructed buffer in a separate vector. - // This vector will be searched for in \buff. If an exact match - // occurs, \buff will not be modified, and the match's offset will be - // returned. - std::vector tempBuff; - // Stores the length of the current type sequence size_t seqLength = 0; @@ -111,9 +105,8 @@ uint32_t SerializedLiteralGenerator::serializeBuffer( if (newTag != lastTag || seqLength == SequenceMax) { if (seqLength > 0) { - appendTagToBuffer(tempBuff, lastTag, seqLength); - tempBuff.insert( - tempBuff.end(), tmpSeqBuffer.begin(), tmpSeqBuffer.end()); + appendTagToBuffer(buff, lastTag, seqLength); + buff.insert(buff.end(), tmpSeqBuffer.begin(), tmpSeqBuffer.end()); tmpSeqBuffer.resize(0); } lastTag = newTag; @@ -159,25 +152,8 @@ uint32_t SerializedLiteralGenerator::serializeBuffer( } } // The last value in the buffer can't get serialized in the loop. - appendTagToBuffer(tempBuff, lastTag, seqLength); - tempBuff.insert(tempBuff.end(), tmpSeqBuffer.begin(), tmpSeqBuffer.end()); - - // If this array buffer has already been added, potentially as a substring of - // another, we can just point there instead. This simple search gives a nice - // little space saving, but at a quadratic cost (fast in practice though). - if (deDuplicate_) { - auto it = - std::search(buff.begin(), buff.end(), tempBuff.begin(), tempBuff.end()); - - if (it != buff.end()) { - return it - buff.begin(); - } - } - - // If it doesn't exist or we don't optimize, append it and return its offset. - uint32_t ret = buff.size(); - buff.insert(buff.end(), tempBuff.begin(), tempBuff.end()); - return ret; + appendTagToBuffer(buff, lastTag, seqLength); + buff.insert(buff.end(), tmpSeqBuffer.begin(), tmpSeqBuffer.end()); } } // namespace hbc diff --git a/lib/BCGen/HBC/UniquingFilenameTable.cpp b/lib/BCGen/HBC/UniquingFilenameTable.cpp index 0805a73ec10..e5d020e9584 100644 --- a/lib/BCGen/HBC/UniquingFilenameTable.cpp +++ b/lib/BCGen/HBC/UniquingFilenameTable.cpp @@ -19,7 +19,8 @@ uint32_t UniquingFilenameTable::addFilename(llvh::StringRef filename) { /* static */ ConsecutiveStringStorage UniquingFilenameTable::toStorage( UniquingFilenameTable table) { auto &filenames = table.filenames_; - return ConsecutiveStringStorage{filenames.begin(), filenames.end()}; + return ConsecutiveStringStorage{ + filenames.begin(), filenames.end(), std::false_type{}, false}; } } // namespace hbc diff --git a/test/hermes/array-literal-large.js b/test/hermes/array-literal-large.js new file mode 100644 index 00000000000..97211ecf80b --- /dev/null +++ b/test/hermes/array-literal-large.js @@ -0,0 +1,50 @@ +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + + +// RUN: %hermes %s > %t.js && %hermes -O %t.js -gc-sanitize-handles=0 +// REQUIRES: !slow_debug + +var numElems = 0; +var totalElems = 200; +var nextRecurse = 10; + +function makeArrayStr(maxSz){ + var arrStr = '['; + for (let i = 0; i < maxSz; i++) { + if (numElems > totalElems){ + break; + } + numElems++; + if (numElems % nextRecurse == 0){ + rnd = Math.floor(10 + Math.random() * 10); + arrStr += makeArrayStr(rnd) + ','; + } + arrStr += Math.floor(1 + Math.random() * 200) + ','; + } + arrStr += ']'; + return arrStr; +} + +function makeAssignVar(name){ + numElems = 0; + code = 'var ' + name + ' = ' + code += makeArrayStr(totalElems); + code += ';\n'; + return code +} + +function makeTest(){ + var code = ''; + var N = 4000; + for (var i = 0; i < N; i++){ + code += makeAssignVar('arr' + i); + } + return code; +} + +print(makeTest()) diff --git a/test/hermes/array-literal.js b/test/hermes/array-literal.js new file mode 100644 index 00000000000..d20028d66d9 --- /dev/null +++ b/test/hermes/array-literal.js @@ -0,0 +1,23 @@ +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// RUN: %hermes -O -target=HBC %s | %FileCheck --match-full-lines %s +// RUN: %hermes -O -target=HBC -emit-binary -out %t.hbc %s && %hermes %t.hbc | %FileCheck --match-full-lines %s +// Test that literal arrays work. +"use strict"; + +print('Array Literal'); +// CHECK-LABEL: Array Literal +var arr = [3, 5, null, true, false]; +print(arr.__proto__.constructor === Array); +// CHECK-NEXT: true +print(arr.toString()); +// CHECK-NEXT: 3,5,,true,false +print(arr[0]); +// CHECK-NEXT: 3 +print(arr[4]); +// CHECK-NEXT: false diff --git a/unittests/BCGen/HBC.cpp b/unittests/BCGen/HBC.cpp index 1333c787d43..e49104314b5 100644 --- a/unittests/BCGen/HBC.cpp +++ b/unittests/BCGen/HBC.cpp @@ -317,48 +317,101 @@ TEST(HBCBytecodeGen, ExceptionTableTest) { } TEST(HBCBytecodeGen, ArrayBufferTest) { - // Since deserialization of the array buffer now requires a codeblock, - // the only thing that can be checked at BCGen time is that it uses - // the proper number of bytes for the serialization format. - std::string Result; - llvh::raw_string_ostream OS(Result); - - auto Ctx = std::make_shared(); - Module M(Ctx); - IRBuilder Builder(&M); - - BytecodeModuleGenerator BMG; - BMG.initializeStringTable(stringsForTest({"abc"})); - - Function *F = Builder.createTopLevelFunction( - M.getInitialScope()->createInnerScope(), true); - auto BFG = BytecodeFunctionGenerator::create(BMG, 3); - BFG->emitMov(1, 2); - std::vector arr1{ - Builder.getLiteralNumber(1), - Builder.getLiteralBool(true), - Builder.getLiteralBool(false), - Builder.getLiteralNull(), - Builder.getLiteralNull(), - Builder.getLiteralString("abc"), - }; - BMG.addArrayBuffer(llvh::ArrayRef{arr1}); + auto src = R"( +var arr = [1, true, false, null, null, 'abc'] +)"; + auto BM = bytecodeModuleForSource(src); + ASSERT_EQ(BM->getArrayBufferSize(), 10u); +} - BMG.setEntryPointIndex(BMG.addFunction(F)); - BMG.setFunctionGenerator(F, std::move(BFG)); +TEST(HBCBytecodeGen, ObjectBufferTest) { + auto src = R"( +var obj = {a:1, b:2, c:3}; +)"; + auto BM = bytecodeModuleForSource(src); + ASSERT_EQ(BM->getObjectKeyBufferSize(), 4u); + ASSERT_EQ(BM->getObjectValueBufferSize(), 13u); +} - std::shared_ptr BM = BMG.generate(); - // auto &BF = BM->getGlobalCode(); - ASSERT_EQ(BM->getArrayBufferSize(), 10u); +// For the following 'easy' tests, exact duplicate literals are used. These will +// always be de-duplicated, so we don't have to turn optimizations on. +TEST(HBCBytecodeGen, EasyArrayDedupBufferTest) { + auto singleArrCode = R"( +var arr = [1, null, undefined, 9n, 'hi', {}, Symbol('sym')]; +)"; + auto dupArrCode = R"( +var arr1 = [1, null, undefined, 9n, 'hi', {}, Symbol('sym')]; +var arr2 = [1, null, undefined, 9n, 'hi', {}, Symbol('sym')]; +var arr3 = [1, null, undefined, 9n, 'hi', {}, Symbol('sym')]; +var arr4 = [1, null, undefined, 9n, 'hi', {}, Symbol('sym')]; +var s = arr1[0] + arr2[0] + arr3[0] + arr4[0]; +)"; + auto singleArrBM = bytecodeModuleForSource(singleArrCode); + auto manyArrBM = bytecodeModuleForSource(dupArrCode); + // If de-duplication is working correctly, the amount of space that a single + // array literal takes up should be the same amount of space that N identical + // array literals take up. + ASSERT_EQ(singleArrBM->getArrayBufferSize(), manyArrBM->getArrayBufferSize()); +} - BytecodeSerializer BS{OS}; - BS.serialize(*BM, SHA1{}); +TEST(HBCBytecodeGen, EasyObjectDedupBufferTest) { + auto singleObjCode = R"( +var obj = {a:1, b:null, c:undefined, d:9n, e:'hi', f:{}, g:Symbol('sym')}; +)"; + auto dupObjsCode = R"( +var obj1 = {a:1, b:null, c:undefined, d:9n, e:'hi', f:{}, g:Symbol('sym')}; +var obj2 = {a:1, b:null, c:undefined, d:9n, e:'hi', f:{}, g:Symbol('sym')}; +var obj3 = {a:1, b:null, c:undefined, d:9n, e:'hi', f:{}, g:Symbol('sym')}; +var obj4 = {a:1, b:null, c:undefined, d:9n, e:'hi', f:{}, g:Symbol('sym')}; +var s = obj1.a + obj2.a + obj3.a + obj4.a; +)"; + auto singleObjBM = bytecodeModuleForSource(singleObjCode); + auto dedupBM = bytecodeModuleForSource(dupObjsCode); + // If de-duplication is working correctly, the amount of space that a single + // object literal takes up should be the same amount of space that N identical + // object literals take up. + ASSERT_EQ( + singleObjBM->getObjectKeyBufferSize(), dedupBM->getObjectKeyBufferSize()); + ASSERT_EQ( + singleObjBM->getObjectValueBufferSize(), + dedupBM->getObjectValueBufferSize()); +} - auto bytecode = hbc::BCProviderFromBuffer::createBCProviderFromBuffer( - std::make_unique(OS.str())) - .first; +// For the next 'hard' tests, the literals are not exact duplicates. If +// optimizations are off, we will simply give up there and not perform any +// de-duplications. However, if optimizations are on, then we perform a more +// advanced algorithm for de-duplicating, which will result in a smaller buffer +// size. +TEST(HBCBytecodeGen, HardArrayDedupBufferTest) { + auto almostDupCode = R"( +var arr1 = [false, true, null, 'hi', null, true, false, 'bye']; +var arr2 = ['diff', 4, 5, 6, false, true, null, 'hi', null, true, false, 'bye']; +var s = arr1[0] + arr2[1]; +)"; + auto dupBM = bytecodeModuleForSource(almostDupCode); + auto dedupOpts = BytecodeGenerationOptions::defaults(); + dedupOpts.optimizationEnabled = true; + auto dedupBM = bytecodeModuleForSource(almostDupCode, dedupOpts); + // The bytecode module which performed optimizations should have a smaller + // buffer size than the one that didn't. + ASSERT_LT(dedupBM->getArrayBufferSize(), dupBM->getArrayBufferSize()); +} - ASSERT_EQ(bytecode->getArrayBuffer().size(), 10u); +TEST(HBCBytecodeGen, HardObjectDedupBufferTest) { + auto almostDupCode = R"( +var obj1 = {a:10,b:11,c:12,1:null,2:true,3:false}; +var obj2 = {a:10,b:11,c:12}; +var s = obj1.a + obj2.a; +)"; + auto dupBM = bytecodeModuleForSource(almostDupCode); + auto dedupOpts = BytecodeGenerationOptions::defaults(); + dedupOpts.optimizationEnabled = true; + auto dedupBM = bytecodeModuleForSource(almostDupCode, dedupOpts); + // The bytecode module which performed optimizations should have a smaller + // buffer size than the one that didn't. + ASSERT_LT(dedupBM->getObjectKeyBufferSize(), dupBM->getObjectKeyBufferSize()); + ASSERT_LT( + dedupBM->getObjectValueBufferSize(), dupBM->getObjectValueBufferSize()); } TEST(SpillRegisterTest, SpillsParameters) { diff --git a/unittests/VMRuntime/CMakeLists.txt b/unittests/VMRuntime/CMakeLists.txt index 53996d59c57..8ba605d4c2e 100644 --- a/unittests/VMRuntime/CMakeLists.txt +++ b/unittests/VMRuntime/CMakeLists.txt @@ -52,7 +52,6 @@ set(RTSources NativeFunctionTest.cpp NativeStateTest.cpp MarkBitArrayNCTest.cpp - ObjectBufferTest.cpp ObjectModelTest.cpp OperationsTest.cpp PredefinedStringsTest.cpp diff --git a/unittests/VMRuntime/ObjectBufferTest.cpp b/unittests/VMRuntime/ObjectBufferTest.cpp deleted file mode 100644 index 3cc2d49453a..00000000000 --- a/unittests/VMRuntime/ObjectBufferTest.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * - * This source code is licensed under the MIT license found in the - * LICENSE file in the root directory of this source tree. - */ - -#define DEBUG_TYPE "hbc-unittests" - -#include "TestHelpers.h" - -#include "hermes/BCGen/HBC/BytecodeGenerator.h" -#include "hermes/IR/IR.h" -#include "hermes/IR/IRBuilder.h" -#include "hermes/VM/CodeBlock.h" -#include "hermes/VM/Domain.h" -#include "hermes/VM/Operations.h" -#include "hermes/VM/Runtime.h" - -#include "gtest/gtest.h" - -#include "llvh/Support/raw_ostream.h" - -using namespace hermes::vm; -using namespace hermes::hbc; - -namespace { - -using ObjectBufferTest = RuntimeTestFixture; - -TEST_F(ObjectBufferTest, TestNewObjectWithBuffer) { - auto Ctx = std::make_shared(); - hermes::Module M(Ctx); - hermes::IRBuilder Builder(&M); - - /* - ; Store the object outlined below: - ; obj = { - ; 'a': false - ; 'b': true - ; 'c': null - ; 'd': "foo" - ; 'e': {"bar": null} - ; } - ; First five values are added through NewObjectWithBuffer - ; while the sixth value is added through PutNewOwnById. - ; This test makes sure that each value is correctly stored - ; and that two objects can correctly use the same buffers. - */ - - const unsigned FRAME_SIZE = 4; - BytecodeModuleGenerator BMG; - - { // Make BMG aware of the strings it needs. - UniquingStringLiteralAccumulator strings; - - // Name of the top-level function. - strings.addString("global", /* isIdentifier */ false); - - // Strings used in the test. - strings.addString("a", /* isIdentifier */ true); - strings.addString("b", /* isIdentifier */ true); - strings.addString("c", /* isIdentifier */ true); - strings.addString("d", /* isIdentifier */ true); - strings.addString("e", /* isIdentifier */ true); - strings.addString("foo", /* isIdentifier */ false); - strings.addString("bar", /* isIdentifier */ true); - - BMG.initializeStringTable( - UniquingStringLiteralAccumulator::toTable(std::move(strings))); - } - - auto BFG = BytecodeFunctionGenerator::create(BMG, FRAME_SIZE); - - // Need to generate the BytecodeModule twice. - // Runtime needs to agree on string table contents with the bytecode, - // which means the runtime must be initialized with a BytecodeModule. - // First module just inserts the proper string indices into the runtime, - // the second module actually generates instructions. - std::vector objKeys{ - Builder.getLiteralString("a"), - Builder.getLiteralString("b"), - Builder.getLiteralString("c"), - Builder.getLiteralString("d")}; - - std::vector objVals{ - Builder.getLiteralBool(false), - Builder.getLiteralBool(true), - Builder.getLiteralNull(), - Builder.getLiteralString("foo")}; - - std::vector innerObjKeys{Builder.getLiteralString("bar")}; - std::vector innerObjVals{Builder.getLiteralNull()}; - - auto objIdxs = BMG.addObjectBuffer( - llvh::ArrayRef{objKeys}, - llvh::ArrayRef{objVals}); - auto innerObjIdxs = BMG.addObjectBuffer( - llvh::ArrayRef{innerObjKeys}, - llvh::ArrayRef{innerObjVals}); - - auto IDa = BMG.getStringID("a"); - auto IDb = BMG.getStringID("b"); - auto IDc = BMG.getStringID("c"); - auto IDd = BMG.getStringID("d"); - auto IDe = BMG.getStringID("e"); - auto IDfoo = BMG.getStringID("foo"); - auto IDbar = BMG.getStringID("bar"); - - BFG->emitLoadConstInt(3, 1); - BFG->emitNewObjectWithBuffer(0, 5, 4, objIdxs.first, objIdxs.second); - BFG->emitNewObjectWithBuffer( - 1, 1, 1, innerObjIdxs.first, innerObjIdxs.second); - BFG->emitPutNewOwnById(0, 1, IDe); - - BFG->emitGetById(1, 0, 0, IDa); - BFG->emitLoadConstFalse(2); - BFG->emitStrictEq(2, 1, 2); - BFG->emitBitAnd(3, 2, 3); - - BFG->emitGetById(1, 0, 0, IDb); - BFG->emitLoadConstTrue(2); - BFG->emitStrictEq(2, 1, 2); - BFG->emitBitAnd(3, 2, 3); - - BFG->emitGetById(1, 0, 0, IDc); - BFG->emitLoadConstNull(2); - BFG->emitStrictEq(2, 1, 2); - BFG->emitBitAnd(3, 2, 3); - - BFG->emitGetById(1, 0, 0, IDd); - BFG->emitLoadConstString(2, IDfoo); - BFG->emitStrictEq(2, 1, 2); - BFG->emitBitAnd(3, 2, 3); - - BFG->emitGetById(1, 0, 0, IDe); - BFG->emitGetById(1, 1, 0, IDbar); - BFG->emitLoadConstNull(2); - BFG->emitStrictEq(2, 1, 2); - BFG->emitBitAnd(3, 2, 3); - - BFG->emitRet(3); - BFG->setHighestReadCacheIndex(255); - BFG->setHighestWriteCacheIndex(255); - BFG->bytecodeGenerationComplete(); - auto F = Builder.createTopLevelFunction( - M.getInitialScope()->createInnerScope(), true); - BMG.addFunction(F); - BMG.setFunctionGenerator(F, std::move(BFG)); - - auto *runtimeModule = RuntimeModule::createUninitialized(runtime, domain); - - runtimeModule->initializeWithoutCJSModulesMayAllocate( - BCProviderFromSrc::createBCProviderFromSrc(BMG.generate())); - - auto codeBlock = runtimeModule->getCodeBlockMayAllocate(0); - CallResult status{ExecutionStatus::EXCEPTION}; - { - ScopedNativeCallFrame frame{ - runtime, - 0, - HermesValue::encodeUndefinedValue(), - HermesValue::encodeUndefinedValue(), - HermesValue::encodeUndefinedValue()}; - status = runtime.interpretFunction(codeBlock); - } - auto frames = runtime.getStackFrames(); - ASSERT_TRUE(frames.begin() == frames.end()); - ASSERT_EQ( - StackFrameLayout::CalleeExtraRegistersAtStart, runtime.getStackLevel()); - ASSERT_EQ(ExecutionStatus::RETURNED, status.getStatus()); - ASSERT_EQ(1, status.getValue().getDouble()); -} - -} // anonymous namespace -#undef DEBUG_TYPE