From 5e4a4bae6544eda7d6e5be923bd1086921ffcb1b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 18 Sep 2024 15:55:43 -0700 Subject: [PATCH 1/2] [NFC + bugfix] Remove BreakTargetLocation from GUFA (#6956) Before, a br would send its value to a BreakTargetLocation. That would then be linked to the target: { br's value } => BreakTargetLocation(target name) => { location of target } This PR skips the middle: { br's value } => { location of target } It just connects breaks directly to the targets. We can do that if we keep a map of the targets as we go. This is 2% faster as well as simplifies the code, as an NFC refactoring. But it also fixes a bug: we have handling on ExpressionLocation that filters values as they come in (they must accord with the expression's type). We were not doing that on BreakTargetLocation, leading to an assert. Removing BreakTargetLocation entirely is easier and better than adding filtering logic for it. Fixes #6955 --- src/ir/possible-contents.cpp | 63 +++++++++++++++++++----------------- src/ir/possible-contents.h | 23 ------------- 2 files changed, 34 insertions(+), 52 deletions(-) diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp index aa656465307..17e40f1d805 100644 --- a/src/ir/possible-contents.cpp +++ b/src/ir/possible-contents.cpp @@ -503,12 +503,30 @@ struct CollectedFuncInfo { std::unordered_map childParents; }; +// Does a walk while maintaining a map of names of branch targets to those +// expressions, so they can be found by their name. +// TODO: can this replace ControlFlowWalker in other places? +template> +struct BreakTargetWalker : public PostWalker { + std::unordered_map breakTargets; + + Expression* findBreakTarget(Name name) { return breakTargets[name]; } + + static void scan(SubType* self, Expression** currp) { + auto* curr = *currp; + BranchUtils::operateOnScopeNameDefs( + curr, [&](Name name) { self->breakTargets[name] = curr; }); + + PostWalker::scan(self, currp); + } +}; + // Walk the wasm and find all the links we need to care about, and the locations // and roots related to them. This builds up a CollectedFuncInfo data structure. // After all InfoCollectors run, those data structures will be merged and the // main flow will begin. struct InfoCollector - : public PostWalker> { + : public BreakTargetWalker> { CollectedFuncInfo& info; InfoCollector(CollectedFuncInfo& info) : info(info) {} @@ -553,9 +571,6 @@ struct InfoCollector return; } - // Values sent to breaks to this block must be received here. - handleBreakTarget(curr); - // The final item in the block can flow a value to here as well. receiveChildValue(curr->list.back(), curr); } @@ -1151,8 +1166,7 @@ struct InfoCollector for (Index i = 0; i < params.size(); i++) { if (isRelevant(params[i])) { info.links.push_back( - {TagLocation{tag, i}, - BreakTargetLocation{getFunction(), target, i}}); + {TagLocation{tag, i}, getBreakTargetLocation(target, i)}); } } @@ -1164,7 +1178,7 @@ struct InfoCollector addRoot(location, PossibleContents::fromType(Type(HeapType::exn, NonNullable))); info.links.push_back( - {location, BreakTargetLocation{getFunction(), target, exnrefIndex}}); + {location, getBreakTargetLocation(target, exnrefIndex)}); } } } @@ -1279,6 +1293,13 @@ struct InfoCollector // Helpers + // Returns the location of a break target by the name (e.g. returns the + // location of a block, if the name is the name of a block). Also receives the + // index in a tuple, if this is part of a tuple value. + Location getBreakTargetLocation(Name target, Index i) { + return ExpressionLocation{findBreakTarget(target), i}; + } + // Handles the value sent in a break instruction. Does not handle anything // else like the condition etc. void handleBreakValue(Expression* curr) { @@ -1288,26 +1309,13 @@ struct InfoCollector for (Index i = 0; i < value->type.size(); i++) { // Breaks send the contents of the break value to the branch target // that the break goes to. - info.links.push_back( - {ExpressionLocation{value, i}, - BreakTargetLocation{getFunction(), target, i}}); + info.links.push_back({ExpressionLocation{value, i}, + getBreakTargetLocation(target, i)}); } } }); } - // Handles receiving values from breaks at the target (as in a block). - void handleBreakTarget(Expression* curr) { - if (isRelevant(curr->type)) { - BranchUtils::operateOnScopeNameDefs(curr, [&](Name target) { - for (Index i = 0; i < curr->type.size(); i++) { - info.links.push_back({BreakTargetLocation{getFunction(), target, i}, - ExpressionLocation{curr, i}}); - } - }); - } - } - // Connect a child's value to the parent, that is, all content in the child is // now considered possible in the parent as well. void receiveChildValue(Expression* child, Expression* parent) { @@ -2400,16 +2408,16 @@ bool Flower::updateContents(LocationIndex locationIndex, } } - // After filtering we should always have more precise information than "many" - // - in the worst case, we can have the type declared in the wasm. - assert(!contents.isMany()); - #if defined(POSSIBLE_CONTENTS_DEBUG) && POSSIBLE_CONTENTS_DEBUG >= 2 std::cout << " updateContents has something new\n"; contents.dump(std::cout, &wasm); std::cout << '\n'; #endif + // After filtering we should always have more precise information than "many" + // - in the worst case, we can have the type declared in the wasm. + assert(!contents.isMany()); + // Add a work item if there isn't already. workQueue.insert(locationIndex); @@ -2896,9 +2904,6 @@ void Flower::dump(Location location) { << '\n'; } else if (auto* loc = std::get_if(&location)) { std::cout << " globalloc " << loc->name << '\n'; - } else if (auto* loc = std::get_if(&location)) { - std::cout << " branchloc " << loc->func->name << " : " << loc->target - << " tupleIndex " << loc->tupleIndex << '\n'; } else if (std::get_if(&location)) { std::cout << " sigparamloc " << '\n'; } else if (std::get_if(&location)) { diff --git a/src/ir/possible-contents.h b/src/ir/possible-contents.h index 945d59d2623..21f5ecb4535 100644 --- a/src/ir/possible-contents.h +++ b/src/ir/possible-contents.h @@ -402,21 +402,6 @@ struct ResultLocation { } }; -// The location of a break target in a function, identified by its name. -struct BreakTargetLocation { - Function* func; - Name target; - // As in ExpressionLocation, the index inside the tuple, or 0 if not a tuple. - // That is, if the branch target has a tuple type, then each branch to that - // location sends a tuple, and we'll have a separate BreakTargetLocation for - // each, indexed by the index in the tuple that the branch sends. - Index tupleIndex; - bool operator==(const BreakTargetLocation& other) const { - return func == other.func && target == other.target && - tupleIndex == other.tupleIndex; - } -}; - // The location of a global in the module. struct GlobalLocation { Name name; @@ -523,7 +508,6 @@ using Location = std::variant struct hash { } }; -template<> struct hash { - size_t operator()(const wasm::BreakTargetLocation& loc) const { - return std::hash>{}( - {size_t(loc.func), loc.target, loc.tupleIndex}); - } -}; - template<> struct hash { size_t operator()(const wasm::GlobalLocation& loc) const { return std::hash{}(loc.name); From e2ce099f7e85b506cb45d306ec7c89a237d2e7c6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 18 Sep 2024 16:03:50 -0700 Subject: [PATCH 2/2] [NFC] Avoid collecting unnecessary parents in OptimizeAddedConstants (#6953) This makes the pass 20% faster. --- src/passes/OptimizeAddedConstants.cpp | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/passes/OptimizeAddedConstants.cpp b/src/passes/OptimizeAddedConstants.cpp index f03109223e2..142f1002491 100644 --- a/src/passes/OptimizeAddedConstants.cpp +++ b/src/passes/OptimizeAddedConstants.cpp @@ -32,13 +32,35 @@ #include #include -#include #include #include #include namespace wasm { +namespace { + +// Similar to Parents from parents.h, but we only care about gets, so it is much +// more efficient to just collect their parents. +struct GetParents { + GetParents(Expression* expr) { inner.walk(expr); } + + Expression* getParent(LocalGet* curr) const { + auto iter = inner.parentMap.find(curr); + assert(iter != inner.parentMap.end()); + return iter->second; + } + +private: + struct Inner : public ExpressionStackWalker { + void visitLocalGet(LocalGet* curr) { parentMap[curr] = getParent(); } + + std::unordered_map parentMap; + } inner; +}; + +} // anonymous namespace + template class MemoryAccessOptimizer { public: MemoryAccessOptimizer(P* parent, @@ -356,7 +378,7 @@ struct OptimizeAddedConstants // g(a, offset=10) // but if x has other uses, then avoid doing so - we'll be doing that add // anyhow, so the load/store offset trick won't actually help. - Parents parents(getFunction()->body); + GetParents parents(getFunction()->body); for (auto& [location, _] : localGraph->locations) { if (auto* set = location->dynCast()) { if (auto* add = set->value->dynCast()) {