Address PR comments
elvin-n committed Jul 28, 2022
1 parent 8a0fce2 commit 1c8abb5
Showing 2 changed files with 31 additions and 18 deletions.
44 changes: 29 additions & 15 deletions src/relay/transforms/annotate_texture_storage.cc
@@ -30,7 +30,7 @@
*
* - AnnotateMemoryScope calls *target.CollectStorageInfo for all target been represented
* in the graph and rewrites graph modifying or inserting of VirtualDevice with required
- * memory_scop collected from the CollectStorageInfo
+ * memory_scope collected from the CollectStorageInfo
*/

#include <tvm/relay/attrs/nn.h>
@@ -119,9 +119,7 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
if (call->checked_type().as<TensorTypeNode>()) {
std::string scope = "global.texture";
if (const auto* ttype = call->checked_type().as<TensorTypeNode>()) {
- if (ttype->shape.size() == 5) {
-   scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(call)));
- }
+ scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(call)));
}
storage_scope_[call].push_back(scope);
} else {
@@ -175,8 +173,26 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
}
}

+ /**
+  * Determines the name of the memory scope that can fit a tensor of the given shape.
+  *
+  * The scope falls back to "global" if the tensor does not satisfy the current flattening
+  * rules for textures (currently a texture has to be a 5d tensor whose last dimension equals 4).
+  *
+  * The packing layout inside the texture scope (the part after the dash) is derived
+  * from the shape itself. Hardware can impose limits on the texture spatial dimensions,
+  * and we must not exceed them. Besides fitting the hardware limits, we aim for a balanced
+  * packing where the final spatial sizes of the textures do not differ too much.
+  * @param shape shape to be analyzed
+  * @param vd VirtualDevice of the tensor whose memory scope is being determined
+  * @return string representing the memory scope, either "global" or "global.texture-layout"
+  */
std::string Scope(Array<PrimExpr> shape, const VirtualDevice& vd) {
- if (vd != VirtualDevice::FullyUnconstrained()) {
+ // currently we support only textures made from 5d tensors
+ // the 5d requirement is not a limitation of textures in general; it is a limitation of how
+ // we represent memory scopes/layouts and flatten textures in TIR
+ if (vd != VirtualDevice::FullyUnconstrained() && shape.size() == 5 &&
+     shape[4].as<IntImmNode>()->value == 4) {
std::map<int, std::string> diffs;
int limit =
vd->target->GetAttr<Integer>("texture_spatial_limit").value_or(Integer(16384))->value;
@@ -220,13 +236,11 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {

bool expr_is_rgba_vectorizable = false;
if (const auto* ttype = expr->checked_type().as<TensorTypeNode>()) {
- if (ttype->shape.size() == 5) {
-   scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(expr)));
-   if (scope != "global") {
-     auto inner_dim = ttype->shape.back().as<IntImmNode>();
-     if (inner_dim && inner_dim->value == 4) {
-       expr_is_rgba_vectorizable = true;
-     }
+ scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(expr)));
+ if (scope != "global") {
+   auto inner_dim = ttype->shape.back().as<IntImmNode>();
+   if (inner_dim && inner_dim->value == 4) {
+     expr_is_rgba_vectorizable = true;
+   }
}
}
@@ -347,11 +361,11 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
* Currently this workflow supports analysis and rewriting of VirtualDevice for
* Constants and function Variables
*/
- class VDRewriter : public transform::DeviceAwareExprMutator {
+ class RewriteVDStorageScopes : public transform::DeviceAwareExprMutator {
using VarMap = std::unordered_map<Expr, Var, ObjectPtrHash, ObjectPtrEqual>;

public:
- explicit VDRewriter(const Map<Expr, Array<String>>& storage_scope)
+ explicit RewriteVDStorageScopes(const Map<Expr, Array<String>>& storage_scope)
: transform::DeviceAwareExprMutator(Optional<IRModule>()), storage_scope_(storage_scope) {}

Function Rewrite(const Expr& expr) { return Downcast<Function>(Mutate(expr)); }
@@ -486,7 +500,7 @@ Map<Expr, Array<String>> CollectStorageInfo(const Expr& expr) {
Expr AnnotateMemoryScopeExpr(const Expr& expr, const IRModule& mod, CompilationConfig config) {
auto storage_scope = CollectStorageInfo(expr);
if (storage_scope.size()) {
- return VDRewriter(storage_scope).Rewrite(expr);
+ return RewriteVDStorageScopes(storage_scope).Rewrite(expr);
} else {
return expr;
}
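The new `Scope` helper documented in the hunk above chooses between a plain "global" buffer and a texture memory scope, using the flattening rule (a 5d tensor whose last dimension is 4) and the target's `texture_spatial_limit` attribute (defaulting to 16384). The snippet below is a minimal standalone Python sketch of that selection rule, for illustration only; the function name, the row/column split loop, and the `-split{N}` suffix are assumptions and do not mirror the exact packing logic or scope strings in `annotate_texture_storage.cc`.

```python
# Standalone sketch of the scope-selection rule described in the Scope() doc comment.
# Names and the layout suffix are illustrative; they do not mirror TVM's exact strings.
def pick_texture_scope(shape, spatial_limit=16384):
    # Flattening rule: only 5d tensors whose last dimension is 4 (RGBA) may become textures.
    if len(shape) != 5 or shape[-1] != 4:
        return "global"

    candidates = {}
    # Try every split of the first four dimensions into texture rows x columns.
    for axis in range(1, 4):
        rows = 1
        for d in shape[:axis]:
            rows *= d
        cols = 1
        for d in shape[axis:-1]:
            cols *= d
        # Both spatial extents must fit the hardware limit; remember how balanced the split is.
        if rows <= spatial_limit and cols <= spatial_limit:
            candidates[abs(rows - cols)] = axis

    if not candidates:
        return "global"  # no flattening fits the hardware texture limits

    best_axis = candidates[min(candidates)]  # most balanced split wins
    return "global.texture-split{}".format(best_axis)


print(pick_texture_scope([1, 32, 56, 56, 4]))  # fits the limit, returns a texture scope
print(pick_texture_scope([1, 32, 56, 56, 3]))  # last dimension is not 4, returns "global"
```

The real pass records the chosen scope per expression in `storage_scope_` and later rewrites the `VirtualDevice` annotations accordingly, as the `RewriteVDStorageScopes` hunks above show.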
5 changes: 2 additions & 3 deletions tests/python/relay/test_conv2d_nchw_texture.py
@@ -606,7 +606,6 @@ def test_residual_block():
build_run_compare(mod, params1, {"data": input_shape}, dtype, target, static_memory_scope)



@tvm.testing.requires_opencl
def test_concat():
"""
@@ -737,7 +736,7 @@ def test_pooling_branching_texture_params():
\ /
add <- to have the only one output, will be fused
| <- buffer
-layout_transform (NCHW4c->NCHW)
+            layout_transform (NCHW4c->NCHW)
"""
target = "opencl --device=adreno"
dtype = "float16"
@@ -865,7 +864,7 @@ def test_branching_texture_params():
\ /
add <- to have the only one output
| <- buffer
-layout_transform (NCHW4c->NCHW)
+            layout_transform (NCHW4c->NCHW)
"""
target = "opencl --device=adreno"
dtype = "float16"
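Both test docstrings above end the fused graph with a `layout_transform (NCHW4c->NCHW)` node, which unpacks the texture-friendly NCHW4c layout back into a plain NCHW buffer. As a rough illustration only (the shape, dtype, and variable names below are made up and are not taken from the test file), such a tail can be expressed in Relay as:

```python
import tvm
from tvm import relay

# A packed NCHW4c activation: N=1, C=8*4=32, H=W=56, with the channel remainder of 4
# kept innermost so it can be vectorized into RGBA texture elements.
data = relay.var("data", shape=(1, 8, 56, 56, 4), dtype="float16")
# Unpack back to NCHW; in the tests above this is the stage that lands in a buffer.
unpacked = relay.layout_transform(data, "NCHW4c", "NCHW")
mod = tvm.IRModule.from_expr(relay.Function([data], unpacked))
print(mod)
```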

0 comments on commit 1c8abb5
