Use dedicated logger instead of the root one #478

Merged: 3 commits, Oct 19, 2022
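For context: every change in this diff follows the same pattern, replacing calls on the root logger with a module-level logger named "xformers". A minimal sketch of the before/after (the warning text is taken from one of the touched files):

```python
import logging

# Before: the message goes through the root logger, so a downstream
# application cannot filter xformers warnings separately from its own.
logging.warning("Triton is not available, some optimizations will not be enabled.")

# After: each module creates one named logger and logs through it.
logger = logging.getLogger("xformers")
logger.warning("Triton is not available, some optimizations will not be enabled.")
```

By default the "xformers" logger still propagates to whatever handlers are configured on the root logger, so the output looks the same unless an application chooses to configure it.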
11 changes: 7 additions & 4 deletions xformers/__init__.py
@@ -12,6 +12,9 @@
except ImportError:
pass


logger = logging.getLogger("xformers")

_is_sparse_available: bool = True

# Set to true to utilize functorch
@@ -66,8 +69,8 @@ def _register_extensions():
_register_extensions()
except (ImportError, OSError) as e:
print(e)
logging.warning(
f"WARNING: {e}\nNeed to compile C++ extensions to get sparse attention suport."
logger.warning(
f"WARNING: {e}\nNeed to compile C++ extensions to get sparse attention support."
+ " Please run python setup.py build develop"
)
_is_sparse_available = False
@@ -94,7 +97,7 @@ def _is_triton_available():

return True
except (ImportError, AttributeError) as e:
logging.warning(
logger.warning(
f"A matching Triton is not available, some optimizations will not be enabled.\nError caught was: {e}"
)
return False
@@ -104,7 +107,7 @@ def _is_triton_available():
try:
from xformers.components.nvfuser import NVFusedBiasActivationDropout # noqa
except ImportError as e:
logging.warning(
logger.warning(
f"Functorch is not available, some optimizations will not be enabled.\nError caught was: {e}"
)
_is_functorch_available = False
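Because the import-time warnings above now go through the "xformers" logger, an application can tune or silence them without touching its own root-logger configuration. A small usage sketch (the level and formatter chosen here are only examples):

```python
import logging

# Silence xformers warnings such as the missing-Triton or
# missing-C++-extension messages emitted above.
logging.getLogger("xformers").setLevel(logging.ERROR)

# Or attach a dedicated handler so xformers messages are easy to spot.
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("[xformers] %(levelname)s: %(message)s"))
logging.getLogger("xformers").addHandler(handler)
```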
5 changes: 4 additions & 1 deletion xformers/components/attention/__init__.py
@@ -19,6 +19,9 @@
from .attention_mask import AttentionMask
from .base import Attention, AttentionConfig # noqa

logger = logging.getLogger("xformers")


# CREDITS: Classy Vision registry mechanism

ATTENTION_REGISTRY: Dict[str, Any] = {}
@@ -45,7 +48,7 @@ def build_attention(config: Union[Dict[str, Any], AttentionConfig]):
)
except KeyError as e:
name = config["name"]
logging.warning(f"{name} not available among {ATTENTION_REGISTRY.keys()}")
logger.warning(f"{name} not available among {ATTENTION_REGISTRY.keys()}")
raise e
else:
config_instance = config
13 changes: 9 additions & 4 deletions xformers/components/attention/blocksparse.py
@@ -13,7 +13,12 @@
from xformers import _is_triton_available
from xformers.components.attention import Attention, AttentionConfig, register_attention

logger = logging.getLogger("xformers")


_is_blocksparse_available = _is_triton_available()


if _is_blocksparse_available:
from triton.ops.blocksparse import matmul as blocksparse_matmul # type: ignore
from triton.ops.blocksparse import softmax as blocksparse_softmax # type: ignore
@@ -22,7 +27,7 @@

# Blocksparse requires Tensor cores
if gpu_capabilities_older_than_70():
logging.warning(
logger.warning(
"Blocksparse is not available: the current GPU does not expose Tensor cores"
)
_is_blocksparse_available = False
@@ -64,14 +69,14 @@ def __init__(
**kwargs,
):
if layout.dim() == 2:
logging.warning(
logger.warning(
"The layout passed is lacking a head dimension and a batch dimension"
)
logging.warning(
logger.warning(
"Now assuming that the same layout is to be used across all heads"
)
layout = layout.unsqueeze(0).expand(num_heads, -1, -1)
logging.warning(f"New layout dimensions: {layout.shape}")
logger.warning(f"New layout dimensions: {layout.shape}")

assert block_size in (
16,
5 changes: 4 additions & 1 deletion xformers/components/attention/core.py
@@ -30,6 +30,9 @@
from xformers.components.attention.blocksparse import BlockSparseAttention


logger = logging.getLogger("xformers")


def _create_random_sparsity(matrix, sparsity, divisible_by=4):
assert matrix.ndim == 3
keep = torch.rand_like(matrix[0], dtype=torch.float32) > sparsity
@@ -311,7 +314,7 @@ def scaled_dot_product_attention(
)

if switch_to_blocksparse:
logging.info("Switching causal attention to Triton blocksparse...")
logger.info("Switching causal attention to Triton blocksparse...")
return blocksparse_attention(q, k, v, dropout, block_size)

with torch.cuda.amp.autocast(enabled=False) if autocast_disabled else nullcontext():
4 changes: 3 additions & 1 deletion xformers/components/attention/favor.py
@@ -21,6 +21,8 @@
SMReg,
)

logger = logging.getLogger("xformers")


@dataclass
class FavorAttentionConfig(AttentionConfig):
@@ -84,7 +86,7 @@ def __init__(
self.dim_features = 2 * (
self.dim_features // 2
) # needs to be even for some variants
logging.info(
logger.info(
f"FAVOR: Automatically setting the random mapping dimension to {self.dim_features} from {dim_head}"
)
else:
4 changes: 3 additions & 1 deletion xformers/components/attention/nystrom.py
@@ -22,6 +22,8 @@
reshape_key_padding_mask,
)

logger = logging.getLogger("xformers")


@dataclass
class NystromSelfAttentionConfig(AttentionConfig):
@@ -182,7 +184,7 @@ def forward(

if key_padding_mask is not None:
if key_padding_mask.dtype == torch.bool:
logging.warning(
logger.warning(
"Bool mask found, but an additive mask is expected. Converting but this is slow"
)

4 changes: 3 additions & 1 deletion xformers/components/attention/ortho.py
@@ -20,6 +20,8 @@
scaled_query_key_softmax,
)

logger = logging.getLogger("xformers")


class LandmarkSelection(str, Enum):
Orthogonal = "orthogonal"
@@ -105,7 +107,7 @@ def forward(
landmarks = self._cluster_landmarks(q, spherical=True)

if att_mask is not None:
logging.warning(
logger.warning(
"Orthoformer: attention mask passed alongside with using landmarks to reduce dimensions. \
The two are typically not compatible"
)
4 changes: 3 additions & 1 deletion xformers/components/attention/scaled_dot_product.py
@@ -18,6 +18,8 @@
)
from xformers.components.attention.core import scaled_dot_product_attention

logger = logging.getLogger("xformers")


@dataclass
class ScaledDotProductConfig(AttentionConfig):
@@ -118,7 +120,7 @@ def forward(
if isinstance(att_mask, AttentionMask):
att_mask = att_mask.make_crop(seq_len=q.shape[-2])
else:
logging.error(
logger.error(
"Mismatching sparse attention mask and sequence length."
+ " Please pad the inputs or adjust the attention mask"
)
5 changes: 4 additions & 1 deletion xformers/components/feedforward/fused_mlp.py
@@ -17,6 +17,9 @@
register_feedforward,
)

logger = logging.getLogger("xformers")


if torch.cuda.is_available():
try:
from xformers.triton import FusedDropoutBias
@@ -73,4 +76,4 @@ def forward(self, inputs: torch.Tensor) -> torch.Tensor:
return self.mlp(inputs)

except ImportError:
logging.warning("Triton is not available, FusedMLP will not be enabled.")
logger.warning("Triton is not available, FusedMLP will not be enabled.")
9 changes: 6 additions & 3 deletions xformers/components/feedforward/mixture_of_experts.py
@@ -18,6 +18,9 @@
register_feedforward,
)

logger = logging.getLogger("xformers")


_is_fairscale_available = True

try:
@@ -27,7 +30,7 @@
from xformers.components.feedforward import MLP

except ImportError:
logging.warning(
logger.warning(
"Either FairScale or torch distributed is not available, MixtureOfExperts will not be exposed."
" Please install them if you would like to use MoE"
)
@@ -105,8 +108,8 @@ def __init__(
assert number_of_experts >= number_of_local_experts
else:
if dist.get_world_size() == 1:
logging.warning("Local experts no specified but world size of 1")
logging.warning("Assuming that all experts are local")
logger.warning("Local experts no specified but world size of 1")
logger.warning("Assuming that all experts are local")
number_of_local_experts = number_of_experts
else:
number_of_local_experts = 1
6 changes: 4 additions & 2 deletions xformers/components/input_projection.py
@@ -14,6 +14,8 @@
import torch
from torch import nn

logger = logging.getLogger("xformers")


@dataclass
class InputProjectionConfig:
@@ -53,7 +55,7 @@ def __init__(
key_proj_params.bias,
)
else:
logging.info(
logger.info(
"No Key projection parameters were passed, assuming that the weights"
+ " are shared with the query projection"
)
@@ -66,7 +68,7 @@ def __init__(
value_proj_params.bias,
)
else:
logging.info(
logger.info(
"No Value projection parameters were passed, assuming that the weights"
+ " are shared with the query projection"
)
4 changes: 3 additions & 1 deletion xformers/components/multi_head_dispatch.py
@@ -16,6 +16,8 @@
from xformers.components.input_projection import InputProjection, InputProjectionConfig
from xformers.components.positional_embedding import RotaryEmbedding

logger = logging.getLogger("xformers")


@dataclass
class MultiHeadDispatchConfig:
@@ -90,7 +92,7 @@ def __init__(
super().__init__()

if isinstance(bias, bool):
logging.warning(
logger.warning(
"Single bias value provided for the MHA projections."
+ f" Assuming the same parameter ({bias}) is to be used everywhere"
)
6 changes: 4 additions & 2 deletions xformers/factory/block_factory.py
@@ -30,6 +30,8 @@
xFormerEncoderConfig,
)

logger = logging.getLogger("xformers")


def _get_ln_factory(
d_model: int,
@@ -113,7 +115,7 @@ def __init__(self, config: xFormerEncoderConfig, **kwargs):
mha_dim = config.multi_head_config["dim_model"]

if pos_encoding_dim != mha_dim:
logging.warning(
logger.warning(
f"The embedding dim and model dim do not match ({pos_encoding_dim} vs {mha_dim}), adding a projector layer." # noqa
)
self.embedding_projector = nn.Linear(pos_encoding_dim, mha_dim)
@@ -257,7 +259,7 @@ def __init__(self, config: xFormerDecoderConfig, **kwargs):

if pos_encoding_dim != mha_dim:

logging.warning(
logger.warning(
f"The embedding dim and model dim do not match ({pos_encoding_dim} vs {mha_dim}), adding a projector layer." # noqa
)

4 changes: 2 additions & 2 deletions xformers/factory/hydra_helper.py
@@ -14,7 +14,7 @@
from xformers.components.feedforward import FEEDFORWARD_REGISTRY
from xformers.components.positional_embedding import POSITION_EMBEDDING_REGISTRY

log = logging.getLogger(__name__)
logger = logging.getLogger("xformers")


def import_xformer_config_schema():
@@ -33,4 +33,4 @@ def import_xformer_config_schema():
try:
cs.store(name=f"{kk}_schema", node=v[kk].config, group=f"xformers/{k}")
except ValidationError as e:
log.debug(f"Error registering {kk}_schema, error: {e}")
logger.debug(f"Error registering {kk}_schema, error: {e}")
4 changes: 3 additions & 1 deletion xformers/factory/model_factory.py
@@ -20,6 +20,8 @@
from xformers.factory.block_factory import xFormerDecoderBlock, xFormerEncoderBlock
from xformers.factory.weight_init import get_weight_init_fn, xFormerWeightInit

logger = logging.getLogger("xformers")


@dataclass(init=False)
class xFormerConfig:
@@ -183,7 +185,7 @@ def __init__(
and decoders[0].pose_encoding
and not config.reversible
):
logging.info("Tying encoder and decoder embeddings, as requested")
logger.info("Tying encoder and decoder embeddings, as requested")
encoders[0].pose_encoding = decoders[0].pose_encoding

self.encoders: torch.nn.Module = (
5 changes: 4 additions & 1 deletion xformers/factory/weight_init.py
@@ -20,6 +20,9 @@
_no_grad_uniform_,
)

logger = logging.getLogger("xformers")


_assert_if_not_initialized = False


@@ -125,7 +128,7 @@ def _maybe_report_no_init(module, name):
return

# This is unexpected, warn about a possible unhandled weight
logging.warning(
logger.warning(
f"Not initializing weights in {name}, this could be a mistake.\nModule {module}"
)

9 changes: 6 additions & 3 deletions xformers/sparse/blocksparse_tensor.py
@@ -3,17 +3,20 @@
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch

from xformers.ops import masked_matmul

logger = logging.getLogger("xformers")


try:
from triton.ops.blocksparse import matmul as blocksparse_matmul
from triton.ops.blocksparse import softmax as blocksparse_softmax
except ImportError as e:
import logging

logging.warning(
logger.warning(
"Triton is not available, some optimizations will not be enabled.\n"
+ f"This is just a warning: {e}"
)