From 203c5133142307ed9d037f94f140d10aa9555e72 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 12:39:25 +0000
Subject: [PATCH 01/24] ignore more things

---
 .gitignore | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 32ec9ad7..898d43c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,7 @@ core
 __pycache__
 *.egg-info
 dist
-build
\ No newline at end of file
+build
+*.so
+.vscode/
+.coverage.*

From 2f823b246d4c4a123c74d6f51cfb5fdb5505325d Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 12:40:01 +0000
Subject: [PATCH 02/24] update setup.py

---
 setup.py | 164 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 118 insertions(+), 46 deletions(-)

diff --git a/setup.py b/setup.py
index 95c4a0e8..bdf78bb9 100644
--- a/setup.py
+++ b/setup.py
@@ -1,67 +1,139 @@
-from setuptools import setup, find_packages
+# Copyright 2024 MosaicML MegaBlocks authors
+# SPDX-License-Identifier: Apache-2.0
+
 import os
-import torch
-from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+import warnings
 
-if os.environ.get("TORCH_CUDA_ARCH_LIST"):
-    # Let PyTorch builder to choose device to target for.
-    device_capability = ""
-else:
-    device_capability = torch.cuda.get_device_capability()
-    device_capability = f"{device_capability[0]}{device_capability[1]}"
+from setuptools import find_packages, setup
 
-nvcc_flags = [
-    "--ptxas-options=-v",
-    "--optimize=2",
-]
-if device_capability:
-    nvcc_flags.append(
-        f"--generate-code=arch=compute_{device_capability},code=sm_{device_capability}"
-    )
-
-ext_modules = [
-    CUDAExtension(
-        "megablocks_ops",
-        ["csrc/ops.cu"],
-        include_dirs=["csrc"],
-        extra_compile_args={"cxx": ["-fopenmp"], "nvcc": nvcc_flags},
-    )
-]
+# We require torch in setup.py to build cpp extensions "ahead of time"
+# More info here: # https://pytorch.org/tutorials/advanced/cpp_extension.html
+is_torch_installed = False
+try:
+    import torch
+    from torch.utils.cpp_extension import CUDA_HOME, BuildExtension, CUDAExtension
+    is_torch_installed = True
+except ModuleNotFoundError as e:
+    raise ModuleNotFoundError(
+        "No module named 'torch'. Torch is required to install this repo."
+    ) from e
 
-install_requires=[
-    "triton>=2.1.0",
-    "stanford-stk==0.7.0",
+###############################################################################
+# Requirements
+###############################################################################
+
+install_requires = [
+    'numpy>=1.21.5,<2.1.0',
+    'torch>=2.3.0,<2.4',
+    'triton>=2.1.0',
+    # 'stanford-stk==0.7.0',
+    'stanford-stk @ git+https://git@github.com/eitanturok/stk.git'
 ]
 
 extra_deps = {}
 
-extra_deps["gg"] = [
-    "grouped_gemm==0.1.4",
+extra_deps['gg'] = [
+    'grouped_gemm==0.1.4',
 ]
 
-extra_deps["dev"] = [
-    "absl-py",
+extra_deps['dev'] = [
+    'absl-py',
+    'coverage[toml]==7.4.4',
+    'pytest_codeblocks>=0.16.1,<0.17',
+    'pytest-cov>=4,<5',
+    'pytest>=7.2.1,<8',
+    'pre-commit>=3.4.0,<4',
 ]
 
-extra_deps['all'] = set(dep for deps in extra_deps.values() for dep in deps)
+extra_deps['all'] = list(
+    set(dep for deps in extra_deps.values() for dep in deps))
+
+###############################################################################
+# Extension Modules
+###############################################################################
+
+cmdclass = {}
+ext_modules = []
+
+# Only install CUDA extensions if available
+if 'cu' in torch.__version__ and CUDA_HOME is not None:
+
+    cmdclass = {'build_ext': BuildExtension}
+    nvcc_flags = ['--ptxas-options=-v', '--optimize=2']
+
+    if os.environ.get('TORCH_CUDA_ARCH_LIST'):
+        # Let PyTorch builder to choose device to target for.
+        device_capability = ''
+    else:
+        device_capability_tuple = torch.cuda.get_device_capability()
+        device_capability = f'{device_capability_tuple[0]}{device_capability_tuple[1]}'
+
+    if device_capability:
+        nvcc_flags.append(
+            f'--generate-code=arch=compute_{device_capability},code=sm_{device_capability}'
+        )
+
+    ext_modules = [
+        CUDAExtension(
+            'megablocks_ops',
+            ['csrc/ops.cu'],
+            include_dirs=['csrc'],
+            extra_compile_args={
+                'cxx': ['-fopenmp'],
+                'nvcc': nvcc_flags
+            },
+        )
+    ]
+elif CUDA_HOME is None:
+    warnings.warn(
+        'Attempted to install CUDA extensions, but CUDA_HOME was None. ' +
+        'Please install CUDA and ensure that the CUDA_HOME environment ' +
+        'variable points to the installation location.')
+else:
+    warnings.warn('Warning: No CUDA devices; cuda code will not be compiled.')
+
+###############################################################################
+# README
+###############################################################################
+
+# convert README to long description on PyPI, optionally skipping certain
+# marked sections if present (e.g., for coverage / code quality badges)
+with open('README.md', 'r', encoding='utf-8') as fh:
+    long_description = fh.read()
+while True:
+    start_tag = '<!-- LONG_DESCRIPTION_SKIP_START -->'
+    end_tag = '<!-- LONG_DESCRIPTION_SKIP_END -->'
+    start = long_description.find(start_tag)
+    end = long_description.find(end_tag)
+    if start == -1:
+        assert end == -1, 'Skipped section starts and ends imbalanced'
+        break
+    else:
+        assert end != -1, 'Skipped section starts and ends imbalanced'
+        long_description = long_description[:start] + long_description[
+            end + len(end_tag):]
+
+###############################################################################
+# README
+###############################################################################
 
 setup(
-    name="megablocks",
-    version="0.5.1",
-    author="Trevor Gale",
-    author_email="tgale@stanford.edu",
-    description="MegaBlocks",
-    long_description=open('README.md').read(),
+    name='megablocks',
+    version='0.5.1',
+    author='Trevor Gale',
+    author_email='tgale@stanford.edu',
+    description='MegaBlocks',
+    long_description=long_description,
     long_description_content_type='text/markdown',
-    url="https://github.com/stanford-futuredata/megablocks",
+    url='https://github.com/stanford-futuredata/megablocks',
     classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: BSD License",
-        "Operating System :: Unix",
+        'Programming Language :: Python :: 3',
+        'License :: OSI Approved :: BSD License',
+        'Operating System :: Unix',
     ],
-    packages=find_packages(),
+    packages=find_packages(exclude=['tests*']),
     ext_modules=ext_modules,
-    cmdclass={"build_ext": BuildExtension},
+    cmdclass=cmdclass,
     install_requires=install_requires,
     extras_require=extra_deps,
 )

From 2299a117ef4b6d4b41000a57facc76bb43d9ceca Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 12:41:33 +0000
Subject: [PATCH 03/24] init pyproject.toml

---
 pyproject.toml | 504 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 504 insertions(+)
 create mode 100644 pyproject.toml

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..898fdfb6
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,504 @@
+# build requirements
+[build-system]
+# if you get an error like "RuntimeError: Cuda extensions are being compiled with a version of Cuda that does not match the version used to compile Pytorch binaries.  Pytorch binaries were compiled with Cuda 11.7.", you need to:
+#   pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+# then build via:
+#   pip install --no-build-isolation -e .
+# since, by default, torch 2.0.1 only uses cuda 11.7
+requires = ["setuptools < 70.0.0", "packaging >= 21.0.0", "torch >= 2.3.0, < 2.4"]
+build-backend = "setuptools.build_meta"
+
+# iSort
+[tool.isort]
+multi_line_output = 0
+line_length = 80
+skip = [ "env", "wandb", "runs", "build", "node_modules", "examples/inference-deployments/mpt/mpt_7b_ft_handler.py" ]
+
+# Coverage
+[tool.coverage.run]
+parallel = true
+branch = true
+relative_files = true
+concurrency = ["thread"]
+include = [
+    "megablocks/*"
+]
+
+# Pyright
+[tool.pyright]
+exclude = ['env-**','**/ci-testing']
+stubPath = ""  # suppress useless 'stubPath is not a valid directory' errors
+
+reportUnnecessaryIsInstance = "warning"
+reportMissingTypeStubs = "none"
+reportIncompatibleMethodOverride = "none"
+reportIncompatibleVariableOverride = "error"
+reportUnusedImport = "error"
+reportUnusedClass = "warning"
+reportUnusedFunction = "warning"
+reportUnusedVariable = "error"
+reportDuplicateImport = "error"
+reportWildcardImportFromLibrary = "error"
+reportUntypedFunctionDecorator = "warning"
+reportPrivateImportUsage = "warning"
+reportUndefinedVariable = "error"
+strictParameterNoneValue = true
+reportPropertyTypeMismatch = "error"
+reportUntypedNamedTuple = "error"
+reportUnnecessaryCast = "error"
+reportInvalidTypeVarUse = "error"
+reportOverlappingOverload = "error"
+reportUninitializedInstanceVariable = "error"
+reportInvalidStringEscapeSequence = "error"
+reportMissingParameterType = "warning"  # TODO: make this an error
+reportCallInDefaultInitializer = "none"  # TODO: make this an error
+reportUnnecessaryComparison = "warning"
+reportSelfClsParameterName = "error"
+reportImplicitStringConcatenation = "warning"  # TODO: make this an error
+reportInvalidStubStatement = "error"
+reportIncompleteStub = "error"
+reportUnsupportedDunderAll = "error"
+reportUnusedCoroutine = "error"
+
+# Pytest
+[tool.pytest.ini_options]
+# By default, skip gpu tests
+addopts = "--tb=short -m 'not gpu'"
+
+markers = [
+    # For distributed testing
+    "world_size(val)",
+    # Should be run during daily regression
+    "daily",
+    # Whether the test will be reading data from a remote source, and may require credentials
+    "remote",
+    # whether the test requires a gpu
+    "gpu",
+]
+
+filterwarnings = [
+    # "error",  # warnings should be treated like errors, but still need to fix some warnings
+    'ignore:ExtraArgumentWarning',  # extra arguments originate from pytest-specific CLI args
+    'ignore:DistributedDefaultValueWarning',  # default distributed values are fine
+    'ignore:NoDistributedWarning',  # running without distributed is fine
+    'ignore:Deterministic mode is activated:UserWarning',  # all tests run with deterministic mode
+    'ignore:SubsetNumBatchesWarning',  # different subsets OK for testing
+    'ignore:No optimizer:UserWarning',  # testing defaults
+    'ignore:No scheduler:UserWarning',  # testing defaults
+    'ignore::DeprecationWarning:tensorboard',  # ignore tensorboard
+]
+
+# Enable logging for pytest
+log_cli = true
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"
+
+
+# Yapf
+[tool.yapf]
+# Align closing bracket with visual indentation.
+align_closing_bracket_with_visual_indent = false
+
+# Allow dictionary keys to exist on multiple lines. For example:
+#
+#   x = {
+#       ('this is the first element of a tuple',
+#        'this is the second element of a tuple'):
+#            value,
+#   }
+allow_multiline_dictionary_keys = false
+
+# Allow lambdas to be formatted on more than one line.
+allow_multiline_lambdas = false
+
+# Allow splitting before a default / named assignment in an argument list.
+allow_split_before_default_or_named_assigns = true
+
+# Allow splits before the dictionary value.
+allow_split_before_dict_value = true
+
+#   Let spacing indicate operator precedence. For example:
+#
+#     a = 1 * 2 + 3 / 4
+#     b = 1 / 2 - 3 * 4
+#     c = (1 + 2) * (3 - 4)
+#     d = (1 - 2) / (3 + 4)
+#     e = 1 * 2 - 3
+#     f = 1 + 2 + 3 + 4
+#
+# will be formatted as follows to indicate precedence:
+#
+#     a = 1*2 + 3/4
+#     b = 1/2 - 3*4
+#     c = (1+2) * (3-4)
+#     d = (1-2) / (3+4)
+#     e = 1*2 - 3
+#     f = 1 + 2 + 3 + 4
+#
+arithmetic_precedence_indication = false
+
+# Number of blank lines surrounding top-level function and class
+# definitions.
+blank_lines_around_top_level_definition = 2
+
+# Insert a blank line before a class-level docstring.
+blank_line_before_class_docstring = false
+
+# Insert a blank line before a module docstring.
+blank_line_before_module_docstring = true
+
+# Insert a blank line before a 'def' or 'class' immediately nested
+# within another 'def' or 'class'. For example:
+#
+#   class Foo:
+#                      # <------ this blank line
+#     def method():
+#       ...
+blank_line_before_nested_class_or_def = true
+
+# Do not split consecutive brackets. Only relevant when
+# dedent_closing_brackets is set. For example:
+#
+#    call_func_that_takes_a_dict(
+#        {
+#            'key1': 'value1',
+#            'key2': 'value2',
+#        }
+#    )
+#
+# would reformat to:
+#
+#    call_func_that_takes_a_dict({
+#        'key1': 'value1',
+#        'key2': 'value2',
+#    })
+coalesce_brackets = false
+
+# The column limit.
+column_limit = 80
+
+# The style for continuation alignment. Possible values are:
+#
+# - SPACE: Use spaces for continuation alignment. This is default behavior.
+# - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns
+#   (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs or
+#   CONTINUATION_INDENT_WIDTH spaces) for continuation alignment.
+# - VALIGN-RIGHT: Vertically align continuation lines to multiple of
+#   INDENT_WIDTH columns. Slightly right (one tab or a few spaces) if
+#   cannot vertically align continuation lines with indent characters.
+continuation_align_style = 'SPACE'
+
+# Indent width used for line continuations.
+continuation_indent_width = 4
+
+# Put closing brackets on a separate line, dedented, if the bracketed
+# expression can't fit in a single line. Applies to all kinds of brackets,
+# including function definitions and calls. For example:
+#
+#   config = {
+#       'key1': 'value1',
+#       'key2': 'value2',
+#   }        # <--- this bracket is dedented and on a separate line
+#
+#   time_series = self.remote_client.query_entity_counters(
+#       entity='dev3246.region1',
+#       key='dns.query_latency_tcp',
+#       transform=Transformation.AVERAGE(window=timedelta(seconds=60)),
+#       start_ts=now()-timedelta(days=3),
+#       end_ts=now(),
+#   )        # <--- this bracket is dedented and on a separate line
+dedent_closing_brackets = false
+
+# Disable the heuristic which places each list element on a separate line
+# if the list is comma-terminated.
+disable_ending_comma_heuristic = false
+
+# Place each dictionary entry onto its own line.
+each_dict_entry_on_separate_line = true
+
+# Require multiline dictionary even if it would normally fit on one line.
+# For example:
+#
+#   config = {
+#       'key1': 'value1'
+#   }
+force_multiline_dict = false
+
+# The regex for an i18n comment. The presence of this comment stops
+# reformatting of that line, because the comments are required to be
+# next to the string they translate.
+i18n_comment = '#\..*'
+
+# The i18n function call names. The presence of this function stops
+# reformatting on that line, because the string it has cannot be moved
+# away from the i18n comment.
+i18n_function_call = 'N_, _'
+
+# Indent blank lines.
+indent_blank_lines = false
+
+# Put closing brackets on a separate line, indented, if the bracketed
+# expression can't fit in a single line. Applies to all kinds of brackets,
+# including function definitions and calls. For example:
+#
+#   config = {
+#       'key1': 'value1',
+#       'key2': 'value2',
+#       }        # <--- this bracket is indented and on a separate line
+#
+#   time_series = self.remote_client.query_entity_counters(
+#       entity='dev3246.region1',
+#       key='dns.query_latency_tcp',
+#       transform=Transformation.AVERAGE(window=timedelta(seconds=60)),
+#       start_ts=now()-timedelta(days=3),
+#       end_ts=now(),
+#       )        # <--- this bracket is indented and on a separate line
+indent_closing_brackets = false
+
+# Indent the dictionary value if it cannot fit on the same line as the
+# dictionary key. For example:
+#
+#   config = {
+#       'key1':
+#           'value1',
+#       'key2': value1 +
+#               value2,
+#   }
+indent_dictionary_value = true
+
+# The number of columns to use for indentation.
+indent_width = 4
+
+# Join short lines into one line. E.g., single line 'if' statements.
+join_multiple_lines = false
+
+# Do not include spaces around selected binary operators. For example:
+#
+#   1 + 2 * 3 - 4 / 5
+#
+# will be formatted as follows when configured with "*,/":
+#
+#   1 + 2*3 - 4/5
+no_spaces_around_selected_binary_operators = ''
+
+# Use spaces around default or named assigns.
+spaces_around_default_or_named_assign = false
+
+# Adds a space after the opening '{' and before the ending '}' dict delimiters.
+#
+#   {1: 2}
+#
+# will be formatted as:
+#
+#   { 1: 2 }
+spaces_around_dict_delimiters = false
+
+# Adds a space after the opening '[' and before the ending ']' list delimiters.
+#
+#   [1, 2]
+#
+# will be formatted as:
+#
+#   [ 1, 2 ]
+spaces_around_list_delimiters = false
+
+# Use spaces around the power operator.
+spaces_around_power_operator = false
+
+# Use spaces around the subscript / slice operator.  For example:
+#
+#   my_list[1 : 10 : 2]
+spaces_around_subscript_colon = false
+
+# Adds a space after the opening '(' and before the ending ')' tuple delimiters.
+#
+#   (1, 2, 3)
+#
+# will be formatted as:
+#
+#   ( 1, 2, 3 )
+spaces_around_tuple_delimiters = false
+
+# The number of spaces required before a trailing comment.
+# This can be a single value (representing the number of spaces
+# before each trailing comment) or list of values (representing
+# alignment column values; trailing comments within a block will
+# be aligned to the first column value that is greater than the maximum
+# line length within the block). For example:
+#
+# With spaces_before_comment=5:
+#
+#   1 + 1 # Adding values
+#
+# will be formatted as:
+#
+#   1 + 1     # Adding values <-- 5 spaces between the end of the statement and comment
+#
+# With spaces_before_comment = '15, 20:'
+#
+#   1 + 1 # Adding values
+#   two + two # More adding
+#
+#   longer_statement # This is a longer statement
+#   short # This is a shorter statement
+#
+#   a_very_long_statement_that_extends_beyond_the_final_column # Comment
+#   short # This is a shorter statement
+#
+# will be formatted as:
+#
+#   1 + 1          # Adding values <-- end of line comments in block aligned to col 15
+#   two + two      # More adding
+#
+#   longer_statement    # This is a longer statement <-- end of line comments in block aligned to col 20
+#   short               # This is a shorter statement
+#
+#   a_very_long_statement_that_extends_beyond_the_final_column  # Comment <-- the end of line comments are aligned based on the line length
+#   short                                                       # This is a shorter statement
+#
+spaces_before_comment = 2
+
+# Insert a space between the ending comma and closing bracket of a list,
+# etc.
+space_between_ending_comma_and_closing_bracket = false
+
+# Use spaces inside brackets, braces, and parentheses.  For example:
+#
+#   method_call( 1 )
+#   my_dict[ 3 ][ 1 ][ get_index( *args, **kwargs ) ]
+#   my_set = { 1, 2, 3 }
+space_inside_brackets = false
+
+# Split before arguments
+split_all_comma_separated_values = false
+
+# Split before arguments, but do not split all subexpressions recursively
+# (unless needed).
+split_all_top_level_comma_separated_values = false
+
+# Split before arguments if the argument list is terminated by a
+# comma.
+split_arguments_when_comma_terminated = false
+
+# Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@'
+# rather than after.
+split_before_arithmetic_operator = false
+
+# Set to True to prefer splitting before '&', '|' or '^' rather than
+# after.
+split_before_bitwise_operator = false
+
+# Split before the closing bracket if a list or dict literal doesn't fit on
+# a single line.
+split_before_closing_bracket = true
+
+# Split before a dictionary or set generator (comp_for). For example, note
+# the split before the 'for':
+#
+#   foo = {
+#       variable: 'Hello world, have a nice day!'
+#       for variable in bar if variable != 42
+#   }
+split_before_dict_set_generator = false
+
+# Split before the '.' if we need to split a longer expression:
+#
+#   foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d))
+#
+# would reformat to something like:
+#
+#   foo = ('This is a really long string: {}, {}, {}, {}'
+#          .format(a, b, c, d))
+split_before_dot = false
+
+# Split after the opening paren which surrounds an expression if it doesn't
+# fit on a single line.
+split_before_expression_after_opening_paren = false
+
+# If an argument / parameter list is going to be split, then split before
+# the first argument.
+split_before_first_argument = false
+
+# Set to True to prefer splitting before 'and' or 'or' rather than
+# after.
+split_before_logical_operator = false
+
+# Split named assignments onto individual lines.
+split_before_named_assigns = true
+
+# Set to True to split list comprehensions and generators that have
+# non-trivial expressions and multiple clauses before each of these
+# clauses. For example:
+#
+#   result = [
+#       a_long_var + 100 for a_long_var in xrange(1000)
+#       if a_long_var % 10]
+#
+# would reformat to something like:
+#
+#   result = [
+#       a_long_var + 100
+#       for a_long_var in xrange(1000)
+#       if a_long_var % 10]
+split_complex_comprehension = true
+
+# The penalty for splitting right after the opening bracket.
+split_penalty_after_opening_bracket = 300
+
+# The penalty for splitting the line after a unary operator.
+split_penalty_after_unary_operator = 10000
+
+# The penalty of splitting the line around the '+', '-', '*', '/', '//',
+# ``%``, and '@' operators.
+split_penalty_arithmetic_operator = 300
+
+# The penalty for splitting right before an if expression.
+split_penalty_before_if_expr = 0
+
+# The penalty of splitting the line around the '&', '|', and '^'
+# operators.
+split_penalty_bitwise_operator = 300
+
+# The penalty for splitting a list comprehension or generator
+# expression.
+split_penalty_comprehension = 2100
+
+# The penalty for characters over the column limit.
+split_penalty_excess_character = 7000
+
+# The penalty incurred by adding a line split to the unwrapped line. The
+# more line splits added the higher the penalty.
+split_penalty_for_added_line_split = 20
+
+# The penalty of splitting a list of "import as" names. For example:
+#
+#   from a_very_long_or_indented_module_name_yada_yad import (long_argument_1,
+#                                                             long_argument_2,
+#                                                             long_argument_3)
+#
+# would reformat to something like:
+#
+#   from a_very_long_or_indented_module_name_yada_yad import (
+#       long_argument_1, long_argument_2, long_argument_3)
+split_penalty_import_names = 0
+
+# The penalty of splitting the line around the 'and' and 'or'
+# operators.
+split_penalty_logical_operator = 300
+
+# Use the Tab character for indentation.
+use_tabs = false
+
+# Ignore directories
+[tool.yapfignore]
+ignore_patterns = [
+    "runs/**/*.py",
+    "wandb/**/*.py",
+    "build/**/*.py",
+]
+
+[tool.pydocstyle]
+convention="google"
+add_ignore="D100,D101,D102,D103,D104,D105,D107,D400,D401,D415"
+add_select="D404"

From 12527472f4bd2b180d3c325e3030b94ef1134bb4 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 12:42:27 +0000
Subject: [PATCH 04/24] update README

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index e9083652..07c253be 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,8 @@ NOTE: This assumes you have `numpy` and `torch` installed.
 
 Installing `megablocks[gg]` enables dMoE computation with grouped GEMM. This feature is enabled by setting the `mlp_impl` argument to `grouped`. This is currently our recommended path for Hopper-generation GPUs.
 
+To contribute to MegaBlocks, install `megablocks[dev]`. Run `pre-commit install` to configure the [pre-commit](https://pre-commit.com/) hook and then run `pre-commit run` before each commit to automatically format the code.
+
 MegaBlocks can be installed with all dependencies via the `megablocks[all]` package.
 
 # :steam_locomotive: Usage

From 43e02c92ea8de956f9114d32d1dcf867612f4e2f Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 16:47:26 +0000
Subject: [PATCH 05/24] remove section separations

---
 setup.py | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/setup.py b/setup.py
index bdf78bb9..b42c1cde 100644
--- a/setup.py
+++ b/setup.py
@@ -11,17 +11,14 @@
 is_torch_installed = False
 try:
     import torch
-    from torch.utils.cpp_extension import CUDA_HOME, BuildExtension, CUDAExtension
+    from torch.utils.cpp_extension import (CUDA_HOME, BuildExtension,
+                                           CUDAExtension,)
     is_torch_installed = True
 except ModuleNotFoundError as e:
     raise ModuleNotFoundError(
         "No module named 'torch'. Torch is required to install this repo."
     ) from e
 
-###############################################################################
-# Requirements
-###############################################################################
-
 install_requires = [
     'numpy>=1.21.5,<2.1.0',
     'torch>=2.3.0,<2.4',
@@ -48,9 +45,6 @@
 extra_deps['all'] = list(
     set(dep for deps in extra_deps.values() for dep in deps))
 
-###############################################################################
-# Extension Modules
-###############################################################################
 
 cmdclass = {}
 ext_modules = []
@@ -92,9 +86,6 @@
 else:
     warnings.warn('Warning: No CUDA devices; cuda code will not be compiled.')
 
-###############################################################################
-# README
-###############################################################################
 
 # convert README to long description on PyPI, optionally skipping certain
 # marked sections if present (e.g., for coverage / code quality badges)
@@ -113,10 +104,6 @@
         long_description = long_description[:start] + long_description[
             end + len(end_tag):]
 
-###############################################################################
-# README
-###############################################################################
-
 setup(
     name='megablocks',
     version='0.5.1',

From 36d10a70f1a0d8c00065cf96084dda1e7214699f Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 16:47:49 +0000
Subject: [PATCH 06/24] remove unused variable

---
 setup.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/setup.py b/setup.py
index b42c1cde..4cc5a531 100644
--- a/setup.py
+++ b/setup.py
@@ -8,12 +8,10 @@
 
 # We require torch in setup.py to build cpp extensions "ahead of time"
 # More info here: # https://pytorch.org/tutorials/advanced/cpp_extension.html
-is_torch_installed = False
 try:
     import torch
     from torch.utils.cpp_extension import (CUDA_HOME, BuildExtension,
                                            CUDAExtension,)
-    is_torch_installed = True
 except ModuleNotFoundError as e:
     raise ModuleNotFoundError(
         "No module named 'torch'. Torch is required to install this repo."

From eeb0d0872c1c5004faf8d0088c788099b0940f16 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 16:49:10 +0000
Subject: [PATCH 07/24] add docstring

---
 setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/setup.py b/setup.py
index 4cc5a531..3afec6e4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,8 @@
 # Copyright 2024 MosaicML MegaBlocks authors
 # SPDX-License-Identifier: Apache-2.0
 
+"""MegaBlocks package setup."""
+
 import os
 import warnings
 

From c651d966f6422efd8d262fe131140bbb3157ad93 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 18:24:49 +0000
Subject: [PATCH 08/24] read version from megablocks/__init__.py

---
 megablocks/__init__.py | 14 ++++++++++++--
 megablocks/_version.py |  6 ++++++
 setup.py               | 26 ++++++++++++++++++++++++--
 3 files changed, 42 insertions(+), 4 deletions(-)
 create mode 100644 megablocks/_version.py

diff --git a/megablocks/__init__.py b/megablocks/__init__.py
index 90e45114..d792a8fd 100644
--- a/megablocks/__init__.py
+++ b/megablocks/__init__.py
@@ -1,2 +1,12 @@
-import megablocks.layers.dmoe
-import megablocks.layers.moe
+# Copyright 2024 MosaicML MegaBlocks authors
+# SPDX-License-Identifier: Apache-2.0
+
+from megablocks._version import __version__
+from megablocks.layers import dmoe, moe
+
+"""Some key classes are available directly in the ``MegaBlocks`` namespace."""
+
+__all__ = [
+    'dmoe',
+    'moe',
+]
diff --git a/megablocks/_version.py b/megablocks/_version.py
new file mode 100644
index 00000000..2bb5d505
--- /dev/null
+++ b/megablocks/_version.py
@@ -0,0 +1,6 @@
+# Copyright 2022 MegaBlocks Composer authors
+# SPDX-License-Identifier: Apache-2.0
+
+"""The MegaBlocks Version."""
+
+__version__ = '0.5.1'
diff --git a/setup.py b/setup.py
index 3afec6e4..fbd6a8e3 100644
--- a/setup.py
+++ b/setup.py
@@ -4,6 +4,7 @@
 """MegaBlocks package setup."""
 
 import os
+import re
 import warnings
 
 from setuptools import find_packages, setup
@@ -19,6 +20,27 @@
         "No module named 'torch'. Torch is required to install this repo."
     ) from e
 
+
+_PACKAGE_NAME = 'megablocks'
+_PACKAGE_DIR = 'megablocks'
+_REPO_REAL_PATH = os.path.dirname(os.path.realpath(__file__))
+_PACKAGE_REAL_PATH = os.path.join(_REPO_REAL_PATH, _PACKAGE_DIR)
+
+# Read the package version
+# We can't use `.__version__` from the library since it's not installed yet
+with open(os.path.join(_PACKAGE_REAL_PATH, '__init__.py'), encoding='utf-8') as f:
+    content = f.read()
+    print(f"Content: {content}")
+# regex: '__version__', whitespace?, '=', whitespace, quote, version, quote
+# we put parens around the version so that it becomes elem 1 of the match
+expr = re.compile(
+    r"""^__version__\s*=\s*['"]([0-9]+\.[0-9]+\.[0-9]+(?:\.\w+)?)['"]""",
+    re.MULTILINE,
+)
+repo_version = expr.findall(content)[0]
+
+
+
 install_requires = [
     'numpy>=1.21.5,<2.1.0',
     'torch>=2.3.0,<2.4',
@@ -105,8 +127,8 @@
             end + len(end_tag):]
 
 setup(
-    name='megablocks',
-    version='0.5.1',
+    name=_PACKAGE_NAME,
+    version=repo_version,
     author='Trevor Gale',
     author_email='tgale@stanford.edu',
     description='MegaBlocks',

From f62c03492e0561da83633ad8bcf290378539b406 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 18:31:55 +0000
Subject: [PATCH 09/24] fix reading repo version

---
 setup.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/setup.py b/setup.py
index fbd6a8e3..d2db1600 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,6 @@
 """MegaBlocks package setup."""
 
 import os
-import re
 import warnings
 
 from setuptools import find_packages, setup
@@ -17,7 +16,7 @@
                                            CUDAExtension,)
 except ModuleNotFoundError as e:
     raise ModuleNotFoundError(
-        "No module named 'torch'. Torch is required to install this repo."
+        "No module named 'torch'. `torch` is required to install this repo."
     ) from e
 
 
@@ -28,18 +27,11 @@
 
 # Read the package version
 # We can't use `.__version__` from the library since it's not installed yet
-with open(os.path.join(_PACKAGE_REAL_PATH, '__init__.py'), encoding='utf-8') as f:
-    content = f.read()
-    print(f"Content: {content}")
-# regex: '__version__', whitespace?, '=', whitespace, quote, version, quote
-# we put parens around the version so that it becomes elem 1 of the match
-expr = re.compile(
-    r"""^__version__\s*=\s*['"]([0-9]+\.[0-9]+\.[0-9]+(?:\.\w+)?)['"]""",
-    re.MULTILINE,
-)
-repo_version = expr.findall(content)[0]
-
-
+with open(os.path.join(_PACKAGE_REAL_PATH, '_version.py'), encoding='utf-8') as f:
+    version_globals = {}
+    version_locals = {}
+    exec(f.read(), version_globals, version_locals)
+    repo_version = version_locals['__version__']
 
 install_requires = [
     'numpy>=1.21.5,<2.1.0',

From 66454f780ece675f156f7d6bcae1221df5074311 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 19:28:00 +0000
Subject: [PATCH 10/24] add type hints

---
 setup.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index d2db1600..26597cbe 100644
--- a/setup.py
+++ b/setup.py
@@ -5,6 +5,7 @@
 
 import os
 import warnings
+from typing import Any, Dict, Mapping
 
 from setuptools import find_packages, setup
 
@@ -28,17 +29,18 @@
 # Read the package version
 # We can't use `.__version__` from the library since it's not installed yet
 with open(os.path.join(_PACKAGE_REAL_PATH, '_version.py'), encoding='utf-8') as f:
-    version_globals = {}
-    version_locals = {}
-    exec(f.read(), version_globals, version_locals)
+    version_globals: Dict[str, Any] = {}
+    version_locals: Mapping[str, object] = {}
+    content = f.read()
+    exec(content, version_globals, version_locals)
     repo_version = version_locals['__version__']
 
 install_requires = [
     'numpy>=1.21.5,<2.1.0',
+    # 'stanford-stk==0.7.0',
+    'stanford-stk @ git+https://git@github.com/eitanturok/stk.git',
     'torch>=2.3.0,<2.4',
     'triton>=2.1.0',
-    # 'stanford-stk==0.7.0',
-    'stanford-stk @ git+https://git@github.com/eitanturok/stk.git'
 ]
 
 extra_deps = {}

From 747b027e01bf6580db25640cf7566a692639e6cf Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 19:32:56 +0000
Subject: [PATCH 11/24] add classifiers, better long-description

---
 setup.py | 52 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/setup.py b/setup.py
index 26597cbe..c1f1f2d1 100644
--- a/setup.py
+++ b/setup.py
@@ -35,6 +35,35 @@
     exec(content, version_globals, version_locals)
     repo_version = version_locals['__version__']
 
+
+with open('README.md', 'r', encoding='utf-8') as fh:
+    long_description = fh.read()
+
+# Hide the content between <!-- SETUPTOOLS_LONG_DESCRIPTION_HIDE_BEGIN --> and
+# <!-- SETUPTOOLS_LONG_DESCRIPTION_HIDE_END --> tags in the README
+while True:
+    start_tag = '<!-- SETUPTOOLS_LONG_DESCRIPTION_HIDE_BEGIN -->'
+    end_tag = '<!-- SETUPTOOLS_LONG_DESCRIPTION_HIDE_END -->'
+    start = long_description.find(start_tag)
+    end = long_description.find(end_tag)
+    if start == -1:
+        assert end == -1, 'there should be a balanced number of start and ends'
+        break
+    else:
+        assert end != -1, 'there should be a balanced number of start and ends'
+        long_description = long_description[:start] + \
+            long_description[end + len(end_tag):]
+
+
+classifiers = [
+    'Programming Language :: Python :: 3',
+    'Programming Language :: Python :: 3.9',
+    'Programming Language :: Python :: 3.10',
+    'Programming Language :: Python :: 3.11',
+    'License :: OSI Approved :: BSD License',
+    'Operating System :: Unix',
+]
+
 install_requires = [
     'numpy>=1.21.5,<2.1.0',
     # 'stanford-stk==0.7.0',
@@ -103,23 +132,6 @@
     warnings.warn('Warning: No CUDA devices; cuda code will not be compiled.')
 
 
-# convert README to long description on PyPI, optionally skipping certain
-# marked sections if present (e.g., for coverage / code quality badges)
-with open('README.md', 'r', encoding='utf-8') as fh:
-    long_description = fh.read()
-while True:
-    start_tag = '<!-- LONG_DESCRIPTION_SKIP_START -->'
-    end_tag = '<!-- LONG_DESCRIPTION_SKIP_END -->'
-    start = long_description.find(start_tag)
-    end = long_description.find(end_tag)
-    if start == -1:
-        assert end == -1, 'Skipped section starts and ends imbalanced'
-        break
-    else:
-        assert end != -1, 'Skipped section starts and ends imbalanced'
-        long_description = long_description[:start] + long_description[
-            end + len(end_tag):]
-
 setup(
     name=_PACKAGE_NAME,
     version=repo_version,
@@ -129,11 +141,7 @@
     long_description=long_description,
     long_description_content_type='text/markdown',
     url='https://github.com/stanford-futuredata/megablocks',
-    classifiers=[
-        'Programming Language :: Python :: 3',
-        'License :: OSI Approved :: BSD License',
-        'Operating System :: Unix',
-    ],
+    classifiers=classifiers,
     packages=find_packages(exclude=['tests*']),
     ext_modules=ext_modules,
     cmdclass=cmdclass,

From f979eed878f55f8b0e6665aea5ea5aeff08e208a Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 19:34:05 +0000
Subject: [PATCH 12/24] update url

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c1f1f2d1..40d02415 100644
--- a/setup.py
+++ b/setup.py
@@ -140,7 +140,7 @@
     description='MegaBlocks',
     long_description=long_description,
     long_description_content_type='text/markdown',
-    url='https://github.com/stanford-futuredata/megablocks',
+    url='https://github.com/databricks/megablocks',
     classifiers=classifiers,
     packages=find_packages(exclude=['tests*']),
     ext_modules=ext_modules,

From b21d0f90665e74a2ea1dbb7112e72d4c2f5bb8c7 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 19:40:49 +0000
Subject: [PATCH 13/24] exclude more packages

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 40d02415..cb7c83f4 100644
--- a/setup.py
+++ b/setup.py
@@ -142,7 +142,7 @@
     long_description_content_type='text/markdown',
     url='https://github.com/databricks/megablocks',
     classifiers=classifiers,
-    packages=find_packages(exclude=['tests*']),
+    packages=find_packages(exclude=['tests*', 'third_party*', 'yamls*', 'exp*', '.github*']),
     ext_modules=ext_modules,
     cmdclass=cmdclass,
     install_requires=install_requires,

From c3a993b270b19095479fde4392de7fa26a305b2b Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 19:41:28 +0000
Subject: [PATCH 14/24] add python_requires

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index cb7c83f4..d8700869 100644
--- a/setup.py
+++ b/setup.py
@@ -147,4 +147,5 @@
     cmdclass=cmdclass,
     install_requires=install_requires,
     extras_require=extra_deps,
+    python_requires='>=3.9',
 )

From 296fc3c0ce43d861e03b577a5352dfd12ee84242 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 19:42:13 +0000
Subject: [PATCH 15/24] update

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index d8700869..b2dfbdaa 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,8 @@
 
 # Read the package version
 # We can't use `.__version__` from the library since it's not installed yet
-with open(os.path.join(_PACKAGE_REAL_PATH, '_version.py'), encoding='utf-8') as f:
+version_path = os.path.join(_PACKAGE_REAL_PATH, '_version.py')
+with open(version_path, encoding='utf-8') as f:
     version_globals: Dict[str, Any] = {}
     version_locals: Mapping[str, object] = {}
     content = f.read()

From 310faf1ecd52ae96c4ff8191de39fd5d3fd3d9ed Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 19:43:42 +0000
Subject: [PATCH 16/24] use Composer's .gitignore

---
 .gitignore | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 166 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index 898d43c9..789c7518 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,168 @@
-*~
-gpt2-merges.txt
-gpt2-vocab.json
-core
-__pycache__
-*.egg-info
-dist
-build
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
 *.so
-.vscode/
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
 .coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+docs/path/
+docs/source/_build/
+docs/source/api_reference/
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Virtual Env
+venv/
+
+# WandB
+wandb/
+
+# Neptune
+.neptune/
+
+# Spacemacs
+._#*
+.#*
+.vscode/
+
+# Vim
+*.swp
+
+# README backups
+README.md.bkp
+
+# OS X
+.DS_Store
+
+# Sphinx
+api_reference/
+
+# Node (for pyright)
+node_modules/
+package.json
+package-lock.json
+
+# pycharm
+.idea/
+
+# composer
+data/

From 1d6d1e66f45748e56eee254cd33ffc7e849162e0 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 20:21:04 +0000
Subject: [PATCH 17/24] use Composer's pyproject.toml + my changes

---
 pyproject.toml | 657 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 613 insertions(+), 44 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 898fdfb6..288b5b4b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,46 +1,55 @@
 # build requirements
 [build-system]
-# if you get an error like "RuntimeError: Cuda extensions are being compiled with a version of Cuda that does not match the version used to compile Pytorch binaries.  Pytorch binaries were compiled with Cuda 11.7.", you need to:
-#   pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
-# then build via:
-#   pip install --no-build-isolation -e .
-# since, by default, torch 2.0.1 only uses cuda 11.7
-requires = ["setuptools < 70.0.0", "packaging >= 21.0.0", "torch >= 2.3.0, < 2.4"]
+requires = ["setuptools < 70.0.0", "torch >= 2.3.0, < 2.4"]
 build-backend = "setuptools.build_meta"
 
 # iSort
 [tool.isort]
 multi_line_output = 0
 line_length = 80
-skip = [ "env", "wandb", "runs", "build", "node_modules", "examples/inference-deployments/mpt/mpt_7b_ft_handler.py" ]
-
-# Coverage
-[tool.coverage.run]
-parallel = true
-branch = true
-relative_files = true
-concurrency = ["thread"]
-include = [
-    "megablocks/*"
+include_trailing_comma = true
+split_on_trailing_comma = true
+
+# ruff
+[tool.ruff.lint]
+select = [
+    "C4",
+    # TODO port pydocstyle
+    # "D", # pydocstyle
+    "LOG",
+    "PERF",
+    "PLE",
+    "COM812",
+]
+[tool.ruff]
+exclude = [
+    "build/**",
+    "docs/**",
+    "node_modules/**",
 ]
 
 # Pyright
 [tool.pyright]
-exclude = ['env-**','**/ci-testing']
+exclude = ['env-**', 'venv*', '.venv']
 stubPath = ""  # suppress useless 'stubPath is not a valid directory' errors
-
-reportUnnecessaryIsInstance = "warning"
+# Disable checks for missing imports, as a conditional install of composer will not include them
+# Any incorrect imports will be discovered through test cases
+reportMissingImports="none"
+# Sometimes for code validation or readability we want redundant isinstance checks.
+reportUnnecessaryIsInstance = "none"
 reportMissingTypeStubs = "none"
+# forward() and initialize_object() have incompatible method overrides
 reportIncompatibleMethodOverride = "none"
 reportIncompatibleVariableOverride = "error"
 reportUnusedImport = "error"
-reportUnusedClass = "warning"
-reportUnusedFunction = "warning"
+reportUnusedClass = "error"
+reportUnusedFunction = "error"
 reportUnusedVariable = "error"
 reportDuplicateImport = "error"
 reportWildcardImportFromLibrary = "error"
-reportUntypedFunctionDecorator = "warning"
-reportPrivateImportUsage = "warning"
+reportUntypedFunctionDecorator = "error"
+# Pyright sometimes marks public imports as private
+reportPrivateImportUsage = "none"
 reportUndefinedVariable = "error"
 strictParameterNoneValue = true
 reportPropertyTypeMismatch = "error"
@@ -50,42 +59,103 @@ reportInvalidTypeVarUse = "error"
 reportOverlappingOverload = "error"
 reportUninitializedInstanceVariable = "error"
 reportInvalidStringEscapeSequence = "error"
-reportMissingParameterType = "warning"  # TODO: make this an error
-reportCallInDefaultInitializer = "none"  # TODO: make this an error
-reportUnnecessaryComparison = "warning"
+reportMissingParameterType = "none"  # TODO: make this an error. Many tests are missing parameter types.
+reportCallInDefaultInitializer = "error"
+reportUnnecessaryComparison = "error"
 reportSelfClsParameterName = "error"
-reportImplicitStringConcatenation = "warning"  # TODO: make this an error
+# Need to ensure all implicit string concatenations are wrapped with an extra set of parentheses.
+reportImplicitStringConcatenation = "none"  # TODO: make this an error.
 reportInvalidStubStatement = "error"
 reportIncompleteStub = "error"
 reportUnsupportedDunderAll = "error"
 reportUnusedCoroutine = "error"
 
+# Coverage
+[tool.coverage.run]
+parallel = true
+branch = true
+relative_files = true
+concurrency = ["thread"]
+include = [
+    "megablocks/*"
+]
+
 # Pytest
 [tool.pytest.ini_options]
 # By default, skip gpu tests
 addopts = "--tb=short -m 'not gpu'"
 
 markers = [
-    # For distributed testing
+    # Tests that require a world_size of two should be annotated with `@pytest.mark.world_size(2)`.
+    # If not specified, the test will be assumed to have a world-size of one, which is
+    # equivalent to `@pytest.mark.world_size(1)`
     "world_size(val)",
+    # Tests that require a gpu should be annotated with `@pytest.mark.gpu`
+    "gpu",
+    # Tests which are run as part of the documentation build
+    "doctest",
     # Should be run during daily regression
     "daily",
     # Whether the test will be reading data from a remote source, and may require credentials
     "remote",
-    # whether the test requires a gpu
-    "gpu",
 ]
 
 filterwarnings = [
-    # "error",  # warnings should be treated like errors, but still need to fix some warnings
-    'ignore:ExtraArgumentWarning',  # extra arguments originate from pytest-specific CLI args
-    'ignore:DistributedDefaultValueWarning',  # default distributed values are fine
-    'ignore:NoDistributedWarning',  # running without distributed is fine
-    'ignore:Deterministic mode is activated:UserWarning',  # all tests run with deterministic mode
-    'ignore:SubsetNumBatchesWarning',  # different subsets OK for testing
-    'ignore:No optimizer:UserWarning',  # testing defaults
-    'ignore:No scheduler:UserWarning',  # testing defaults
-    'ignore::DeprecationWarning:tensorboard',  # ignore tensorboard
+    "error",  # Mark all warnings as errors
+
+    # Treat the warnings that python usually would ignore as warnings, not errors.
+    "default::DeprecationWarning",
+    'default::PendingDeprecationWarning',
+    'default::ImportWarning',
+    'ignore::ResourceWarning',  # Ignore these -- they are from errors like CPU OOMs
+
+    # Ignore the following warnings
+    'ignore:Deterministic mode is activated:UserWarning',  # All tests run with deterministic mode
+    'ignore:SubsetNumBatchesWarning',  # SubsetNumBatches is used extensively in testing
+    # ignore lambda warnings
+    'ignore:Running code eval locally may be insecure.*:UserWarning', # All tests run w/o lambdas at the moment
+    # allow training metrics
+    'ignore:Computing model evaluation metrics during training doubles the number of forward passes:UserWarning',
+    'ignore:No optimizer was specified.:UserWarning',  # OK to not specify an optimizer in the tests
+    # Ignore a bug in the pytorch dataloader
+    '''ignore:Exception ignored in. <function _MultiProcessingDataLoaderIter.__del__:pytest.PytestUnraisableExceptionWarning''',
+    # Ignore torchvision complaining about no c libraries (happens in the conda build)
+    'ignore:Failed to load image Python extension:UserWarning',
+    # Ignore a deprecation warning in the conda build
+    'ignore:distutils Version classes are deprecated:DeprecationWarning',
+    # Ignore a UserWarning from TorchMetrics about potentially large memory usage when batch sizes are extremely large
+    'ignore:Metric `SpearmanCorrcoef` will save all targets and predictions in the buffer:UserWarning:torchmetrics',
+    # Ignore a UserWarning from torch 1.12 due to DeepSpeed's use of positional args
+    'ignore:Positional args are being deprecated, use kwargs instead.*:UserWarning',
+    'ignore:torch.distributed._all_gather_base is a private function and will be deprecated.*:UserWarning',
+    'ignore:torch.distributed._reduce_scatter_base is a private function and will be deprecated.*:UserWarning',
+    # Ignore tensorboard deprecation warnings
+    'ignore:Call to deprecated create function Descriptor().*:DeprecationWarning:tensorboard',
+    'ignore:Call to deprecated create function EnumDescriptor().*:DeprecationWarning:tensorboard',
+    'ignore:Call to deprecated create function EnumValueDescriptor().*:DeprecationWarning:tensorboard',
+    'ignore:Call to deprecated create function FieldDescriptor().*:DeprecationWarning:tensorboard',
+    'ignore:Call to deprecated create function FileDescriptor().*:DeprecationWarning:tensorboard',
+    # Ignore TracerWarnings for operations potentially unsupported by model tracing
+    'ignore:.*might cause the trace to be incorrect.*:Warning',
+    'ignore:save_weights_only=True only saves weights for now, but will changed to also save metadata.*:UserWarning',
+    # Ignore has_cuda is deprecated warning please use torch.backends.cuda.is_built
+    '''ignore:'has_cuda' is deprecated, please use 'torch.backends.cuda.is_built():UserWarning''',
+    # Ignore has_cudnn is deprecated warning please use torch.backends.cudnn.is_available
+    '''ignore:'has_cudnn' is deprecated, please use 'torch.backends.cudnn.is_available():UserWarning''',
+    # Ignore has_mps is deprecated warning please use torch.backends.mps.is_built
+    '''ignore:'has_mps' is deprecated, please use 'torch.backends.mps.is_built():UserWarning''',
+    # Ignore has_mkldnn is deprecated warning please use torch.backends.mkldnn.is_available
+    '''ignore:'has_mkldnn' is deprecated, please use 'torch.backends.mkldnn.is_available():UserWarning''',
+    # Ignore torch distributed deprecated warnings
+    '''ignore:torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead:UserWarning''',
+    # Ignore torch sharded tensor deprecated warnings
+    '''ignore:Please use DTensor instead and we are deprecating ShardedTensor.:UserWarning''',
+    # Ignore torch pytree deprecated warnings
+    '''ignore:torch.utils._pytree._register_pytree_node is deprecated.*:UserWarning''',
+    # Ignore autograd kernel warning inside DeepSpeed
+    '''ignore:.*an autograd kernel was not registered to the Autograd key.*:UserWarning''',
+    # Ignore save_state_dict / load_state_dict deprecation warnings
+    '''ignore:'.*_state_dict' is deprecated and will be removed in future versions.*:UserWarning''',
 ]
 
 # Enable logging for pytest
@@ -173,7 +243,7 @@ blank_line_before_nested_class_or_def = true
 #        'key1': 'value1',
 #        'key2': 'value2',
 #    })
-coalesce_brackets = false
+coalesce_brackets = true
 
 # The column limit.
 column_limit = 80
@@ -208,7 +278,7 @@ continuation_indent_width = 4
 #       start_ts=now()-timedelta(days=3),
 #       end_ts=now(),
 #   )        # <--- this bracket is dedented and on a separate line
-dedent_closing_brackets = false
+dedent_closing_brackets = true
 
 # Disable the heuristic which places each list element on a separate line
 # if the list is comma-terminated.
@@ -379,7 +449,7 @@ split_all_top_level_comma_separated_values = false
 
 # Split before arguments if the argument list is terminated by a
 # comma.
-split_arguments_when_comma_terminated = false
+split_arguments_when_comma_terminated = true
 
 # Set to True to prefer splitting before '+', '-', '*', '/', '//', or '@'
 # rather than after.
@@ -498,7 +568,506 @@ ignore_patterns = [
     "build/**/*.py",
 ]
 
+[tool.pylint.MASTER]
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loaded into the active Python interpreter and may
+# run arbitrary code.
+extension-pkg-allow-list="megablocks_ops"
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loaded into the active Python interpreter and may
+# run arbitrary code. (This is an alternative name to extension-pkg-allow-list
+# for backward compatibility.)
+extension-pkg-whitelist="megablocks_ops"
+
+# Return non-zero exit code if any of these messages/categories are detected,
+# even if score is above --fail-under value. Syntax same as enable. Messages
+# specified are enabled, while categories only check already-enabled messages.
+fail-on=""
+
+# Specify a score threshold to be exceeded before program exits with error.
+# fail-under=10.0
+
+# Files or directories to be skipped. They should be base names, not paths.
+ignore="CVS"
+
+# Add files or directories matching the regex patterns to the ignore-list. The
+# regex matches against paths and can be in Posix or Windows format.
+ignore-paths=""
+
+# Files or directories matching the regex patterns are skipped. The regex
+# matches against base names, not paths.
+ignore-patterns=""
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+init-hook=""
+
+# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
+# number of processors available to use.
+jobs=0
+
+# Control the amount of potential inferred values when inferring a single
+# object. This can help the performance when dealing with large functions or
+# complex, nested conditions.
+limit-inference-results=100
+
+# List of plugins (as comma separated values of python module names) to load,
+# usually to register additional checkers.
+# load-plugins=
+
+# Pickle collected data for later comparisons.
+# persistent=yes
+
+# Minimum Python version to use for version dependent checks. Will default to
+# the version used to run pylint.
+py-version=3.9
+
+# When enabled, pylint would attempt to guess common misconfiguration and emit
+# user-friendly hints instead of false-positive error messages.
+suggestion-mode="yes"
+
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension="no"
+
+
+[tool.pylint.'MESSAGES CONTROL']
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
+confidence=""
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable="""raw-checker-failed,
+        bad-inline-option,
+        locally-disabled,
+        file-ignored,
+        suppressed-message,
+        useless-suppression,
+        deprecated-pragma,
+        similarities,
+        typecheck,
+        design,
+        missing-module-docstring,
+        wrong-import-position,
+        use-symbolic-message-instead,
+        arguments-differ,
+        import-outside-toplevel,
+        too-many-nested-block,
+        signature-differs,
+        useless-import-alias"""
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+enable="c-extension-no-member"
+
+
+[tool.pylint.'REPORTS']
+
+# Python expression which should return a score less than or equal to 10. You
+# have access to the variables 'error', 'warning', 'refactor', and 'convention'
+# which contain the number of messages in each category, as well as 'statement'
+# which is the total number of statements analyzed. This score is used by the
+# global evaluation report (RP0004).
+# evaluation="10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)"
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details.
+#msg-template=
+
+# Set the output format. Available formats are text, parseable, colorized, json
+# and msvs (visual studio). You can also give a reporter class, e.g.
+# mypackage.mymodule.MyReporterClass.
+output-format="colorized"
+
+# Tells whether to display a full report or only the messages.
+reports="no"
+
+# Activate the evaluation score.
+score="no"
+
+
+[tool.pylint.REFACTORING]
+
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=10
+
+# Complete name of functions that never returns. When checking for
+# inconsistent-return-statements if a never returning function is called then
+# it will be considered as an explicit return statement and no message will be
+# printed.
+never-returning-functions="sys.exit,argparse.parse_error"
+
+
+[tool.pylint.STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency="no"
+
+# This flag controls whether the implicit-str-concat should generate a warning
+# on implicit string concatenation in sequences defined over several lines.
+check-str-concat-over-line-jumps="no"
+
+
+[tool.pylint.LOGGING]
+
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style="old"
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules="logging"
+
+
+[tool.pylint.VARIABLES]
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=""
+
+# Tells whether unused global variables should be treated as a violation.
+allow-global-unused-variables="yes"
+
+# List of names allowed to shadow builtins
+allowed-redefined-builtins=""
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks="cb_,_cb"
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx="_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_"
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore.
+ignored-argument-names="_.*|^ignored_|^unused_"
+
+# Tells whether we should check for unused import in __init__ files.
+init-import="no"
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules="six.moves,past.builtins,future.builtins,builtins,io"
+
+
+[tool.pylint.MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=""
+
+# Regular expression of note tags to take in consideration.
+#notes-rgx=
+
+
+[tool.pylint.SPELLING]
+
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=4
+
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the 'python-enchant' package.
+spelling-dict=""
+
+# List of comma separated words that should be considered directives if they
+# appear at the beginning of a comment and should not be checked.
+spelling-ignore-comment-directives="fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:,pyright:,type:"
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=""
+
+# A path to a file that contains the private dictionary; one word per line.
+spelling-private-dict-file=""
+
+# Tells whether to store unknown words to the private dictionary (see the
+# --spelling-private-dict-file option) instead of raising a message.
+spelling-store-unknown-words="no"
+
+
+[tool.pylint.BASIC]
+
+# Naming style matching correct argument names.
+argument-naming-style="snake_case"
+
+# Regular expression matching correct argument names. Overrides argument-
+# naming-style.
+#argument-rgx=
+
+# Naming style matching correct attribute names.
+attr-naming-style="snake_case"
+
+# Regular expression matching correct attribute names. Overrides attr-naming-
+# style.
+#attr-rgx=
+
+# Bad variable names which should always be refused, separated by a comma.
+bad-names=""
+
+# Bad variable names regexes, separated by a comma. If names match any regex,
+# they will always be refused
+bad-names-rgxs=""
+
+# Naming style matching correct class attribute names.
+class-attribute-naming-style="any"
+
+# Regular expression matching correct class attribute names. Overrides class-
+# attribute-naming-style.
+#class-attribute-rgx=
+
+# Naming style matching correct class constant names.
+class-const-naming-style="UPPER_CASE"
+
+# Regular expression matching correct class constant names. Overrides class-
+# const-naming-style.
+#class-const-rgx=
+
+# Naming style matching correct class names.
+class-naming-style="PascalCase"
+
+# Regular expression matching correct class names. Overrides class-naming-
+# style.
+#class-rgx=
+
+# Naming style matching correct constant names.
+const-naming-style="UPPER_CASE"
+
+# Regular expression matching correct constant names. Overrides const-naming-
+# style.
+#const-rgx=
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+
+# Naming style matching correct function names.
+function-naming-style="snake_case"
+
+# Regular expression matching correct function names. Overrides function-
+# naming-style.
+#function-rgx=
+
+# Good variable names which should always be accepted, separated by a comma.
+good-names=""
+
+# Good variable names regexes, separated by a comma. If names match any regex,
+# they will always be accepted.
+good-names-rgxs="^[_a-z][_a-z0-9]*$"
+
+# Include a hint for the correct naming format with invalid-name.
+include-naming-hint="no"
+
+# Naming style matching correct inline iteration names.
+inlinevar-naming-style="any"
+
+# Regular expression matching correct inline iteration names. Overrides
+# inlinevar-naming-style.
+#inlinevar-rgx=
+
+# Naming style matching correct method names.
+method-naming-style="snake_case"
+
+# Regular expression matching correct method names. Overrides method-naming-
+# style.
+#method-rgx=
+
+# Naming style matching correct module names.
+module-naming-style="snake_case"
+
+# Regular expression matching correct module names. Overrides module-naming-
+# style.
+#module-rgx=
+
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=""
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx="^_"
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+# These decorators are taken into consideration only for invalid-name.
+property-classes="abc.abstractproperty"
+
+# Naming style matching correct variable names.
+variable-naming-style="snake_case"
+
+# Regular expression matching correct variable names. Overrides variable-
+# naming-style.
+#variable-rgx=
+
+
+[tool.pylint.SIMILARITIES]
+
+# Comments are removed from the similarity computation
+ignore-comments="yes"
+
+# Docstrings are removed from the similarity computation
+ignore-docstrings="yes"
+
+# Imports are removed from the similarity computation
+ignore-imports="no"
+
+# Signatures are removed from the similarity computation
+ignore-signatures="no"
+
+# Minimum number of lines for a similarity.
+min-similarity-lines=4
+
+
+[tool.pylint.FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=""
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines="^\\s*(# )?<?https?:\\/\\/\\S+>?$"
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# Maximum number of lines in a module.
+# max-module-lines=1000
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt="no"
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt="no"
+
+
+[tool.pylint.IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=""
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all="no"
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks="no"
+
+# Deprecated modules which should not be used, separated by a comma.
+deprecated-modules=""
+
+# Output a graph (.gv or any supported image format) of external dependencies
+# to the given file (report RP0402 must not be disabled).
+ext-import-graph=""
+
+# Output a graph (.gv or any supported image format) of all (i.e. internal and
+# external) dependencies to the given file (report RP0402 must not be
+# disabled).
+import-graph=""
+
+# Output a graph (.gv or any supported image format) of internal dependencies
+# to the given file (report RP0402 must not be disabled).
+int-import-graph=""
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=""
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party="enchant"
+
+# Couples of modules and preferred modules, separated by a comma.
+preferred-modules=""
+
+
+[tool.pylint.CLASSES]
+
+# Warn about protected attribute access inside special methods
+check-protected-access-in-special-methods="no"
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods="__init__,__new__,setUp,__post_init__"
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected="_asdict,_fields,_replace,_source,_make"
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg="cls"
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg="cls"
+
+
+[tool.pylint.DESIGN]
+
+# List of regular expressions of class ancestor names to ignore when counting
+# public methods (see R0903)
+exclude-too-few-public-methods=""
+
+# List of qualified class names to ignore when counting class parents (see
+# R0901)
+ignored-parents=""
+
+# Maximum number of arguments for function / method.
+max-args=5
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of branches for function / method body.
+max-branches=12
+
+# Maximum number of locals for function / method body.
+max-locals=15
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+
+[tool.pylint.EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "BaseException, Exception".
+overgeneral-exceptions="BaseException,Exception"
+
 [tool.pydocstyle]
 convention="google"
-add_ignore="D100,D101,D102,D103,D104,D105,D107,D400,D401,D415"
-add_select="D404"
+add_ignore="D102,D105,D107,D401"
+add_select="D400,D404"

From 082fe06f723d8ce6cb75c7ea8a624333082736a3 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 24 Jul 2024 21:08:12 +0000
Subject: [PATCH 18/24] remove my stk fork

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index b2dfbdaa..2ec34c7d 100644
--- a/setup.py
+++ b/setup.py
@@ -67,8 +67,7 @@
 
 install_requires = [
     'numpy>=1.21.5,<2.1.0',
-    # 'stanford-stk==0.7.0',
-    'stanford-stk @ git+https://git@github.com/eitanturok/stk.git',
+    'stanford-stk==0.7.0',
     'torch>=2.3.0,<2.4',
     'triton>=2.1.0',
 ]

From 28e782d59e1eb51ad8250fb0ee3c1ffc2918c31a Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Thu, 25 Jul 2024 15:40:03 +0000
Subject: [PATCH 19/24] remove composer specific

---
 .gitignore | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 789c7518..000c47a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -163,6 +163,3 @@ package-lock.json
 
 # pycharm
 .idea/
-
-# composer
-data/

From 5932383b6d3cad333546ad2387a854d611ae14e3 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Thu, 25 Jul 2024 21:24:26 +0000
Subject: [PATCH 20/24] better error msg

---
 setup.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 2ec34c7d..af4b7f39 100644
--- a/setup.py
+++ b/setup.py
@@ -13,11 +13,14 @@
 # More info here: # https://pytorch.org/tutorials/advanced/cpp_extension.html
 try:
     import torch
-    from torch.utils.cpp_extension import (CUDA_HOME, BuildExtension,
-                                           CUDAExtension,)
+    from torch.utils.cpp_extension import (
+        CUDA_HOME,
+        BuildExtension,
+        CUDAExtension,
+    )
 except ModuleNotFoundError as e:
     raise ModuleNotFoundError(
-        "No module named 'torch'. `torch` is required to install this repo."
+        "No module named 'torch'. `torch` is required to install `MegaBlocks`."
     ) from e
 
 

From ad9a5616f6f735a1c99d0553e3c77eb8ef2c48f0 Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Thu, 25 Jul 2024 21:25:29 +0000
Subject: [PATCH 21/24] fix typo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 07c253be..dab9a763 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ NOTE: This assumes you have `numpy` and `torch` installed.
 
 Installing `megablocks[gg]` enables dMoE computation with grouped GEMM. This feature is enabled by setting the `mlp_impl` argument to `grouped`. This is currently our recommended path for Hopper-generation GPUs.
 
-To contribute to MegaBlock, install `megablocks[dev]`. Run `pre-commit install` to configure the [pre-commit](https://pre-commit.com/) hook and then run `pre-commit run` before each commit to automatically format the code.
+To contribute to MegaBlocks, install `megablocks[dev]`. Run `pre-commit install` to configure the [pre-commit](https://pre-commit.com/) hook and then run `pre-commit run` before each commit to automatically format the code.
 
 MegaBlocks can be installed with all dependencies via the `megablocks[all]` package.
 

From b836fa92e4a232c1350e17e63a912f8b4fce0dfa Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Fri, 26 Jul 2024 23:44:34 +0000
Subject: [PATCH 22/24] add correct versions of stanford-stk, grouped_gemm; add
 packaging

---
 setup.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/setup.py b/setup.py
index af4b7f39..abd91dab 100644
--- a/setup.py
+++ b/setup.py
@@ -13,11 +13,8 @@
 # More info here: # https://pytorch.org/tutorials/advanced/cpp_extension.html
 try:
     import torch
-    from torch.utils.cpp_extension import (
-        CUDA_HOME,
-        BuildExtension,
-        CUDAExtension,
-    )
+    from torch.utils.cpp_extension import (CUDA_HOME, BuildExtension,
+                                           CUDAExtension,)
 except ModuleNotFoundError as e:
     raise ModuleNotFoundError(
         "No module named 'torch'. `torch` is required to install `MegaBlocks`."
@@ -70,7 +67,9 @@
 
 install_requires = [
     'numpy>=1.21.5,<2.1.0',
-    'stanford-stk==0.7.0',
+    'packaging>=21.3.0,<24.2',
+    # 'stanford-stk==0.7.0',
+    'stanford-stk @ git+https://git@github.com/eitanturok/stk.git',
     'torch>=2.3.0,<2.4',
     'triton>=2.1.0',
 ]
@@ -78,7 +77,8 @@
 extra_deps = {}
 
 extra_deps['gg'] = [
-    'grouped_gemm==0.1.4',
+    # 'grouped_gemm==0.1.4',
+    'grouped_gemm @ git+https://git@github.com/eitanturok/grouped_gemm.git',
 ]
 
 extra_deps['dev'] = [

From 99c6815d3d4bf81968b3c62cb5550eb8bf4fca4f Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 31 Jul 2024 15:22:30 +0000
Subject: [PATCH 23/24] test in my GA

---
 .github/workflows/pr-gpu.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml
index 0221752d..f13adf3b 100644
--- a/.github/workflows/pr-gpu.yaml
+++ b/.github/workflows/pr-gpu.yaml
@@ -4,10 +4,12 @@ on:
     branches:
     - main
     - release/*
+    - eitan-dev # todo: delete
   pull_request_target:
     branches:
     - main
     - release/**
+    - eitan-dev  # todo: delete
   workflow_dispatch:
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -15,7 +17,7 @@ concurrency:
 jobs:
   pytest-gpu:
     name: ${{ matrix.name }}
-    runs-on: linux-ubuntu-latest
+    runs-on: ubuntu-latest # todo: replace with linux-ubuntu-latest
     strategy:
       fail-fast: false
       matrix:

From 6af0c3ee45d5884bb9df36f36866c76260f3a69c Mon Sep 17 00:00:00 2001
From: Eitan Turok <eitan.turok@databricks.com>
Date: Wed, 31 Jul 2024 15:25:11 +0000
Subject: [PATCH 24/24] use my fork for testing

---
 .github/workflows/pr-gpu.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml
index f13adf3b..9cfc6bfa 100644
--- a/.github/workflows/pr-gpu.yaml
+++ b/.github/workflows/pr-gpu.yaml
@@ -38,7 +38,7 @@ jobs:
         container: ${{ matrix.container }}
         python_version: ${{ matrix.python_version }}
         gpu_num: ${{ matrix.gpu_num }}
-        git_repo: databricks/megablocks
+        git_repo: eitanturok/megablocks # todo: replace with databricks/megablocks
         pip_deps: "[all,testing]"
         pytest_command: "coverage run -m pytest tests" # todo: remove tests from pytest tests when we delete all tests outside of MegaBlocks repo
         pytest_markers: "gpu"