Lightning-AI · ali-alshaar7 · Oct 2, 2024 · Oct 1, 2024 · Oct 2, 2024 · Oct 2, 2024
@@ -6,4 +6,5 @@
 # note: there is a bug around 0.10 with `MPS_Accelerator must implement all abstract methods`
 #  shall be resolved by https://github.com/microsoft/DeepSpeed/issues/4372
 deepspeed >=0.8.2, <=0.9.3; platform_system != "Windows" and platform_system != "Darwin"  # strict
-bitsandbytes >=0.42.0,<0.43.0
+bitsandbytes >=0.44.0,<0.44.2; sys_platform == 'linux' or sys_platform == 'win32'
+bitsandbytes >=0.42.0,<0.43.0 ; sys_platform == 'darwin'
@@ -8,4 +8,5 @@ hydra-core >=1.2.0, <1.4.0
 jsonargparse[signatures] >=4.27.7, <4.28.0
 rich >=12.3.0, <13.6.0
 tensorboardX >=2.2, <2.7.0  # min version is set by torch.onnx missing attribute
-bitsandbytes >=0.42.0,<0.43.0
+bitsandbytes >=0.44.0,<0.44.2; sys_platform == 'linux' or sys_platform == 'win32'
+bitsandbytes >=0.42.0,<0.43.0 ; sys_platform == 'darwin'
@@ -43,7 +43,7 @@
 
 
 class BitsandbytesPrecision(Precision):
-    """Plugin for quantizing weights with `bitsandbytes <https://github.com/TimDettmers/bitsandbytes>`__.
+    """Plugin for quantizing weights with `bitsandbytes <https://github.com/bitsandbytes-foundation/bitsandbytes>`__.
 
     .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.
 
@@ -184,11 +184,15 @@ def _replace_param(
     if param.device.type == "meta":
         if isinstance(param, bnb.nn.Params4bit):
             return bnb.nn.Params4bit(
-                data,
+                data=data,
                 requires_grad=data.requires_grad,
                 quant_state=quant_state,
+                blocksize=param.blocksize,
                 compress_statistics=param.compress_statistics,
                 quant_type=param.quant_type,
+                quant_storage=param.quant_storage,
+                module=param.module,
+                bnb_quantized=param.bnb_quantized,
             )
         return torch.nn.Parameter(data, requires_grad=data.requires_grad)
     param.data = data
@@ -322,6 +326,7 @@ def quantize_(self, weight: Optional[torch.Tensor] = None, device: Optional[torc
                 return
             assert isinstance(self.weight, bnb.nn.Params4bit)
             self.weight = self.quantize(self.weight, weight, device)
+            self.weight.bnb_quantized = True
 
         @staticmethod
         def quantize(
@@ -337,6 +342,7 @@ def quantize(
                 blocksize=params4bit.blocksize,
                 compress_statistics=params4bit.compress_statistics,
                 quant_type=params4bit.quant_type,
+                quant_storage=params4bit.quant_storage,
             )
             return _replace_param(params4bit, w_4bit, quant_state)
 

@@ -16,7 +16,7 @@
 
 
 class BitsandbytesPrecision(Precision, FabricBNBPrecision):
-    """Plugin for quantizing weights with `bitsandbytes <https://github.com/TimDettmers/bitsandbytes>`__.
+    """Plugin for quantizing weights with `bitsandbytes <https://github.com/bitsandbytes-foundation/bitsandbytes>`__.
 
     .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.