huggingface · muellerzr · Aug 15, 2024 · Aug 14, 2024 · Aug 14, 2024 · Aug 14, 2024
diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py
@@ -2678,11 +2678,10 @@ def log(self, values: dict, step: int | None = None, log_kwargs: dict | None = {
         for tracker in self.trackers:
             tracker.log(values, step=step, **log_kwargs.get(tracker.name, {}))
 
-    @on_main_process
     def end_training(self):
         """
-        Runs any special end training behaviors, such as stopping trackers on the main process only. Should always be
-        called at the end of your script if using experiment tracking.
+        Runs any special end training behaviors, such as stopping trackers on the main process only or destroying
+        the process group. Should always be called at the end of your script if using experiment tracking.
 
         Example:
 
@@ -2698,6 +2697,10 @@ def end_training(self):
         for tracker in self.trackers:
             tracker.finish()
 
+        if torch.distributed.is_initialized():
+            # needed when using torch.distributed.init_process_group
+            torch.distributed.destroy_process_group()
+
     def save(self, obj, f, safe_serialization=False):
         """
         Save the object passed to disk once per machine. Use in place of `torch.save`.