convert_hf : fix memory leak in lazy MoE conversion

The '_lazy' queue was sometimes self-referential, which created reference cycles among objects old enough to escape garbage collection, so memory use could keep growing until it was exhausted.
2024-07-15 21:09:04 -04:00 · 2024-07-15 21:09:04 -04:00 · b971122eb1
commit b971122eb1
parent 2a49a68d70
2 changed files with 23 additions and 51 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -3456,20 +3456,19 @@ class LazyTorchTensor(gguf.LazyBase):
        dtype = self._dtype_map[self.dtype]
        return gguf.LazyNumpyTensor(
            meta=gguf.LazyNumpyTensor.meta_with_dtype_and_shape(dtype, self.shape),
-            lazy=self._lazy,
            args=(self,),
-            func=(lambda s: s[0].numpy())
+            func=(lambda s: s.numpy())
        )

    @classmethod
-    def meta_with_dtype_and_shape(cls, dtype: torch.dtype, shape: torch.Size) -> Tensor:
+    def meta_with_dtype_and_shape(cls, dtype: torch.dtype, shape: tuple[int, ...]) -> Tensor:
        return torch.empty(size=shape, dtype=dtype, device="meta")

    @classmethod
    def from_safetensors_slice(cls, st_slice: Any) -> Tensor:
        dtype = cls._dtype_str_map[st_slice.get_dtype()]
-        shape = st_slice.get_shape()
-        lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(st_slice,), func=lambda s: s[0][:])
+        shape: tuple[int, ...] = tuple(st_slice.get_shape())
+        lazy = cls(meta=cls.meta_with_dtype_and_shape(dtype, shape), args=(st_slice,), func=lambda s: s[:])
        return cast(torch.Tensor, lazy)

    @classmethod
@@ -3482,7 +3481,7 @@ class LazyTorchTensor(gguf.LazyBase):
        if func is torch.Tensor.numpy:
            return args[0].numpy()

-        return LazyTorchTensor._wrap_fn(func)(*args, **kwargs)
+        return cls._wrap_fn(func)(*args, **kwargs)


 def parse_args() -> argparse.Namespace: