Update mz_gguf_loader.py

2024-10-28 04:07:11 +02:00 · 2024-10-28 04:07:11 +02:00 · ce903c0384
commit ce903c0384
parent db23e2ecc0
1 changed file with 1 addition and 0 deletions
--- a/mz_gguf_loader.py
+++ b/mz_gguf_loader.py
@@ -27,6 +27,7 @@ def quantize_load_state_dict(model, state_dict, device="cpu", cublas_ops=False):
            setattr(model, "cublas_half_matmul", True)
            print("Using cublas_ops")
        except:
            print("Failed to load cublas_ops")
            raise ImportError("Install cublas_ops (https://github.com/aredden/torch-cublas-hgemm) to use cublas_ops")
    else:
        linear_ops = F.linear