diff --git a/vllm/compilation/fusion_attn.py b/vllm/compilation/fusion_attn.py
index 79518b6f4f965..a40a8caf34a88 100644
--- a/vllm/compilation/fusion_attn.py
+++ b/vllm/compilation/fusion_attn.py
@@ -164,3 +164,6 @@ class AttnFusionPass(VllmInductorPass):
         logger.debug("Fused quantization onto %s attention nodes", count)
         self.dump_graph(graph, "after_attn_fusion")
         self.end_and_log()
+
+    def uuid(self):
+        return VllmInductorPass.hash_source(self, AttentionStaticQuantPattern)
diff --git a/vllm/compilation/inductor_pass.py b/vllm/compilation/inductor_pass.py
index 810d0801e9f38..2a149c65b3877 100644
--- a/vllm/compilation/inductor_pass.py
+++ b/vllm/compilation/inductor_pass.py
@@ -76,9 +76,10 @@ class InductorPass(CustomGraphPass):
         for src in srcs:
             if isinstance(src, str):
                 src_str = src
-            elif isinstance(src, types.FunctionType):
+            elif isinstance(src, (types.FunctionType, type)):
                 src_str = inspect.getsource(src)
             else:
+                # object instance
                 src_str = inspect.getsource(src.__class__)
             hasher.update(src_str.encode("utf-8"))
         return hasher.hexdigest()