# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy
from enum import Enum
from itertools import product

import torch
from tqdm import tqdm

from vllm.config import VllmConfig, set_current_vllm_config
from vllm.model_executor.layers.fused_moe.config import FUSED_MOE_UNQUANTIZED_CONFIG
from vllm.platforms import current_platform

from .common import (
    Config,
    RankTensors,
    WeightTensors,
    reference_moe_impl,
    run_modular_kernel,
)
from .mk_objects import (
    MK_FUSED_EXPERT_TYPES,
    MK_MULTI_GPU_PREPARE_FINALIZE_TYPES,
    MK_QUANT_CONFIGS,
)
from .parallel_utils import ProcessGroupInfo, parallel_launch_with_config


class Result(Enum):
    PASS = 1
    FAIL = 2
    SKIP = 3


def rank_worker(
    pgi: ProcessGroupInfo,
    vllm_config: VllmConfig,
    cpu_group,
    config: Config,
    weights: WeightTensors,
):
    current_platform.seed_everything(pgi.rank)

    # sanity check
    from vllm import envs

    if config.fused_moe_chunk_size is not None:
        assert config.fused_moe_chunk_size == envs.VLLM_FUSED_MOE_CHUNK_SIZE

    # get weights to this device
    weights.to_current_device()

    Ms = config.Ms
    assert isinstance(Ms, list)
    TOPKs = config.topks
    assert isinstance(TOPKs, list)

    for m, topk in product(Ms, TOPKs):
        print(f"Running m={m}, topk={topk} ...")
        # override m and topk
        cfgx = copy.deepcopy(config)
        cfgx.Ms = m
        cfgx.topks = topk

        # inputs for rank
        rank_tensors = RankTensors.make(cfgx, pgi)

        # modular kernel out
        mk_out = run_modular_kernel(pgi, vllm_config, cfgx, weights, rank_tensors)

        with set_current_vllm_config(vllm_config):
            ref_out = reference_moe_impl(cfgx, weights, rank_tensors)

        torch.testing.assert_close(ref_out, mk_out, atol=3e-2, rtol=3e-2)


def make_feature_matrix(csv_file_path: str):
    from dataclasses import asdict

    import pandas as pd

    def add_to_results(
        config: Config, success: Result, results_df: pd.DataFrame | None = None
    ):
        config_dict = asdict(config)
        config_dict["prepare_finalize_type"] = config_dict[
            "prepare_finalize_type"
        ].__name__
        config_dict["fused_experts_type"] = config_dict["fused_experts_type"].__name__
        config_dict["per_tensor_act_quant"] = config.is_per_tensor_act_quant

        quant_config_dict = config_dict["quant_config"]
        del config_dict["quant_config"]
        if quant_config_dict is None:
            quant_config = FUSED_MOE_UNQUANTIZED_CONFIG
            quant_config_dict = asdict(quant_config)
        config_dict |= quant_config_dict

        result_dict = config_dict | {"success": success.name}

        result_df = pd.DataFrame([result_dict])
        if results_df is None:
            results_df = result_df
        else:
            results_df = pd.concat([results_df, result_df], ignore_index=True)
        return results_df

    Ms = [64]
    Ks = [7168]  # hidden sizes
    Ns = [2048]
    TOPKs = [[4, 1]]
    Es = [32]
    DTYPEs = [torch.bfloat16]
    PF_TYPES = MK_MULTI_GPU_PREPARE_FINALIZE_TYPES
    FE_TYPES = MK_FUSED_EXPERT_TYPES
    Q_TYPES = MK_QUANT_CONFIGS

    combinations = list(
        product(Ms, Ks, Ns, Es, TOPKs, DTYPEs, PF_TYPES, FE_TYPES, Q_TYPES)
    )

    results_df: pd.DataFrame | None = None
    for m, k, n, e, topks, dtype, pf_type, experts_type, quant_config in tqdm(
        combinations
    ):
        config = Config(
            Ms=[m],
            K=k,
            N=n,
            E=e,
            topks=topks,
            dtype=dtype,
            prepare_finalize_type=pf_type,
            fused_experts_type=experts_type,
            quant_config=quant_config,
            world_size=2,
            fused_moe_chunk_size=None,
        )

        success = None
        if config.is_valid()[0]:
            print(f"Running config : {config.describe()} ...")
            try:
                weights: WeightTensors = WeightTensors.make(config)
                vllm_config, env_dict = config.make_env_data()
                parallel_launch_with_config(
                    config.world_size,
                    rank_worker,
                    vllm_config,
                    env_dict,
                    config,
                    weights,
                )
                success = Result.PASS
            except Exception as _:
                success = Result.FAIL
        else:
            success = Result.SKIP

        results_df = add_to_results(config, success, results_df)

    if results_df is not None:
        results_df.to_csv(csv_file_path)


if __name__ == "__main__":
    import argparse
    from pathlib import Path

    parser = argparse.ArgumentParser(
        description=(
            "Make ModularKernel feature matrix \n"
            "Example : python3 -m tests.kernels.moe.modular_kernel_tools.make_feature_matrix "  # noqa: E501
            "-f ./feature_matrices/feature_matrix.csv"
        )
    )
    parser.add_argument(
        "-f",
        "--feature-matrix-csv-file-path",
        type=str,
        required=True,
        help="Path of the .csv file to generate",
    )
    args = parser.parse_args()

    csv_path = args.feature_matrix_csv_file_path
    assert csv_path.endswith(".csv"), (
        f"Need a file path ending with .csv, got {csv_path}"
    )
    assert Path(csv_path).parent.is_dir(), (
        f"Cannot find parent directory for {Path(csv_path).parent}"
    )

    make_feature_matrix(args.feature_matrix_csv_file_path)