# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import itertools import logging from dataclasses import dataclass from functools import cached_property from pathlib import Path from typing import Literal import regex as re logger = logging.getLogger("mkdocs") ROOT_DIR = Path(__file__).parent.parent.parent.parent ROOT_DIR_RELATIVE = "../../../../.." EXAMPLE_DIR = ROOT_DIR / "examples" EXAMPLE_DOC_DIR = ROOT_DIR / "docs/examples" def title(text: str) -> str: # Default title case text = text.replace("_", " ").replace("/", " - ").title() # Custom substitutions subs = { "io": "IO", "api": "API", "cli": "CLI", "cpu": "CPU", "llm": "LLM", "mae": "MAE", "ner": "NER", "tpu": "TPU", "gguf": "GGUF", "lora": "LoRA", "rlhf": "RLHF", "vllm": "vLLM", "openai": "OpenAI", "lmcache": "LMCache", "multilora": "MultiLoRA", "mlpspeculator": "MLPSpeculator", r"fp\d+": lambda x: x.group(0).upper(), # e.g. fp16, fp32 r"int\d+": lambda x: x.group(0).upper(), # e.g. int8, int16 } for pattern, repl in subs.items(): text = re.sub(rf"\b{pattern}\b", repl, text, flags=re.IGNORECASE) return text @dataclass class Example: """ Example class for generating documentation content from a given path. Attributes: path (Path): The path to the main directory or file. category (str): The category of the document. Properties:: main_file() -> Path | None: Determines the main file in the given path. other_files() -> list[Path]: Determines other files in the directory excluding the main file. title() -> str: Determines the title of the document. Methods: generate() -> str: Generates the documentation content. """ path: Path category: str @cached_property def main_file(self) -> Path | None: """Determines the main file in the given path. If path is a file, it returns the path itself. If path is a directory, it searches for Markdown files (*.md) in the directory and returns the first one found. If no Markdown files are found, it returns None.""" # Single file example if self.path.is_file(): return self.path # Multi file example with a README if md_paths := list(self.path.glob("*.md")): return md_paths[0] # Multi file example without a README return None @cached_property def other_files(self) -> list[Path]: """Determine other files in the directory excluding the main file. If path is a file, it returns an empty list. Otherwise, it returns every file in the directory except the main file in a list.""" # Single file example if self.path.is_file(): return [] # Multi file example is_other_file = lambda file: file.is_file() and file != self.main_file return sorted(file for file in self.path.rglob("*") if is_other_file(file)) @cached_property def is_code(self) -> bool: return self.main_file is not None and self.main_file.suffix != ".md" @cached_property def title(self) -> str: # Generate title from filename if no main md file found if self.main_file is None or self.is_code: return title(self.path.stem) # Specify encoding for building on Windows with open(self.main_file, encoding="utf-8") as f: first_line = f.readline().strip() match = re.match(r"^#\s+(?P.+)$", first_line) if match: return match.group("title") raise ValueError(f"Title not found in {self.main_file}") def fix_relative_links(self, content: str) -> str: """ Fix relative links in markdown content by converting them to gh-file format. Args: content (str): The markdown content to process Returns: str: Content with relative links converted to gh-file format """ # Regex to match markdown links [text](relative_path) # This matches links that don't start with http, https, ftp, or # link_pattern = r"\[([^\]]*)\]\((?!(?:https?|ftp)://|#)([^)]+)\)" def replace_link(match): link_text = match.group(1) relative_path = match.group(2) # Make relative to repo root gh_file = (self.main_file.parent / relative_path).resolve() gh_file = gh_file.relative_to(ROOT_DIR) # Make GitHub URL url = "https://github.com/vllm-project/vllm/" url += "tree/main" if self.path.is_dir() else "blob/main" gh_url = f"{url}/{gh_file}" return f"[{link_text}]({gh_url})" return re.sub(link_pattern, replace_link, content) def generate(self) -> str: content = f"# {self.title}\n\n" url = "https://github.com/vllm-project/vllm/" url += "tree/main" if self.path.is_dir() else "blob/main" content += f"Source <{url}/{self.path.relative_to(ROOT_DIR)}>.\n\n" # Use long code fence to avoid issues with # included files containing code fences too code_fence = "``````" if self.main_file is not None: # Single file example or multi file example with a README if self.is_code: content += ( f"{code_fence}{self.main_file.suffix[1:]}\n" f'--8<-- "{self.main_file}"\n' f"{code_fence}\n" ) else: with open(self.main_file, encoding="utf-8") as f: # Skip the title from md snippets as it's been included above main_content = f.readlines()[1:] content += self.fix_relative_links("".join(main_content)) content += "\n" else: # Multi file example without a README for file in self.other_files: file_title = title(str(file.relative_to(self.path).with_suffix(""))) content += f"## {file_title}\n\n" content += ( f'{code_fence}{file.suffix[1:]}\n--8<-- "{file}"\n{code_fence}\n\n' ) return content if not self.other_files: return content content += "## Example materials\n\n" for file in self.other_files: content += f'??? abstract "{file.relative_to(self.path)}"\n' if file.suffix != ".md": content += f" {code_fence}{file.suffix[1:]}\n" content += f' --8<-- "{file}"\n' if file.suffix != ".md": content += f" {code_fence}\n" return content def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool): logger.info("Generating example documentation") logger.debug("Root directory: %s", ROOT_DIR.resolve()) logger.debug("Example directory: %s", EXAMPLE_DIR.resolve()) logger.debug("Example document directory: %s", EXAMPLE_DOC_DIR.resolve()) # Create the EXAMPLE_DOC_DIR if it doesn't exist if not EXAMPLE_DOC_DIR.exists(): EXAMPLE_DOC_DIR.mkdir(parents=True) categories = sorted(p for p in EXAMPLE_DIR.iterdir() if p.is_dir()) examples = [] glob_patterns = ["*.py", "*.md", "*.sh"] # Find categorised examples for category in categories: logger.info("Processing category: %s", category.stem) globs = [category.glob(pattern) for pattern in glob_patterns] for path in itertools.chain(*globs): examples.append(Example(path, category.stem)) # Find examples in subdirectories globs = [category.glob(f"*/{pattern}") for pattern in glob_patterns] for path in itertools.chain(*globs): examples.append(Example(path.parent, category.stem)) # Generate the example documentation for example in sorted(examples, key=lambda e: e.path.stem): example_name = f"{example.path.stem}.md" doc_path = EXAMPLE_DOC_DIR / example.category / example_name if not doc_path.parent.exists(): doc_path.parent.mkdir(parents=True) # Specify encoding for building on Windows with open(doc_path, "w+", encoding="utf-8") as f: f.write(example.generate()) logger.debug("Example generated: %s", doc_path.relative_to(ROOT_DIR)) logger.info("Total examples generated: %d", len(examples))