From cfd0ae823472d7dc33b29864dd576d5c7b5f832f Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Sat, 8 Mar 2025 10:51:39 +0100
Subject: [PATCH] Add RLHF document (#14482)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 docs/source/generate_examples.py |  3 ++-
 docs/source/index.md             |  1 +
 docs/source/training/rlhf.md     | 11 +++++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 docs/source/training/rlhf.md

diff --git a/docs/source/generate_examples.py b/docs/source/generate_examples.py
index c51ca18667ef..1206d5fe7539 100644
--- a/docs/source/generate_examples.py
+++ b/docs/source/generate_examples.py
@@ -14,13 +14,14 @@ EXAMPLE_DOC_DIR = ROOT_DIR / "docs/source/getting_started/examples"
 def fix_case(text: str) -> str:
     subs = {
         "api": "API",
-        "Cli": "CLI",
+        "cli": "CLI",
         "cpu": "CPU",
         "llm": "LLM",
         "tpu": "TPU",
         "aqlm": "AQLM",
         "gguf": "GGUF",
         "lora": "LoRA",
+        "rlhf": "RLHF",
         "vllm": "vLLM",
         "openai": "OpenAI",
         "multilora": "MultiLoRA",
diff --git a/docs/source/index.md b/docs/source/index.md
index 3db79456a4e4..09ada43335c7 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -105,6 +105,7 @@ features/compatibility_matrix
 :maxdepth: 1
 
 training/trl.md
+training/rlhf.md
 
 :::
 
diff --git a/docs/source/training/rlhf.md b/docs/source/training/rlhf.md
new file mode 100644
index 000000000000..00822aefe11e
--- /dev/null
+++ b/docs/source/training/rlhf.md
@@ -0,0 +1,11 @@
+# Reinforcement Learning from Human Feedback
+
+Reinforcement Learning from Human Feedback (RLHF) is a technique that fine-tunes language models using human-generated preference data to align model outputs with desired behaviours.
+
+vLLM can be used to generate the completions for RLHF. The best way to do this is with libraries like [TRL](https://github.com/huggingface/trl), [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF) and [verl](https://github.com/volcengine/verl).
+
+See the following basic examples to get started if you don't want to use an existing library:
+
+- [Training and inference processes are located on separate GPUs (inspired by OpenRLHF)](https://docs.vllm.ai/en/latest/getting_started/examples/rlhf.html)
+- [Training and inference processes are colocated on the same GPUs using Ray](https://docs.vllm.ai/en/latest/getting_started/examples/rlhf_colocate.html)
+- [Utilities for performing RLHF with vLLM](https://docs.vllm.ai/en/latest/getting_started/examples/rlhf_utils.html)
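
For orientation only (not part of the commit above): a minimal sketch of the generation step the new rlhf.md describes, using vLLM's offline `LLM`/`SamplingParams` API. The model name is an arbitrary example, and `score_completions`/`update_policy` are hypothetical placeholders for the trainer side of the loop.

```python
# Minimal sketch: use vLLM offline inference to produce candidate completions
# for an RLHF-style preference/reward step. Reward scoring and the policy
# update are hypothetical placeholders; libraries such as TRL, OpenRLHF and
# verl implement them and also push updated policy weights back into vLLM
# between iterations.
from vllm import LLM, SamplingParams

prompts = [
    "Explain why the sky appears blue.",
    "Write a short haiku about the ocean.",
]

# Generation engine loaded with the current policy checkpoint (example model).
llm = LLM(model="facebook/opt-125m")

# Sample several candidate completions per prompt for preference scoring.
sampling_params = SamplingParams(n=4, temperature=1.0, top_p=0.95, max_tokens=128)
outputs = llm.generate(prompts, sampling_params)

for request_output in outputs:
    candidates = [completion.text for completion in request_output.outputs]
    # rewards = score_completions(request_output.prompt, candidates)  # hypothetical
    # update_policy(request_output.prompt, candidates, rewards)       # hypothetical
```

The examples linked in rlhf.md cover the part this sketch omits: keeping the vLLM engine's weights in sync with the policy being trained, either across separate GPUs or colocated on the same GPUs via Ray.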