diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 6a7db920b5b6..09c8e23ebb1c 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -14,7 +14,8 @@ from tqdm.asyncio import tqdm from transformers import (AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast) -from vllm.model_executor.model_loader.weight_utils import get_lock +# NOTE(simon): do not import vLLM here so the benchmark script +# can run without vLLM installed. AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60) @@ -427,6 +428,8 @@ def get_model(pretrained_model_name_or_path: str) -> str: if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true': from modelscope import snapshot_download + from vllm.model_executor.model_loader.weight_utils import get_lock + # Use file lock to prevent multiple processes from # downloading the same model weights at the same time. with get_lock(pretrained_model_name_or_path): diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 1dd01ca96867..47627126b668 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -684,6 +684,15 @@ def main(args: argparse.Namespace): "Invalid metadata format. Please use KEY=VALUE format." ) + if not args.save_detailed: + # Remove fields with too many data points + for field in [ + "input_lens", "output_lens", "ttfts", "itls", + "generated_texts", "errors" + ]: + if field in result_json: + del result_json[field] + # Traffic result_json["request_rate"] = (args.request_rate if args.request_rate < float("inf") else "inf") @@ -828,6 +837,12 @@ if __name__ == "__main__": action="store_true", help="Specify to save benchmark results to a json file", ) + parser.add_argument( + "--save-detailed", + action="store_true", + help="When saving the results, whether to include per request " + "information such as response, error, ttfts, tpots, etc.", + ) parser.add_argument( "--metadata", metavar="KEY=VALUE",