mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-13 13:07:07 +08:00
56 lines
1.8 KiB
Python
56 lines
1.8 KiB
Python
import asyncio
|
|
import aiohttp
|
|
|
|
|
|
async def test_connect(session):
    """Send one streaming chat request to the local server and print the reply.

    Posts a fixed payload with ``"stream": True`` to
    ``http://localhost:8001/v1/connect/completions``, prints the HTTP status
    and, for a 200 response that uses chunked transfer encoding, prints the
    body text as it arrives.

    Args:
        session: Client session used to issue the POST; ``main`` passes an
            ``aiohttp.ClientSession``.

    Returns:
        None. Every outcome is reported with ``print``. An
        ``aiohttp.ClientError`` is caught and printed instead of raised.
    """
    import codecs  # local import: only needed for incremental decoding below

    payload = {
        "temperature": 0.5,
        "top_p": 0.9,
        "max_tokens": 150,
        "frequency_penalty": 1.3,
        "presence_penalty": 0.2,
        "repetition_penalty": 1.2,
        "model": "meta-llama/Llama-3.2-3B-Instruct",
        "messages": [
            {"role": "assistant", "content": "what can i help you?"},
            {"role": "user", "content": "tell me about us"},
        ],
        "stream": True,
        "stream_options": {"include_usage": True},
    }

    try:
        print("Sending request")
        async with session.post(
            "http://localhost:8001/v1/connect/completions",
            json=payload,
            headers={"Content-Type": "application/json"},
        ) as response:
            print(response.status)
            if response.status != 200:
                print(f"Request failed with status code {response.status}")
                return

            transfer_encoding = response.headers.get('Transfer-Encoding')
            # Transfer-coding names are case-insensitive and the header may
            # list several codings separated by commas.
            codings = {
                part.strip().lower()
                for part in (transfer_encoding or "").split(",")
            }
            if "chunked" not in codings:
                print(f"Unexpected Transfer-Encoding: {transfer_encoding}")
                return

            # A fixed-size chunk can end in the middle of a multi-byte UTF-8
            # character; the incremental decoder buffers the partial bytes
            # until the next chunk completes them.
            decoder = codecs.getincrementaldecoder("utf-8")()
            async for chunk in response.content.iter_chunked(1024):
                try:
                    text = decoder.decode(chunk)
                except UnicodeDecodeError:
                    print(f"Error decoding chunk: {chunk!r}")
                    # Drop the bad bytes so later chunks decode cleanly.
                    decoder.reset()
                    continue
                if text:
                    print(text)

            # Report a character left incomplete when the stream ended.
            try:
                decoder.decode(b"", final=True)
            except UnicodeDecodeError as exc:
                print(f"Error decoding chunk: {exc.object!r}")
    except aiohttp.ClientError as e:
        print(f"Error: {e}")
|
|
|
|
|
|
async def main():
    """Open one client session and run two ``test_connect`` calls together."""
    async with aiohttp.ClientSession() as session:
        # Both requests share the session and are awaited concurrently.
        pending = (test_connect(session) for _ in range(2))
        await asyncio.gather(*pending)
|
|
|
|
|
|
# Run the example only when executed as a script, so importing this module
# does not start an event loop or send network requests.
if __name__ == "__main__":
    asyncio.run(main())
|
|
|