fix(api-nodes): random issues on Windows by capturing general OSError for retries (#10486)

This commit is contained in:
Alexander Piskun 2025-10-26 08:51:06 +02:00 committed by GitHub
parent f6bbc1ac84
commit 9d529e5308
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 15 deletions

View File

@@ -2,7 +2,6 @@ import asyncio
 import contextlib
 import json
 import logging
-import socket
 import time
 import uuid
 from dataclasses import dataclass
@@ -456,24 +455,20 @@ async def _diagnose_connectivity() -> dict[str, bool]:
 results = {
 "internet_accessible": False,
 "api_accessible": False,
-"is_local_issue": False,
-"is_api_issue": False,
 }
 timeout = aiohttp.ClientTimeout(total=5.0)
 async with aiohttp.ClientSession(timeout=timeout) as session:
-try:
+with contextlib.suppress(ClientError, OSError):
 async with session.get("https://www.google.com") as resp:
 results["internet_accessible"] = resp.status < 500
-except (ClientError, asyncio.TimeoutError, socket.gaierror):
-results["is_local_issue"] = True
+if not results["internet_accessible"]:
 return results
 parsed = urlparse(default_base_url())
 health_url = f"{parsed.scheme}://{parsed.netloc}/health"
-with contextlib.suppress(ClientError, asyncio.TimeoutError):
+with contextlib.suppress(ClientError, OSError):
 async with session.get(health_url) as resp:
 results["api_accessible"] = resp.status < 500
-results["is_api_issue"] = results["internet_accessible"] and not results["api_accessible"]
 return results
@@ -790,7 +785,7 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
 except ProcessingInterrupted:
 logging.debug("Polling was interrupted by user")
 raise
-except (ClientError, asyncio.TimeoutError, socket.gaierror) as e:
+except (ClientError, OSError) as e:
 if attempt <= cfg.max_retries:
 logging.warning(
 "Connection error calling %s %s. Retrying in %.2fs (%d/%d): %s",
@@ -824,7 +819,7 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool):
 delay *= cfg.retry_backoff
 continue
 diag = await _diagnose_connectivity()
-if diag.get("is_local_issue"):
+if not diag["internet_accessible"]:
 try:
 request_logger.log_request_response(
 operation_id=operation_id,

View File

@@ -32,7 +32,7 @@ async def download_url_to_bytesio(
 dest: Optional[Union[BytesIO, IO[bytes], str, Path]],
 *,
 timeout: Optional[float] = None,
-max_retries: int = 3,
+max_retries: int = 5,
 retry_delay: float = 1.0,
 retry_backoff: float = 2.0,
 cls: type[COMFY_IO.ComfyNode] = None,
@@ -177,7 +177,7 @@ async def download_url_to_bytesio(
 return
 except asyncio.CancelledError:
 raise ProcessingInterrupted("Task cancelled") from None
-except (ClientError, asyncio.TimeoutError) as e:
+except (ClientError, OSError) as e:
 if attempt <= max_retries:
 with contextlib.suppress(Exception):
 request_logger.log_request_response(
@@ -191,7 +191,7 @@ async def download_url_to_bytesio(
 continue
 diag = await _diagnose_connectivity()
-if diag.get("is_local_issue"):
+if not diag["internet_accessible"]:
 raise LocalNetworkError(
 "Unable to connect to the network. Please check your internet connection and try again."
 ) from e

View File

@@ -290,7 +290,7 @@ async def upload_file(
 return
 except asyncio.CancelledError:
 raise ProcessingInterrupted("Task cancelled") from None
-except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+except (aiohttp.ClientError, OSError) as e:
 if attempt <= max_retries:
 with contextlib.suppress(Exception):
 request_logger.log_request_response(
@@ -313,7 +313,7 @@ async def upload_file(
 continue
 diag = await _diagnose_connectivity()
-if diag.get("is_local_issue"):
+if not diag["internet_accessible"]:
 raise LocalNetworkError(
 "Unable to connect to the network. Please check your internet connection and try again."
 ) from e