feat: add HTTP sessions and retry logic for Gitea/GitHub API calls

Creates two module-level requests.Session objects (_gitea_session,
_github_session) with an HTTPAdapter backed by urllib3 Retry:
- 3 retries with 0.5s exponential backoff
- retries on 429, 500, 502, 503, 504 and connection errors
- GET-only to avoid unsafe retries

Benefits:
- TCP connections are pooled and reused across all package checks,
  including concurrent builds in ThreadPoolExecutor
- Transient network blips no longer cause spurious rebuilds
- Per-request timeouts tightened to 10s (Gitea) / 15s (GitHub)
  so failures surface quickly and the retry budget is actually useful
This commit is contained in:
2026-05-17 10:38:07 +10:00
parent 9ca3340f10
commit 3de605d321
+45 -15
View File
@@ -23,6 +23,8 @@ import sys
import logging import logging
import subprocess import subprocess
import requests import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from pathlib import Path from pathlib import Path
from typing import List, Optional from typing import List, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -33,6 +35,36 @@ import yaml
from cerberus import Validator from cerberus import Validator
# ==================== HTTP SESSIONS ====================
def _make_session(
    retries: int = 3,
    backoff_factor: float = 0.5,
    *,
    pool_maxsize: int = 10,
) -> requests.Session:
    """
    Build a requests Session that retries GET requests on transient failures.

    Up to `retries` additional attempts are made on connection errors, read
    errors, and 429 / 500 / 502 / 503 / 504 responses. Between attempts
    urllib3 sleeps `backoff_factor * 2 ** (n - 1)` seconds, except that the
    first retry is immediate; with the defaults that is 0s, 1s, 2s.

    Only GET requests are retried, so non-idempotent calls are never replayed.

    Args:
        retries: Maximum number of retries per request (urllib3 `total`).
        backoff_factor: Multiplier for the exponential backoff between retries.
        pool_maxsize: Connections kept per host pool. Raise this when more
            threads than the default (10) share the session against one host.

    Returns:
        A Session with the retrying adapter mounted for both http:// and
        https:// URLs.
    """
    session = requests.Session()
    retry = Retry(
        total=retries,
        backoff_factor=backoff_factor,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET"],
        # Once retries are exhausted on a bad status, hand the final response
        # back to the caller instead of raising, so existing status_code
        # checks keep working.
        raise_on_status=False,
    )
    adapter = HTTPAdapter(max_retries=retry, pool_maxsize=pool_maxsize)
    # One adapter instance serves both schemes.
    session.mount("https://", adapter)
    session.mount("http://", adapter)
    return session
# Shared sessions — one per upstream so connections are pooled and reused
# across all package checks (including parallel builds in ThreadPoolExecutor).
# NOTE(review): requests does not formally document Session as thread-safe.
# Connection pooling is handled by urllib3, but shared mutable state such as
# session.headers is not guarded; callers that call session.headers.update()
# per request from worker threads should be checked for races — TODO confirm.
_gitea_session = _make_session()
_github_session = _make_session()
# ==================== VALIDATION SCHEMA ==================== # ==================== VALIDATION SCHEMA ====================
# Cerberus schema for metadata.yaml validation based on PackageMetadata and Build dataclasses # Cerberus schema for metadata.yaml validation based on PackageMetadata and Build dataclasses
@@ -274,15 +306,14 @@ def get_github_latest_release(repo: str) -> Optional[dict]:
try: try:
github_token = get_github_token() github_token = get_github_token()
_github_session.headers.update({
'Authorization': f'token {github_token}',
'Accept': 'application/vnd.github.v3+json',
})
url = f"https://api.github.com/repos/{repo}/releases/latest" url = f"https://api.github.com/repos/{repo}/releases/latest"
headers = {
'Authorization': f'token {github_token}',
'Accept': 'application/vnd.github.v3+json'
}
logger.debug(f"Checking GitHub releases: {url}") logger.debug(f"Checking GitHub releases: {url}")
response = requests.get(url, headers=headers, timeout=30) response = _github_session.get(url, timeout=15)
if response.status_code == 200: if response.status_code == 200:
release = response.json() release = response.json()
@@ -389,15 +420,14 @@ def get_github_releases_by_pattern(repo: str, pattern: str) -> Optional[dict]:
try: try:
github_token = get_github_token() github_token = get_github_token()
_github_session.headers.update({
'Authorization': f'token {github_token}',
'Accept': 'application/vnd.github.v3+json',
})
url = f"https://api.github.com/repos/{repo}/releases" url = f"https://api.github.com/repos/{repo}/releases"
headers = {
'Authorization': f'token {github_token}',
'Accept': 'application/vnd.github.v3+json'
}
logger.debug(f"Checking GitHub releases with pattern '{pattern}': {url}") logger.debug(f"Checking GitHub releases with pattern '{pattern}': {url}")
response = requests.get(url, headers=headers, timeout=30) response = _github_session.get(url, timeout=15)
if response.status_code == 200: if response.status_code == 200:
releases = response.json() releases = response.json()
@@ -558,10 +588,10 @@ def check_package_exists(package_name: str, version: str, release: str, distro:
f"{package_type}/{package_name}/{full_version}" f"{package_type}/{package_name}/{full_version}"
) )
headers = {'Authorization': f'token {gitea_token}'} _gitea_session.headers.update({'Authorization': f'token {gitea_token}'})
logger.debug(f"Checking package existence: {url}") logger.debug(f"Checking package existence: {url}")
response = requests.get(url, headers=headers, timeout=30) response = _gitea_session.get(url, timeout=10)
if response.status_code == 200: if response.status_code == 200:
package_info = response.json() package_info = response.json()
@@ -582,7 +612,7 @@ def check_package_exists(package_name: str, version: str, release: str, distro:
files_url = f"{base_url}/api/v1/packages/{owner}/{package_type}/{package_name}/{full_version}/files" files_url = f"{base_url}/api/v1/packages/{owner}/{package_type}/{package_name}/{full_version}/files"
logger.debug(f"Checking distro-specific files: {files_url}") logger.debug(f"Checking distro-specific files: {files_url}")
files_response = requests.get(files_url, headers=headers, timeout=30) files_response = _gitea_session.get(files_url, timeout=10)
if files_response.status_code == 200: if files_response.status_code == 200:
files = files_response.json() files = files_response.json()