From 15f934cd0b1c849b756680115aee8c79130dea02 Mon Sep 17 00:00:00 2001 From: Ben Vincent Date: Sat, 2 May 2026 11:51:00 +1000 Subject: [PATCH] perf: use yaml.CSafeLoader/CDumper for 4x faster virtual index merge (#39) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #34 ## Summary - At module load time, a `try/except` selects `yaml.CSafeLoader` / `yaml.CDumper` (C extensions) when libyaml is available, otherwise falls back to `yaml.SafeLoader` / `yaml.Dumper` - `_HelmDumper` inherits from whichever dumper base was selected — custom datetime/date representers are registered the same way as before - `_merge_helm_indexes` uses `yaml.load(raw_data, Loader=_YamlLoader)` instead of `yaml.safe_load` - No change to `yaml.dump(...)` call — it already passes `Dumper=_HelmDumper`, which now inherits from the C base when available - Five new tests in `TestYamlExtensionSelection` cover: loader/dumper base are classes, `_HelmDumper` inherits from the selected base, C extensions used when available, loader can parse YAML ## Measured performance gain 19-member `helm-all` virtual repo, real upstream data, Docker (AlmaLinux 9): | | `merge=` time | |---|---| | Before (SafeLoader + Dumper) | **38,877ms** | | After (CSafeLoader + CDumper) | **9,625ms** | | Speedup | **4.0×** | Local microbenchmark (500 charts × 10 versions × 19 members, 3 runs avg): - Before: **40.8s** → After: **6.1s** (**6.7×** faster) ## Test plan - [x] 283 unit tests pass (`make test`) - [x] Wheel builds cleanly (`uv build --wheel`) - [x] C extension confirmed available in AlmaLinux 9 container: `yaml.CSafeLoader: <class 'yaml.cyaml.CSafeLoader'>` - [x] Baseline Docker timing measured with pure-Python path forced: merge=38,877ms - [x] After Docker timing measured with C extension path: merge=9,625ms Reviewed-on: https://git.unkin.net/unkin/artifactapi/pulls/39 --- src/artifactapi/artifact/virtual.py | 11 +++++++++-- tests/test_virtual.py | 30 +++++++++++++++++++++++++++++ 2 files changed, 39 
insertions(+), 2 deletions(-) diff --git a/src/artifactapi/artifact/virtual.py b/src/artifactapi/artifact/virtual.py index a3cd3b7..9c2bb7a 100644 --- a/src/artifactapi/artifact/virtual.py +++ b/src/artifactapi/artifact/virtual.py @@ -11,8 +11,15 @@ from fastapi import HTTPException, Request, Response logger = logging.getLogger(__name__) +try: + _YamlLoader = yaml.CSafeLoader + _YamlDumperBase = yaml.CDumper +except AttributeError: + _YamlLoader = yaml.SafeLoader + _YamlDumperBase = yaml.Dumper -class _HelmDumper(yaml.Dumper): + +class _HelmDumper(_YamlDumperBase): """YAML dumper that serializes datetime/date objects back to ISO 8601 strings. yaml.safe_load converts timestamp-shaped YAML scalars (e.g. chart `created` @@ -109,7 +116,7 @@ def _merge_helm_indexes(raw_indexes: list[bytes], member_names: list[str], membe base_url = member_cfg.get("base_url", "").rstrip("/") try: - index = yaml.safe_load(raw_data) + index = yaml.load(raw_data, Loader=_YamlLoader) except Exception as e: logger.warning(f"Virtual: failed to parse index.yaml from member '{member_name}': {e}") continue diff --git a/tests/test_virtual.py b/tests/test_virtual.py index 749aa23..bfbee56 100644 --- a/tests/test_virtual.py +++ b/tests/test_virtual.py @@ -14,6 +14,8 @@ from artifactapi.artifact.virtual import ( _merge_helm_indexes, _rewrite_urls, _VirtualHandler, + _YamlDumperBase, + _YamlLoader, ) # --------------------------------------------------------------------------- @@ -82,6 +84,34 @@ _CFG_A = {"base_url": "https://helm.releases.hashicorp.com", "cache": {"mutable_ _CFG_B = {"base_url": "https://charts.example.com", "cache": {"mutable_ttl": 1800}} +# --------------------------------------------------------------------------- +# _YamlLoader / _YamlDumperBase — C extension selection +# --------------------------------------------------------------------------- + + +class TestYamlExtensionSelection: + def test_loader_is_a_class(self): + assert isinstance(_YamlLoader, type) + + def 
test_dumper_base_is_a_class(self): + assert isinstance(_YamlDumperBase, type) + + def test_helm_dumper_uses_selected_base(self): + assert issubclass(_HelmDumper, _YamlDumperBase) + + def test_c_extensions_used_when_available(self): + try: + assert _YamlLoader is yaml.CSafeLoader + assert _YamlDumperBase is yaml.CDumper + except AttributeError: + assert _YamlLoader is yaml.SafeLoader + assert _YamlDumperBase is yaml.Dumper + + def test_loader_can_parse_yaml(self): + result = yaml.load(b"key: value", Loader=_YamlLoader) + assert result == {"key": "value"} + + # --------------------------------------------------------------------------- # _HelmDumper — datetime/date YAML serialization # ---------------------------------------------------------------------------