feat: cache parsed member indexes as msgpack to skip YAML re-parse on rebuild
Warm rebuilds of virtual repos (member caches valid, virtual TTL expired) previously re-parsed all member index.yaml files on every rebuild. With 19 Helm members totalling 14 MB, YAML parsing was 60% of merge time (~6.3s of ~9.6s). Parsing each member's YAML also produces msgpack and stores it in S3 alongside the raw index. Subsequent rebuilds load the compact msgpack and skip YAML parsing entirely. Before: warm rebuild ~9.6s (CSafeLoader baseline) After: warm rebuild ~5.9s (38% faster, merge=4.7s down from ~9.6s)
This commit is contained in:
+196
-22
@@ -8,6 +8,7 @@ import yaml
|
||||
|
||||
from artifactapi.artifact.virtual import (
|
||||
_HANDLERS,
|
||||
_entries_to_msgpack_safe,
|
||||
_get_member_index,
|
||||
_HelmDumper,
|
||||
_HelmHandler,
|
||||
@@ -173,13 +174,13 @@ class TestHelmHandler:
|
||||
assert isinstance(msg, str) and len(msg) > 0
|
||||
|
||||
def test_merge_returns_bytes(self):
|
||||
result = self.handler.merge([_INDEX_A], ["member-a"], [_CFG_A], "http://proxy.example.com")
|
||||
result = self.handler.merge([_INDEX_A], [None], ["member-a"], [_CFG_A], "http://proxy.example.com")
|
||||
assert isinstance(result, bytes)
|
||||
|
||||
def test_merge_delegates_to_merge_helm_indexes(self):
|
||||
with patch("artifactapi.artifact.virtual._merge_helm_indexes", return_value=b"merged") as mock_fn:
|
||||
result = self.handler.merge([b"data"], ["m"], [{}], "http://proxy")
|
||||
mock_fn.assert_called_once_with([b"data"], ["m"], [{}], "http://proxy")
|
||||
result = self.handler.merge([b"data"], [None], ["m"], [{}], "http://proxy")
|
||||
mock_fn.assert_called_once_with([b"data"], [None], ["m"], [{}], "http://proxy")
|
||||
assert result == b"merged"
|
||||
|
||||
|
||||
@@ -239,7 +240,7 @@ class TestRewriteUrls:
|
||||
|
||||
class TestMergeHelmIndexes:
|
||||
def _merge(self, raw_indexes, member_names, member_configs, proxy_base="http://proxy.example.com"):
|
||||
return _merge_helm_indexes(raw_indexes, member_names, member_configs, proxy_base)
|
||||
return _merge_helm_indexes(raw_indexes, [None] * len(raw_indexes), member_names, member_configs, proxy_base)
|
||||
|
||||
def _parse(self, raw):
|
||||
return yaml.safe_load(raw)
|
||||
@@ -342,7 +343,7 @@ class TestGetMemberIndex:
|
||||
storage.exists.return_value = True
|
||||
cache.is_index_valid.return_value = True
|
||||
|
||||
_, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
_, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert raw_data == b"cached bytes"
|
||||
|
||||
@@ -365,7 +366,7 @@ class TestGetMemberIndex:
|
||||
mock_cls.return_value.__aenter__.return_value = mock_client
|
||||
mock_client.get.return_value = self._fake_response(b"fresh bytes")
|
||||
|
||||
_, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
_, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert raw_data == b"fresh bytes"
|
||||
|
||||
@@ -375,7 +376,7 @@ class TestGetMemberIndex:
|
||||
mock_cls.return_value.__aenter__.return_value = mock_client
|
||||
mock_client.get.return_value = self._fake_response()
|
||||
|
||||
_, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
_, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert raw_data == b"upstream bytes"
|
||||
|
||||
@@ -434,7 +435,7 @@ class TestGetMemberIndex:
|
||||
mock_cls.return_value.__aenter__.return_value = mock_client
|
||||
mock_client.get.side_effect = Exception("connection refused")
|
||||
|
||||
_, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
_, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert raw_data is None
|
||||
|
||||
@@ -446,7 +447,7 @@ class TestGetMemberIndex:
|
||||
mock_cls.return_value.__aenter__.return_value = mock_client
|
||||
mock_client.get.return_value = self._fake_response()
|
||||
|
||||
_, _, _, raw_data = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
_, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert raw_data == b"upstream bytes"
|
||||
|
||||
@@ -457,7 +458,7 @@ class TestGetMemberIndex:
|
||||
mock_cls.return_value.__aenter__.return_value = mock_client
|
||||
mock_client.get.return_value = self._fake_response()
|
||||
|
||||
_, _, ttl, _ = await _get_member_index("m", cfg, "index.yaml", storage, cache)
|
||||
_, _, ttl, _, _ = await _get_member_index("m", cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert ttl == 900
|
||||
|
||||
@@ -468,7 +469,7 @@ class TestGetMemberIndex:
|
||||
mock_cls.return_value.__aenter__.return_value = mock_client
|
||||
mock_client.get.return_value = self._fake_response()
|
||||
|
||||
_, _, ttl, _ = await _get_member_index("m", cfg, "index.yaml", storage, cache)
|
||||
_, _, ttl, _, _ = await _get_member_index("m", cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert ttl == 3600
|
||||
|
||||
@@ -567,7 +568,7 @@ class TestVirtualRoute:
|
||||
|
||||
def test_cache_miss_returns_200_with_yaml_content_type(self, client, patched_virtual_deps):
|
||||
with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None)
|
||||
response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
|
||||
|
||||
assert response.status_code == 200
|
||||
@@ -575,7 +576,7 @@ class TestVirtualRoute:
|
||||
|
||||
def test_cache_miss_response_contains_merged_entries(self, client, patched_virtual_deps):
|
||||
with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None)
|
||||
response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
|
||||
|
||||
index = yaml.safe_load(response.content)
|
||||
@@ -584,7 +585,7 @@ class TestVirtualRoute:
|
||||
def test_cache_miss_stores_result_in_s3(self, client, patched_virtual_deps):
|
||||
deps = patched_virtual_deps
|
||||
with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None)
|
||||
client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
|
||||
|
||||
deps["storage"].upload.assert_called_once()
|
||||
@@ -592,7 +593,7 @@ class TestVirtualRoute:
|
||||
def test_cache_miss_marks_index_cached(self, client, patched_virtual_deps):
|
||||
deps = patched_virtual_deps
|
||||
with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None)
|
||||
client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
|
||||
|
||||
deps["cache"].mark_index_cached.assert_called_once()
|
||||
@@ -601,8 +602,8 @@ class TestVirtualRoute:
|
||||
deps = patched_virtual_deps
|
||||
with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
|
||||
mock_get.side_effect = [
|
||||
("helm-test", _CFG_A, 3600, _INDEX_SIMPLE),
|
||||
("helm-member-2", _CFG_B, 1800, _INDEX_SIMPLE),
|
||||
("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None),
|
||||
("helm-member-2", _CFG_B, 1800, _INDEX_SIMPLE, None),
|
||||
]
|
||||
client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
|
||||
|
||||
@@ -611,7 +612,7 @@ class TestVirtualRoute:
|
||||
|
||||
def test_all_members_unreachable_returns_502(self, client, patched_virtual_deps):
|
||||
with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, None)
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, None, None)
|
||||
response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
|
||||
|
||||
assert response.status_code == 502
|
||||
@@ -619,8 +620,8 @@ class TestVirtualRoute:
|
||||
def test_one_member_unreachable_still_returns_200(self, client, patched_virtual_deps):
|
||||
with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
|
||||
mock_get.side_effect = [
|
||||
("helm-test", _CFG_A, 3600, _INDEX_SIMPLE),
|
||||
("helm-member-2", _CFG_B, 1800, None),
|
||||
("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None),
|
||||
("helm-member-2", _CFG_B, 1800, None, None),
|
||||
]
|
||||
response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
|
||||
|
||||
@@ -638,7 +639,7 @@ class TestVirtualRoute:
|
||||
patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get,
|
||||
patch.object(main_mod.config, "get_remote_config", side_effect=patched_get),
|
||||
):
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None)
|
||||
response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
|
||||
|
||||
# only helm-test was available — should succeed
|
||||
@@ -650,7 +651,180 @@ class TestVirtualRoute:
|
||||
deps["storage"].upload.side_effect = Exception("S3 write error")
|
||||
|
||||
with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get:
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE)
|
||||
mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None)
|
||||
response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml")
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _entries_to_msgpack_safe
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestEntriesToMsgpackSafe:
|
||||
def test_plain_string_values_pass_through(self):
|
||||
entries = {"chart": [{"name": "chart", "version": "1.0.0", "urls": ["http://x/c.tgz"]}]}
|
||||
result = _entries_to_msgpack_safe(entries)
|
||||
assert result["chart"][0]["version"] == "1.0.0"
|
||||
|
||||
def test_datetime_converted_to_iso_string(self):
|
||||
dt = datetime(2023, 6, 15, 12, 0, 0, tzinfo=UTC)
|
||||
entries = {"chart": [{"name": "chart", "version": "1.0.0", "created": dt}]}
|
||||
result = _entries_to_msgpack_safe(entries)
|
||||
assert isinstance(result["chart"][0]["created"], str)
|
||||
assert "2023-06-15" in result["chart"][0]["created"]
|
||||
|
||||
def test_date_converted_to_iso_string(self):
|
||||
entries = {"chart": [{"name": "chart", "version": "1.0.0", "created": date(2023, 6, 15)}]}
|
||||
result = _entries_to_msgpack_safe(entries)
|
||||
assert result["chart"][0]["created"] == "2023-06-15"
|
||||
|
||||
def test_empty_entries_returns_empty_dict(self):
|
||||
assert _entries_to_msgpack_safe({}) == {}
|
||||
|
||||
def test_multiple_versions_all_converted(self):
|
||||
dt = datetime(2023, 1, 1, tzinfo=UTC)
|
||||
entries = {
|
||||
"chart": [
|
||||
{"name": "chart", "version": "1.0.0", "created": dt},
|
||||
{"name": "chart", "version": "2.0.0", "created": dt},
|
||||
]
|
||||
}
|
||||
result = _entries_to_msgpack_safe(entries)
|
||||
for v in result["chart"]:
|
||||
assert isinstance(v["created"], str)
|
||||
|
||||
def test_result_is_msgpack_serializable(self):
|
||||
import msgpack
|
||||
|
||||
dt = datetime(2023, 6, 15, 12, 0, 0, tzinfo=UTC)
|
||||
entries = {"chart": [{"name": "chart", "version": "1.0.0", "created": dt, "urls": ["http://x/c.tgz"]}]}
|
||||
safe = _entries_to_msgpack_safe(entries)
|
||||
packed = msgpack.packb(safe, use_bin_type=True)
|
||||
unpacked = msgpack.unpackb(packed, raw=False)
|
||||
assert unpacked["chart"][0]["created"] == safe["chart"][0]["created"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _merge_helm_indexes — pre-parsed entries path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMergeHelmIndexesWithParsed:
|
||||
"""Verify that pre-parsed entries (from msgpack) produce the same output as raw YAML."""
|
||||
|
||||
def _parse_entries(self, raw: bytes) -> dict:
|
||||
index = yaml.safe_load(raw)
|
||||
return index.get("entries") or {}
|
||||
|
||||
def test_parsed_entries_produce_same_charts_as_raw(self):
|
||||
parsed = self._parse_entries(_INDEX_A)
|
||||
raw_result = yaml.safe_load(_merge_helm_indexes([_INDEX_A], [None], ["member-a"], [_CFG_A], "http://proxy.example.com"))
|
||||
parsed_result = yaml.safe_load(_merge_helm_indexes([_INDEX_A], [parsed], ["member-a"], [_CFG_A], "http://proxy.example.com"))
|
||||
assert set(raw_result["entries"].keys()) == set(parsed_result["entries"].keys())
|
||||
|
||||
def test_parsed_entries_urls_are_rewritten(self):
|
||||
parsed = self._parse_entries(_INDEX_A)
|
||||
result = yaml.safe_load(_merge_helm_indexes([_INDEX_A], [parsed], ["member-a"], [_CFG_A], "http://proxy.example.com"))
|
||||
url = result["entries"]["vault"][0]["urls"][0]
|
||||
assert "member-a" in url
|
||||
assert "proxy.example.com" in url
|
||||
|
||||
def test_none_parsed_falls_back_to_raw_bytes(self):
|
||||
result = yaml.safe_load(_merge_helm_indexes([_INDEX_A], [None], ["member-a"], [_CFG_A], "http://proxy.example.com"))
|
||||
assert "vault" in result["entries"]
|
||||
|
||||
def test_mixed_parsed_and_raw_merge_correctly(self):
|
||||
parsed_a = self._parse_entries(_INDEX_A)
|
||||
result = yaml.safe_load(
|
||||
_merge_helm_indexes(
|
||||
[_INDEX_A, _INDEX_B],
|
||||
[parsed_a, None],
|
||||
["member-a", "member-b"],
|
||||
[_CFG_A, _CFG_B],
|
||||
"http://proxy.example.com",
|
||||
)
|
||||
)
|
||||
assert "vault" in result["entries"]
|
||||
assert "nginx" in result["entries"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _get_member_index — msgpack cache behaviour
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetMemberIndexMsgpack:
|
||||
@pytest.fixture
|
||||
def storage(self):
|
||||
m = MagicMock()
|
||||
m.get_object_key.side_effect = lambda name, path: f"{name}/{path}"
|
||||
m.exists.return_value = False
|
||||
m.download_object.return_value = _INDEX_SIMPLE
|
||||
return m
|
||||
|
||||
@pytest.fixture
|
||||
def cache(self):
|
||||
m = MagicMock()
|
||||
m.is_index_valid.return_value = False
|
||||
return m
|
||||
|
||||
@pytest.fixture
|
||||
def member_cfg(self):
|
||||
return {"base_url": "https://helm.releases.hashicorp.com", "cache": {"mutable_ttl": 3600}}
|
||||
|
||||
def _fake_response(self, content=_INDEX_SIMPLE):
|
||||
r = MagicMock()
|
||||
r.content = content
|
||||
r.raise_for_status = MagicMock()
|
||||
return r
|
||||
|
||||
async def test_cache_hit_with_msgpack_returns_parsed_entries(self, storage, cache, member_cfg):
|
||||
import msgpack
|
||||
|
||||
entries = {"mychart": [{"name": "mychart", "version": "1.0.0", "urls": ["http://x/c.tgz"]}]}
|
||||
packed = msgpack.packb(entries, use_bin_type=True)
|
||||
|
||||
storage.exists.side_effect = lambda key: True
|
||||
cache.is_index_valid.return_value = True
|
||||
storage.download_object.side_effect = lambda key: packed if key.endswith("index.msgpack") else _INDEX_SIMPLE
|
||||
|
||||
_, _, _, raw_data, parsed = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert parsed == entries
|
||||
|
||||
async def test_cache_miss_builds_msgpack_and_returns_parsed(self, storage, cache, member_cfg):
|
||||
with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls:
|
||||
mock_client = AsyncMock()
|
||||
mock_cls.return_value.__aenter__.return_value = mock_client
|
||||
mock_client.get.return_value = self._fake_response()
|
||||
|
||||
_, _, _, raw_data, parsed = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert raw_data == _INDEX_SIMPLE
|
||||
assert isinstance(parsed, dict)
|
||||
assert "mychart" in parsed
|
||||
|
||||
async def test_broken_msgpack_rebuilds_from_raw_yaml(self, storage, cache, member_cfg):
|
||||
storage.exists.side_effect = lambda key: True
|
||||
cache.is_index_valid.return_value = True
|
||||
storage.download_object.side_effect = lambda key: b"not-valid-msgpack" if key.endswith("index.msgpack") else _INDEX_SIMPLE
|
||||
|
||||
_, _, _, raw_data, parsed = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert raw_data == _INDEX_SIMPLE
|
||||
# Falls back to YAML parse and rebuilds msgpack — entries are returned
|
||||
assert isinstance(parsed, dict)
|
||||
assert "mychart" in parsed
|
||||
|
||||
async def test_upstream_failure_returns_none_for_both(self, storage, cache, member_cfg):
|
||||
with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls:
|
||||
mock_client = AsyncMock()
|
||||
mock_cls.return_value.__aenter__.return_value = mock_client
|
||||
mock_client.get.side_effect = Exception("timeout")
|
||||
|
||||
_, _, _, raw_data, parsed = await _get_member_index("m", member_cfg, "index.yaml", storage, cache)
|
||||
|
||||
assert raw_data is None
|
||||
assert parsed is None
|
||||
|
||||
Reference in New Issue
Block a user