9e7944835e
Tag manifests (e.g. library/nginx/manifests/latest) and their sha256-addressed counterparts were stored at separate S3 keys with no cross-reference, so a sha256 manifest request always missed cache even when the identical content had just been stored under the tag key. After serving any mutable (tag) manifest, compute the sha256 of the response body and write it under the digest key (manifests/sha256:<hex>) if absent. The next sha256-addressed pull hits cache immediately. Also adds a short-lived Redis distributed lock (SET NX EX 30) around upstream fetches so that concurrent pods racing for the same cold key poll storage for up to 5 s before issuing a duplicate upstream request, eliminating the thundering herd on deploy events. Includes unit tests for both the lock primitives (acquire/release, fail-open when Redis is unavailable) and the docker proxy behaviour (cross-link written on tag hit, not written for sha256 requests, lock acquired/released, poll path serves from cache without upstream fetch, fallback fetch when poll times out).
401 lines
19 KiB
Python
401 lines
19 KiB
Python
"""Tests for RedisCache, focusing on is_mutable_file with configurable patterns."""
|
|
|
|
import hashlib
|
|
from unittest.mock import ANY, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from artifactapi.cache import RedisCache
|
|
from artifactapi.config import _PACKAGE_MUTABLE_PATTERNS
|
|
|
|
|
|
@pytest.fixture
def bare_cache():
    """Provide a RedisCache allocated via ``__new__`` so ``__init__`` never runs.

    Intended for pure-logic tests that need no Redis connection.
    """
    instance = RedisCache.__new__(RedisCache)
    return instance
|
|
|
|
|
|
@pytest.fixture
def unavailable_cache():
    """Provide a RedisCache built while ``redis.from_url`` raises."""
    connect_failure = Exception("connection refused")
    with patch("redis.from_url", side_effect=connect_failure):
        cache = RedisCache("redis://localhost:6379/0")
    return cache
|
|
|
|
|
|
@pytest.fixture
def mock_redis_client():
    """Provide a fresh MagicMock standing in for the Redis client."""
    client = MagicMock()
    return client
|
|
|
|
|
|
@pytest.fixture
def cache_with_redis(mock_redis_client):
    """Provide a RedisCache whose client is the MagicMock from ``mock_redis_client``."""
    with patch("redis.from_url", return_value=mock_redis_client):
        cache = RedisCache("redis://localhost:6379/0")
        # Overwrite whatever the constructor set so the tests always talk to
        # the mock and always see the cache as available.
        cache.client = mock_redis_client
        cache.available = True
        return cache
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# is_mutable_file — alpine patterns
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestIsMutableFileAlpine:
    """is_mutable_file evaluated with the "alpine" entry of _PACKAGE_MUTABLE_PATTERNS."""

    @staticmethod
    def _matches(cache, path):
        """Return is_mutable_file's verdict for *path* under the alpine patterns."""
        alpine_patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"]
        return cache.is_mutable_file(path, alpine_patterns)

    def test_apkindex_tarball_is_index(self, bare_cache):
        assert self._matches(bare_cache, "alpine/v3.18/x86_64/APKINDEX.tar.gz")

    def test_nested_apkindex_is_index(self, bare_cache):
        assert self._matches(
            bare_cache,
            "mirrors/dl-cdn/alpine/v3.19/community/x86_64/APKINDEX.tar.gz",
        )

    def test_apk_package_is_not_index(self, bare_cache):
        assert not self._matches(bare_cache, "alpine/v3.18/x86_64/musl-1.2.4-r2.apk")

    def test_random_tarball_is_not_index(self, bare_cache):
        assert not self._matches(bare_cache, "some/path/archive.tar.gz")

    def test_apkindex_signature_file_is_not_index(self, bare_cache):
        # Signature file adjacent to the index should not be treated as an index
        assert not self._matches(bare_cache, "alpine/v3.18/x86_64/APKINDEX.tar.gz.sig")

    def test_apkindex_tmp_file_is_not_index(self, bare_cache):
        assert not self._matches(bare_cache, "alpine/v3.18/x86_64/APKINDEX.tar.gz.tmp")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# is_mutable_file — rpm patterns
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestIsMutableFileRpm:
    """is_mutable_file evaluated with the "rpm" entry of _PACKAGE_MUTABLE_PATTERNS."""

    @staticmethod
    def _matches(cache, path):
        """Return is_mutable_file's verdict for *path* under the rpm patterns."""
        rpm_patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"]
        return cache.is_mutable_file(path, rpm_patterns)

    def test_repomd_xml_is_index(self, bare_cache):
        assert self._matches(bare_cache, "almalinux/9/x86_64/repomd.xml")

    def test_repodata_primary_xml_gz_is_index(self, bare_cache):
        assert self._matches(bare_cache, "repo/repodata/primary.xml.gz")

    def test_repodata_sqlite_is_index(self, bare_cache):
        assert self._matches(bare_cache, "repo/repodata/primary.sqlite")

    def test_repodata_sqlite_bz2_is_index(self, bare_cache):
        assert self._matches(bare_cache, "repo/repodata/other.sqlite.bz2")

    def test_repodata_yaml_xz_is_index(self, bare_cache):
        assert self._matches(bare_cache, "repo/repodata/comps.yaml.xz")

    def test_packages_gz_pattern_matches_any_path(self, bare_cache):
        # The Packages.gz$ regex is a carryover from the original hardcoded logic and
        # deliberately matches any path ending in Packages.gz — including Debian-style paths.
        # This test documents that intentional behaviour.
        assert self._matches(
            bare_cache,
            "debian/dists/stable/main/binary-amd64/Packages.gz",
        )

    def test_rpm_package_is_not_index(self, bare_cache):
        assert not self._matches(
            bare_cache,
            "almalinux/9/x86_64/Packages/bash-5.1.8.x86_64.rpm",
        )

    def test_arbitrary_xml_outside_repodata_is_not_index(self, bare_cache):
        assert not self._matches(bare_cache, "some/path/config.xml")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# is_mutable_file — docker patterns
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestIsMutableFileDocker:
    """is_mutable_file evaluated with the "docker" entry of _PACKAGE_MUTABLE_PATTERNS."""

    @staticmethod
    def _matches(cache, path):
        """Return is_mutable_file's verdict for *path* under the docker patterns."""
        docker_patterns = _PACKAGE_MUTABLE_PATTERNS["docker"]
        return cache.is_mutable_file(path, docker_patterns)

    def test_tag_manifest_is_index(self, bare_cache):
        assert self._matches(bare_cache, "library/nginx/manifests/latest")

    def test_version_tag_manifest_is_index(self, bare_cache):
        assert self._matches(bare_cache, "library/nginx/manifests/1.25.3")

    def test_hyphenated_tag_manifest_is_index(self, bare_cache):
        assert self._matches(bare_cache, "library/nginx/manifests/latest-rc")

    def test_numeric_date_tag_manifest_is_index(self, bare_cache):
        assert self._matches(bare_cache, "library/nginx/manifests/20240101")

    def test_digest_manifest_is_not_index(self, bare_cache):
        # A digest reference: "sha256:" followed by 64 hex characters.
        digest = "sha256:" + "a" * 64
        manifest_path = f"library/nginx/manifests/{digest}"
        assert not self._matches(bare_cache, manifest_path)

    def test_tags_list_is_index(self, bare_cache):
        assert self._matches(bare_cache, "library/nginx/tags/list")

    def test_blob_is_not_index(self, bare_cache):
        assert not self._matches(bare_cache, "library/nginx/blobs/sha256:abc123")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# is_mutable_file — edge cases
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestIsMutableFileEdgeCases:
    """is_mutable_file with empty, ``None`` and caller-supplied pattern lists."""

    def test_empty_patterns_nothing_is_index(self, bare_cache):
        candidates = (
            "APKINDEX.tar.gz",
            "repomd.xml",
            "library/nginx/manifests/latest",
        )
        for candidate in candidates:
            assert not bare_cache.is_mutable_file(candidate, [])

    def test_none_patterns_nothing_is_index(self, bare_cache):
        for candidate in ("APKINDEX.tar.gz", "repomd.xml"):
            assert not bare_cache.is_mutable_file(candidate, None)

    def test_custom_patterns_match(self, bare_cache):
        custom = [r"metadata\.json$", r"index\.yaml$"]
        for matching in ("repo/metadata.json", "repo/subdir/index.yaml"):
            assert bare_cache.is_mutable_file(matching, custom)
        assert not bare_cache.is_mutable_file("repo/data.tar.gz", custom)

    def test_custom_pattern_does_not_match_standard_index(self, bare_cache):
        custom = [r"metadata\.json$"]
        assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", custom)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# get_index_cache_key
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestGetIndexCacheKey:
    """Format and uniqueness checks for RedisCache.get_index_cache_key."""

    def test_key_format_is_deterministic(self, bare_cache):
        # Assert against a pre-computed value to pin the hash algorithm,
        # truncation length, and format string in one assertion.
        index_path = "alpine/v3.18/x86_64/APKINDEX.tar.gz"
        digest_prefix = hashlib.sha256(index_path.encode()).hexdigest()[:16]
        actual = bare_cache.get_index_cache_key("alpine-test", index_path)
        assert actual == f"index:alpine-test:{digest_prefix}"

    def test_different_paths_produce_different_keys(self, bare_cache):
        key_318 = bare_cache.get_index_cache_key(
            "alpine-test", "alpine/v3.18/x86_64/APKINDEX.tar.gz"
        )
        key_319 = bare_cache.get_index_cache_key(
            "alpine-test", "alpine/v3.19/x86_64/APKINDEX.tar.gz"
        )
        assert key_318 != key_319

    def test_different_remotes_produce_different_keys(self, bare_cache):
        shared_path = "path/to/APKINDEX.tar.gz"
        key_a = bare_cache.get_index_cache_key("remote-a", shared_path)
        key_b = bare_cache.get_index_cache_key("remote-b", shared_path)
        assert key_a != key_b

    def test_key_starts_with_index_prefix_and_remote(self, bare_cache):
        assert bare_cache.get_index_cache_key("myremote", "some/path").startswith(
            "index:myremote:"
        )

    def test_key_hash_segment_is_16_chars(self, bare_cache):
        # Format: index:<remote>:<16-char hash> — the fixed length matters for key-space hygiene
        segments = bare_cache.get_index_cache_key("myremote", "some/path/file.xml").split(":")
        assert len(segments) == 3
        assert len(segments[2]) == 16
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# mark_index_cached / is_index_valid
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestIndexValidity:
    """mark_index_cached / is_index_valid against mocked and unavailable Redis."""

    def test_mark_index_cached_calls_setex_with_correct_ttl(self, cache_with_redis, mock_redis_client):
        remote, index_path, ttl = "remote", "path/APKINDEX.tar.gz", 300
        cache_with_redis.mark_index_cached(remote, index_path, ttl)
        redis_key = cache_with_redis.get_index_cache_key(remote, index_path)
        # The stored value is not pinned here, only the key and the TTL.
        mock_redis_client.setex.assert_called_once_with(redis_key, ttl, ANY)

    def test_present_key_is_valid(self, cache_with_redis, mock_redis_client):
        mock_redis_client.exists.return_value = 1
        is_valid = cache_with_redis.is_index_valid("remote", "path/APKINDEX.tar.gz")
        assert is_valid

    def test_missing_key_is_not_valid(self, cache_with_redis, mock_redis_client):
        mock_redis_client.exists.return_value = 0
        is_valid = cache_with_redis.is_index_valid("remote", "path/APKINDEX.tar.gz")
        assert not is_valid

    def test_unavailable_redis_is_not_valid(self, unavailable_cache):
        is_valid = unavailable_cache.is_index_valid("remote", "some/path")
        assert not is_valid

    def test_mark_cached_no_op_when_unavailable(self, unavailable_cache):
        # client is None when Redis is unavailable — setex cannot be called
        assert unavailable_cache.client is None
        # must not raise
        unavailable_cache.mark_index_cached("remote", "some/path", 300)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# mutable meta (ETag / Last-Modified storage)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestMutableMeta:
    """Key format plus store/get/delete behaviour of the mutable-meta helpers."""

    def test_meta_key_format(self, bare_cache):
        meta_path = "repo/metadata.json"
        digest_prefix = hashlib.sha256(meta_path.encode()).hexdigest()[:16]
        actual = bare_cache.get_mutable_meta_key("myremote", meta_path)
        assert actual == f"mutable:meta:myremote:{digest_prefix}"

    def test_meta_key_hash_is_16_chars(self, bare_cache):
        hash_segment = bare_cache.get_mutable_meta_key("remote", "some/path/file.json").split(":")[-1]
        assert len(hash_segment) == 16

    def test_store_and_retrieve_etag(self, cache_with_redis, mock_redis_client):
        etag = '"abc123"'
        # The mock answers hgetall with the same ETag that is being stored.
        mock_redis_client.hgetall.return_value = {"etag": '"abc123"'}
        cache_with_redis.store_mutable_meta("remote", "path/meta.json", etag, None)
        mock_redis_client.hset.assert_called_once()
        fetched = cache_with_redis.get_mutable_meta("remote", "path/meta.json")
        assert fetched["etag"] == etag

    def test_store_and_retrieve_last_modified(self, cache_with_redis, mock_redis_client):
        last_modified = "Mon, 01 Jan 2024 00:00:00 GMT"
        mock_redis_client.hgetall.return_value = {"last_modified": last_modified}
        cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, last_modified)
        fetched = cache_with_redis.get_mutable_meta("remote", "path/meta.json")
        assert fetched["last_modified"] == last_modified

    def test_store_no_op_when_both_none(self, cache_with_redis, mock_redis_client):
        cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, None)
        mock_redis_client.hset.assert_not_called()

    def test_store_no_op_when_unavailable(self, unavailable_cache):
        # must not raise
        unavailable_cache.store_mutable_meta("remote", "path", "etag", None)

    def test_get_returns_empty_when_unavailable(self, unavailable_cache):
        fetched = unavailable_cache.get_mutable_meta("remote", "path")
        assert fetched == {}

    def test_delete_removes_meta_key(self, cache_with_redis, mock_redis_client):
        meta_key = cache_with_redis.get_mutable_meta_key("remote", "path/meta.json")
        cache_with_redis.delete_mutable_meta("remote", "path/meta.json")
        mock_redis_client.delete.assert_called_once_with(meta_key)

    def test_delete_no_op_when_unavailable(self, unavailable_cache):
        # must not raise
        unavailable_cache.delete_mutable_meta("remote", "path")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# artifact published date (quarantine support)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestArtifactPublished:
    """Key format plus store/get behaviour of the artifact-published helpers."""

    def test_key_format_is_deterministic(self, bare_cache):
        artifact_path = "some/path/package-1.0.tar.gz"
        digest_prefix = hashlib.sha256(artifact_path.encode()).hexdigest()[:16]
        actual = bare_cache.get_artifact_published_key("myremote", artifact_path)
        assert actual == f"pkg:published:myremote:{digest_prefix}"

    def test_key_hash_is_16_chars(self, bare_cache):
        hash_segment = bare_cache.get_artifact_published_key("remote", "path/to/file.whl").split(":")[-1]
        assert len(hash_segment) == 16

    def test_different_paths_produce_different_keys(self, bare_cache):
        key_v1 = bare_cache.get_artifact_published_key("remote", "pkg-1.0.tar.gz")
        key_v2 = bare_cache.get_artifact_published_key("remote", "pkg-2.0.tar.gz")
        assert key_v1 != key_v2

    def test_store_calls_set_with_correct_value(self, cache_with_redis, mock_redis_client):
        published = "Mon, 01 Jan 2024 00:00:00 GMT"
        cache_with_redis.store_artifact_published("remote", "path/pkg.tar.gz", published)
        redis_key = cache_with_redis.get_artifact_published_key("remote", "path/pkg.tar.gz")
        mock_redis_client.set.assert_called_once_with(redis_key, published)

    def test_get_returns_stored_value(self, cache_with_redis, mock_redis_client):
        published = "Tue, 15 Mar 2022 12:00:00 GMT"
        mock_redis_client.get.return_value = published
        fetched = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz")
        assert fetched == published

    def test_get_returns_none_when_not_stored(self, cache_with_redis, mock_redis_client):
        mock_redis_client.get.return_value = None
        fetched = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz")
        assert fetched is None

    def test_store_no_op_when_unavailable(self, unavailable_cache):
        published = "Mon, 01 Jan 2024 00:00:00 GMT"
        unavailable_cache.store_artifact_published("remote", "path", published)

    def test_get_returns_none_when_unavailable(self, unavailable_cache):
        fetched = unavailable_cache.get_artifact_published("remote", "path")
        assert fetched is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# fetch lock (thundering-herd deduplication)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFetchLock:
    """acquire_fetch_lock / release_fetch_lock against mocked and unavailable Redis."""

    def test_acquire_returns_true_when_lock_obtained(self, cache_with_redis, mock_redis_client):
        mock_redis_client.set.return_value = True
        acquired = cache_with_redis.acquire_fetch_lock("myremote", "library/nginx/manifests/latest")
        assert acquired is True

    def test_acquire_calls_set_nx_with_ttl(self, cache_with_redis, mock_redis_client):
        mock_redis_client.set.return_value = True
        cache_with_redis.acquire_fetch_lock("myremote", "library/nginx/manifests/latest", ttl=15)
        # call_args is an (args, kwargs) pair; only the keyword arguments matter here.
        set_kwargs = mock_redis_client.set.call_args[1]
        assert set_kwargs.get("nx") is True
        assert set_kwargs.get("ex") == 15

    def test_acquire_returns_false_when_lock_already_held(self, cache_with_redis, mock_redis_client):
        # Redis SET NX → None when key exists
        mock_redis_client.set.return_value = None
        acquired = cache_with_redis.acquire_fetch_lock("myremote", "library/nginx/manifests/latest")
        assert acquired is False

    def test_acquire_fails_open_when_unavailable(self, unavailable_cache):
        # caller must be allowed to proceed when Redis is down
        acquired = unavailable_cache.acquire_fetch_lock("myremote", "some/path")
        assert acquired is True

    def test_acquire_fails_open_on_redis_exception(self, cache_with_redis, mock_redis_client):
        mock_redis_client.set.side_effect = Exception("connection reset")
        acquired = cache_with_redis.acquire_fetch_lock("myremote", "some/path")
        assert acquired is True

    def test_lock_key_embeds_path_hash(self, cache_with_redis, mock_redis_client):
        mock_redis_client.set.return_value = True
        manifest_path = "library/nginx/manifests/latest"
        cache_with_redis.acquire_fetch_lock("myremote", manifest_path)
        set_args = mock_redis_client.set.call_args[0]
        digest_prefix = hashlib.sha256(manifest_path.encode()).hexdigest()[:16]
        assert set_args[0] == f"fetchlock:myremote:{digest_prefix}"

    def test_lock_key_hash_is_16_chars(self, cache_with_redis, mock_redis_client):
        mock_redis_client.set.return_value = True
        cache_with_redis.acquire_fetch_lock("myremote", "some/long/path/file.tar.gz")
        set_args = mock_redis_client.set.call_args[0]
        # key format: fetchlock:<remote>:<16-char hash>
        segments = set_args[0].split(":")
        assert len(segments) == 3
        assert len(segments[2]) == 16

    def test_different_paths_produce_different_lock_keys(self, cache_with_redis, mock_redis_client):
        mock_redis_client.set.return_value = True

        cache_with_redis.acquire_fetch_lock("myremote", "path/a/manifests/latest")
        first_key = mock_redis_client.set.call_args[0][0]

        # Clear the recorded call so call_args reflects only the second acquire.
        mock_redis_client.set.reset_mock()

        cache_with_redis.acquire_fetch_lock("myremote", "path/b/manifests/latest")
        second_key = mock_redis_client.set.call_args[0][0]

        assert first_key != second_key

    def test_release_deletes_correct_key(self, cache_with_redis, mock_redis_client):
        manifest_path = "library/nginx/manifests/latest"
        cache_with_redis.release_fetch_lock("myremote", manifest_path)
        digest_prefix = hashlib.sha256(manifest_path.encode()).hexdigest()[:16]
        mock_redis_client.delete.assert_called_once_with(f"fetchlock:myremote:{digest_prefix}")

    def test_release_no_op_when_unavailable(self, unavailable_cache):
        # must not raise
        unavailable_cache.release_fetch_lock("myremote", "some/path")

    def test_release_no_op_on_redis_exception(self, cache_with_redis, mock_redis_client):
        mock_redis_client.delete.side_effect = Exception("timeout")
        # must not raise
        cache_with_redis.release_fetch_lock("myremote", "some/path")
|