diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 2281254..0000000 --- a/.dockerignore +++ /dev/null @@ -1,15 +0,0 @@ -.git/ -.venv/ -dist/ -tests/ -remotes.yaml -ca-bundle.pem -.env -*.log -docker-compose.yml -.woodpecker/ -.tox/ -.ruff_cache/ -.pytest_cache/ -.pre-commit-cache/ -minio_data/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 459d659..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,7 +0,0 @@ -repos: - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.15.12 - hooks: - - id: ruff - args: [--fix, --exit-non-zero-on-fix] - - id: ruff-format diff --git a/.woodpecker/build.yaml b/.woodpecker/build.yaml deleted file mode 100644 index 58b86d0..0000000 --- a/.woodpecker/build.yaml +++ /dev/null @@ -1,9 +0,0 @@ -when: - - event: pull_request - -steps: - - name: docker-build - image: woodpeckerci/plugin-docker-buildx - settings: - repo: git.unkin.net/unkin/artifactapi - dry_run: true diff --git a/.woodpecker/docker.yaml b/.woodpecker/docker.yaml deleted file mode 100644 index 985531b..0000000 --- a/.woodpecker/docker.yaml +++ /dev/null @@ -1,18 +0,0 @@ -when: - - event: tag - ref: refs/tags/v* - -steps: - - name: docker - image: woodpeckerci/plugin-docker-buildx - settings: - registry: git.unkin.net - repo: git.unkin.net/unkin/artifactapi - username: droneci - password: - from_secret: DRONECI_PASSWORD - tags: - - ${CI_COMMIT_TAG} - - latest - build_args: - - VERSION=${CI_COMMIT_TAG##v} diff --git a/.woodpecker/pre-commit.yaml b/.woodpecker/pre-commit.yaml deleted file mode 100644 index 5086dd5..0000000 --- a/.woodpecker/pre-commit.yaml +++ /dev/null @@ -1,9 +0,0 @@ -when: - - event: pull_request - -steps: - - name: pre-commit - image: git.unkin.net/unkin/almalinux9-base:20260606 - commands: - - uvx pre-commit run --all-files - diff --git a/.woodpecker/test.yaml b/.woodpecker/test.yaml deleted file mode 100644 index b137cd2..0000000 --- a/.woodpecker/test.yaml +++ 
/dev/null @@ -1,8 +0,0 @@ -when: - - event: pull_request - -steps: - - name: test - image: git.unkin.net/unkin/almalinux9-base:20260606 - commands: - - uvx --python 3.11 --with tox-uv tox diff --git a/SPEC.md b/SPEC.md deleted file mode 100644 index 9beb93b..0000000 --- a/SPEC.md +++ /dev/null @@ -1,137 +0,0 @@ -# ArtifactAPI Specification - -## Repository model - -Every repository entry in `remotes.yaml` has two orthogonal fields: - -| field | values | meaning | -|---|---|---| -| `type` | `local`, `remote`, `virtual` | repository kind — how the repo is served | -| `package` | `docker`, `rpm`, `alpine`, `generic` | package format — what protocol and caching rules to apply | - -**type** - -- `local` — files are uploaded directly to the API and stored in S3; no upstream. -- `remote` — proxies and caches content from an upstream URL (`base_url`). -- `virtual` — aggregates multiple repositories (not yet implemented). - -**package** - -- `docker` — upstream speaks the OCI Distribution API (Bearer auth, manifest/blob paths). -- `rpm` — upstream is an RPM repository; repodata files are index files. -- `alpine` — upstream is an Alpine APK repository; `APKINDEX.tar.gz` is an index file. -- `generic` — plain HTTP file download; no format-specific logic. - ---- - -## Caching - -Two cache classes determine retention: - -| class | stored | TTL | -|---|---|---| -| **file** | S3 object, no Redis entry | `file_ttl` — `0` means indefinite | -| **index** | S3 object + Redis TTL key | `index_ttl` — when the Redis key expires the S3 object is deleted and re-fetched | - -Index files are mutable metadata that must expire. File-class objects are treated as immutable and cached indefinitely (unless `file_ttl` is set). - ---- - -## Docker package rules - -### URL construction - -Remote URLs are prefixed with `/v2/` for `package: docker` remotes: - -``` -{base_url}/v2/{path} -``` - -e.g. 
`library/nginx/manifests/latest` → `https://registry-1.docker.io/v2/library/nginx/manifests/latest` - -### Authentication - -Docker registries use Bearer token challenges. On a `401 Unauthorized` response, the API: - -1. Parses the `WWW-Authenticate: Bearer` header for `realm`, `service`, and `scope`. -2. Fetches a token from the auth realm, supplying `username`/`password` from the remote config if present. -3. Retries the request with `Authorization: Bearer `. - -Tokens are cached in-memory keyed by `(realm, service, scope, username)` and expire 30 seconds before their stated `expires_in`. - -### Cache classification - -| path pattern | mutable | class | TTL source | -|---|---|---|---| -| `/manifests/` | yes | index | `index_ttl` | -| `/tags/list` | yes | index | `index_ttl` | -| `/manifests/sha256:` | no | file | `file_ttl` | -| `/blobs/sha256:` | no | file | `file_ttl` | - -Tag-based manifests and tag lists are mutable and cached as index. Digest-pinned manifests and blobs are content-addressed and cached indefinitely as files. - -### Blob deduplication - -Blobs are stored under a digest-keyed path shared across all images on the same remote: - -``` -{remote_name}/blobs/sha256/{digest} -``` - -The same layer pulled by different images is stored once. 
- -### Accept headers - -| path | `Accept` header sent upstream | -|---|---| -| `/manifests/…` | `application/vnd.docker.distribution.manifest.v2+json`, `application/vnd.oci.image.manifest.v1+json`, `application/vnd.oci.image.index.v1+json`, `application/vnd.docker.distribution.manifest.list.v2+json` | -| `/blobs/…` | `application/octet-stream` | - ---- - -## OCI Distribution API endpoint - -The API exposes a native Docker registry interface so clients can use `docker pull` directly: - -``` -GET /v2/ — version ping -GET /v2/{remote}/{image}/manifests/{ref} — fetch manifest -HEAD /v2/{remote}/{image}/manifests/{ref} — manifest metadata -GET /v2/{remote}/{image}/blobs/{digest} — fetch blob -HEAD /v2/{remote}/{image}/blobs/{digest} — blob metadata -``` - -Responses include `Docker-Distribution-Api-Version`, `Docker-Content-Digest`, and the correct OCI `Content-Type` (detected from the manifest `mediaType` field). - -Only remotes with `package: docker` are accessible via this endpoint. All other remotes return `400`. - ---- - -## include_patterns - -`include_patterns` is a list of Python regexes applied to every request before any upstream fetch or cache lookup. - -**Generic remotes (`/api/v1/remote/…`):** -- Patterns match against the file path and the full path. -- Index files (mutable metadata) bypass pattern checks and are always allowed. - -**Docker remotes (`/v2/…`):** -- Patterns match against the image name (first two path segments, e.g. `library/nginx`) and the full path. -- The index-file exemption does **not** apply — patterns restrict whole images, including their manifests and tag lists. -- No patterns configured → all images allowed. - -Returns `403` when a request is blocked. - ---- - -## Versioning - -The package version is derived from git tags via `hatch-vcs`. Tags follow the format `v{MAJOR}.{MINOR}.{PATCH}`. 
- -Docker images are built with the version injected at build time: - -``` -SETUPTOOLS_SCM_PRETEND_VERSION= uv sync --frozen -``` - -The `Makefile` provides `patch`, `minor`, and `major` targets that tag the current commit and rebuild the container image. diff --git a/examples/conf.d-method/alpine.yaml b/examples/conf.d-method/alpine.yaml deleted file mode 100644 index 55f8c65..0000000 --- a/examples/conf.d-method/alpine.yaml +++ /dev/null @@ -1,10 +0,0 @@ -remotes: - alpine: - base_url: "https://dl-cdn.alpinelinux.org" - package: "alpine" - description: "Alpine Linux APK package repository" - immutable_patterns: - - ".*/x86_64/.*\\.apk$" - cache: - immutable_ttl: 0 - mutable_ttl: 7200 diff --git a/examples/conf.d-method/github.yaml b/examples/conf.d-method/github.yaml deleted file mode 100644 index 81ec2e2..0000000 --- a/examples/conf.d-method/github.yaml +++ /dev/null @@ -1,11 +0,0 @@ -remotes: - github: - base_url: "https://github.com" - package: "generic" - description: "GitHub releases and files" - immutable_patterns: - - "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*" - - "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$" - cache: - immutable_ttl: 0 - mutable_ttl: 0 diff --git a/examples/conf.d-method/pypi.yaml b/examples/conf.d-method/pypi.yaml deleted file mode 100644 index 0950dc2..0000000 --- a/examples/conf.d-method/pypi.yaml +++ /dev/null @@ -1,16 +0,0 @@ -remotes: - pypi: - base_url: "https://files.pythonhosted.org" - package: "pypi" - description: "Python Package Index" - check_mutable_updates: true - quarantine_new: true - quarantine_days: 3 - immutable_patterns: - - "packages/.*\\.whl$" - - "packages/.*\\.whl\\.metadata$" - - "packages/.*\\.tar\\.gz$" - - "packages/.*\\.zip$" - cache: - immutable_ttl: 0 - mutable_ttl: 600 diff --git a/examples/single-file/remotes.yaml b/examples/single-file/remotes.yaml deleted file mode 100644 index f4aaa06..0000000 --- a/examples/single-file/remotes.yaml +++ /dev/null @@ -1,532 +0,0 @@ 
-# Example remotes configuration — copy and adapt for your environment. -# -# immutable_patterns: artifacts cached forever (e.g. release binaries, versioned tags). -# mutable_patterns: artifacts that expire after cache.mutable_ttl seconds and are -# re-fetched from upstream on next request (e.g. index files, -# branch archives). Defaults to the package-type built-ins when -# not set (APKINDEX, repomd.xml, Docker manifests, etc.). -# cache: -# immutable_ttl: TTL for immutable files (0 = forever, rarely needed to change). -# mutable_ttl: TTL in seconds for mutable files. Omit to use the default (3600). -# -# quarantine_new: Set to true to block immutable artifacts published within the last -# quarantine_days days. Requests return 404 until the quarantine period -# expires. Fails open when the publish date cannot be determined. -# quarantine_days: Number of days to quarantine newly published artifacts (requires -# quarantine_new: true). The upstream Last-Modified header is used as -# the publish date. -# -# WARNING: this file may contain credentials — do not commit real values. 
-# -# Global configuration -#s3: -# endpoint: "localhost:9000" -# access_key: "minioadmin" -# secret_key: "minioadmin" -# bucket: "artifacts" -# secure: false -# -#redis: -# url: "redis://localhost:6379/0" -# -#database: -# url: "postgresql://artifacts:artifacts123@localhost:5432/artifacts" -# -remotes: - github: - base_url: "https://github.com" - package: "generic" - description: "GitHub releases and files" - immutable_patterns: - - "gruntwork-io/terragrunt/.*terragrunt_linux_amd64.*" - - "lxc/incus/.*\\.tar\\.gz$" - - "prometheus/node_exporter/.*/node_exporter-.*\\.linux-amd64\\.tar\\.gz$" - - "VictoriaMetrics/VictoriaMetrics/.*/vmutils-linux-amd64-.*\\.tar\\.gz$" - - "VictoriaMetrics/VictoriaMetrics/.*/victoria-metrics-linux-amd64-.*-cluster\\.tar\\.gz$" - - "VictoriaMetrics/VictoriaMetrics/.*/victoria-logs-linux-amd64-.*\\.tar\\.gz$" - - "VictoriaMetrics/VictoriaMetrics/.*/vlutils-linux-amd64-.*\\.tar\\.gz$" - - "prometheus-community/bind_exporter/.*/bind_exporter-.*\\.linux-amd64\\.tar\\.gz$" - - "prometheus-community/pgbouncer_exporter/.*/pgbouncer_exporter-.*\\.linux-amd64\\.tar\\.gz$" - - "prometheus-community/postgres_exporter/.*/postgres_exporter-.*\\.linux-amd64\\.tar\\.gz$" - - "onedr0p/exportarr/.*/exportarr_.*_linux_amd64\\.tar\\.gz$" - - "tynany/frr_exporter/.*/frr_exporter-.*\\.linux-amd64\\.tar\\.gz$" - - "camptocamp/prometheus-puppetdb-exporter/.*/prometheus-puppetdb-exporter-.*\\.linux-amd64\\.tar\\.gz$" - - "grafana/jsonnet-language-server/.*/jsonnet-language-server_.*_linux_amd64$" - - "helmfile/helmfile/.*/helmfile_.*_linux_amd64\\.tar\\.gz$" - - "helmfile/vals/.*/vals_.*_linux_amd64\\.tar\\.gz$" - - "openbao/openbao-plugins/.*/openbao-plugin-secrets-consul_linux_amd64_.*\\.tar\\.gz$" - - "openbao/openbao-plugins/.*/openbao-plugin-secrets-nomad_linux_amd64_.*\\.tar\\.gz$" - - "apple/foundationdb/.*/libfdb_c\\.x86_64\\.so$" - - "stalwartlabs/stalwart/.*/stalwart-cli-x86_64-unknown-linux-gnu\\.tar\\.gz$" - - 
"stalwartlabs/stalwart/.*/stalwart-foundationdb-x86_64-unknown-linux-gnu\\.tar\\.gz$" - - "stalwartlabs/stalwart/.*/stalwart-x86_64-unknown-linux-gnu\\.tar\\.gz$" - cache: - immutable_ttl: 0 # Files cached indefinitely - mutable_ttl: 0 - - github-archive: - base_url: "https://github.com" - package: "generic" - description: "GitHub repository archive tarballs" - immutable_patterns: - # Tag archives are immutable — a tag never changes - - ".*/archive/refs/tags/.*\\.tar\\.gz$" - mutable_patterns: - # Branch archives can change on every push - - ".*/archive/refs/heads/main\\.tar\\.gz$" - - ".*/archive/refs/heads/master\\.tar\\.gz$" - # Before re-downloading an expired branch archive, check whether it has - # actually changed (304 Not Modified → just refresh the TTL, no transfer). - # Only applies to user-defined mutable_patterns, not package-type defaults. - check_mutable_updates: true - cache: - immutable_ttl: 0 # Tag archives cached indefinitely - mutable_ttl: 86400 # Branch archives refreshed after 1 day - - gitea-dl: - base_url: "https://dl.gitea.com" - package: "generic" - description: "Gitea download site" - immutable_patterns: - - "act_runner/.*/act_runner-.*-linux-amd64$" - cache: - immutable_ttl: 0 # Files cached indefinitely - mutable_ttl: 0 - - hashicorp-releases: - base_url: "https://releases.hashicorp.com" - package: "generic" - description: "HashiCorp product releases" - immutable_patterns: - - "terraform/.*terraform_.*_linux_amd64\\.zip$" - - "terraform/.*terraform_.*_windows_amd64\\.zip$" - - "terraform/.*terraform_.*_darwin_amd64\\.zip$" - - "vault/.*vault_.*_linux_amd64\\.zip$" - - "vault/.*vault_.*_windows_amd64\\.zip$" - - "vault/.*vault_.*_darwin_amd64\\.zip$" - - "consul-cni/.*/consul-cni_.*_linux_amd64\\.zip$" - - "consul/.*/consul_.*_linux_amd64\\.zip$" - - "nomad-autoscaler/.*/nomad-autoscaler_.*_linux_amd64\\.zip$" - - "nomad/.*/nomad_.*_linux_amd64\\.zip$" - - "packer/.*/packer_.*_linux_amd64\\.zip$" - cache: - immutable_ttl: 0 # Files cached 
indefinitely - mutable_ttl: 0 - - alpine: - base_url: "https://dl-cdn.alpinelinux.org" - package: "alpine" - description: "Alpine Linux APK package repository" - immutable_patterns: - - ".*/x86_64/.*\\.apk$" - # check_mutable_updates not set: APKINDEX.tar.gz is a package-type default - # and is always re-fetched on expiry — conditional checks are skipped for - # built-in mutable patterns regardless of this flag. - cache: - immutable_ttl: 0 # Files cached indefinitely - mutable_ttl: 7200 # Index files (APKINDEX.tar.gz) cached for 2 hours - - almalinux: - base_url: "https://gsl-syd.mm.fcix.net/almalinux" - package: "rpm" - description: "AlmaLinux RPM package repository" - immutable_patterns: - - ".*/x86_64/.*\\.rpm$" - - ".*/noarch/.*\\.rpm$" - - ".*/repodata/.*$" - - ".*\\.rpm$" # Allow all RPM files - # repomd.xml / repodata are package-type defaults — always re-fetched on - # expiry. check_mutable_updates would only apply to any custom - # mutable_patterns added here. - cache: - immutable_ttl: 0 # Files cached indefinitely - mutable_ttl: 7200 # Metadata files cached for 2 hours - - epel: - base_url: "http://mirror.aarnet.edu.au/pub/epel" - package: "rpm" - description: "EPEL (Extra Packages for Enterprise Linux)" - immutable_patterns: - - "8/Everything/x86_64/.*\\.rpm$" - - "9/Everything/x86_64/.*\\.rpm$" - - "10/Everything/x86_64/.*\\.rpm$" - - ".*/noarch/.*\\.rpm$" - - ".*/repodata/.*$" - cache: - immutable_ttl: 0 # Files cached indefinitely - mutable_ttl: 7200 # Metadata files cached for 2 hours - - fedora: - base_url: "https://gsl-syd.mm.fcix.net/fedora/linux" - package: "rpm" - description: "Fedora Linux RPM package repository" - immutable_patterns: - - "releases/.*/Everything/x86_64/.*\\.rpm$" - - "updates/.*/Everything/x86_64/.*\\.rpm$" - - "development/.*/Everything/x86_64/.*\\.rpm$" - - ".*/noarch/.*\\.rpm$" - - "updates/.*/Everything/x86_64/repodata/.*$" - cache: - immutable_ttl: 0 # Files cached indefinitely - mutable_ttl: 300 # Metadata files cached 
for 5 minutes - - ghcr: - base_url: "https://ghcr.io" - package: "docker" - description: "GitHub Container Registry" - # username: "your-github-username" - # password: "your-github-pat" # needs read:packages scope - # Docker manifest/tag-list patterns are package-type defaults — always - # re-fetched on expiry. check_mutable_updates only applies to any custom - # mutable_patterns you add (e.g. a metadata endpoint). - cache: - immutable_ttl: 0 - mutable_ttl: 300 - - dockerhub: - base_url: "https://registry-1.docker.io" - package: "docker" - description: "Docker Hub registry" - cache: - immutable_ttl: 0 - mutable_ttl: 300 - - pypi: - base_url: "https://files.pythonhosted.org" - package: "pypi" - description: "Python Package Index — simple index and package files via a single remote" - # simple/ requests are transparently fetched from pypi.org; package files come from - # files.pythonhosted.org (base_url). URLs in the simple index are rewritten to this remote. - check_mutable_updates: true - # Block packages published within the last 3 days (supply-chain attack mitigation). - # Immutable artifacts (wheel/sdist) newer than quarantine_days return 404 until - # the window passes. Disable by setting quarantine_new: false or removing both keys. 
- quarantine_new: true - quarantine_days: 3 - immutable_patterns: - - "packages/.*\\.whl$" - - "packages/.*\\.whl\\.metadata$" - - "packages/.*\\.tar\\.gz$" - - "packages/.*\\.zip$" - - "packages/.*\\.egg$" - cache: - immutable_ttl: 0 - mutable_ttl: 600 # Simple index pages refreshed after 10 minutes - - pypi-gitea: - base_url: "https://gitea.example.com/api/packages/myorg/pypi" - package: "pypi" - description: "Private Gitea PyPI registry — simple index and files at the same host" - # username: "your-gitea-username" - # password: "your-personal-access-token" # needs package:read scope - check_mutable_updates: true - immutable_patterns: - - "files/.*\\.whl$" - - "files/.*\\.whl\\.metadata$" - - "files/.*\\.tar\\.gz$" - - "files/.*\\.zip$" - - "files/.*\\.egg$" - cache: - immutable_ttl: 0 - mutable_ttl: 600 - - npm: - base_url: "https://registry.npmjs.org" - package: "npm" - description: "npm registry — package metadata with tarball URL rewriting" - check_mutable_updates: true - immutable_patterns: - - \.tgz$ - mutable_patterns: - - ^(?!.*\.tgz$).* - cache: - immutable_ttl: 0 - mutable_ttl: 600 # Package metadata refreshed after 10 minutes - - hashicorp-helm: - base_url: "https://helm.releases.hashicorp.com" - package: "helm" - description: "HashiCorp Helm chart repository (Vault, Consul, Nomad, etc.)" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 # Chart tarballs are versioned — cache forever - mutable_ttl: 3600 # index.yaml refreshed after 1 hour - - metallb: - base_url: "https://metallb.github.io/metallb" - package: "helm" - description: "MetalLB load balancer Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - jetstack: - base_url: "https://charts.jetstack.io" - package: "helm" - description: "Jetstack Helm charts (cert-manager)" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 
3600 - - rancher-stable: - base_url: "https://releases.rancher.com/server-charts/stable" - package: "helm" - description: "Rancher stable Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - purelb: - base_url: "https://gitlab.com/api/v4/projects/20400619/packages/helm/stable" - package: "helm" - description: "PureLB load balancer Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - istio: - base_url: "https://istio-release.storage.googleapis.com/charts" - package: "helm" - description: "Istio service mesh Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - cnpg: - base_url: "https://cloudnative-pg.github.io/charts" - package: "helm" - description: "CloudNativePG operator Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - ceph-csi: - base_url: "https://ceph.github.io/csi-charts" - package: "helm" - description: "Ceph CSI driver Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - external-dns: - base_url: "https://kubernetes-sigs.github.io/external-dns/" - package: "helm" - description: "ExternalDNS Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - intel-helm: - base_url: "https://intel.github.io/helm-charts/" - package: "helm" - description: "Intel Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - elastic: - base_url: "https://helm.elastic.co" - package: "helm" - description: "Elastic stack Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - 
k8up-io: - base_url: "https://k8up-io.github.io/k8up" - package: "helm" - description: "K8up backup operator Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - victoriametrics: - base_url: "https://victoriametrics.github.io/helm-charts/" - package: "helm" - description: "VictoriaMetrics observability Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - grafana: - base_url: "https://grafana.github.io/helm-charts" - package: "helm" - description: "Grafana observability Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - helm-openldap: - base_url: "https://jp-gouin.github.io/helm-openldap/" - package: "helm" - description: "OpenLDAP Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - woodpecker: - base_url: "https://woodpecker-ci.org/" - package: "helm" - description: "Woodpecker CI Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - stakater: - base_url: "https://stakater.github.io/stakater-charts" - package: "helm" - description: "Stakater Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - jfrog: - base_url: "https://charts.jfrog.io/" - package: "helm" - description: "JFrog Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - openvox: - base_url: "https://openvoxproject.github.io/openvox-helm-chart" - package: "helm" - description: "OpenVox Helm charts" - check_mutable_updates: true - immutable_patterns: - - "\\.tgz$" - cache: - immutable_ttl: 0 - mutable_ttl: 3600 - - puppet-forge: - base_url: "https://forgeapi.puppet.com" 
- package: "puppet" - description: "Puppet Forge module registry" - # Module metadata (v3/modules/, v3/releases) is mutable by default. - # Configure r10k / librarian-puppet with this remote as the Forge URL: - # http://your-proxy/api/v1/remote/puppet-forge - check_mutable_updates: true - immutable_patterns: - - "^v3/files/.*\\.tar\\.gz$" - cache: - immutable_ttl: 0 # Module tarballs cached indefinitely - mutable_ttl: 600 # Module metadata refreshed after 10 minutes - - terraform-registry: - base_url: "https://registry.terraform.io" - package: "terraform" - description: "Terraform/OpenTofu provider registry (Registry Protocol)" - # Provider version lists are mutable by default. - # Point Terraform at this remote via .terraformrc: - # host "registry.terraform.io" { - # services = { - # "providers.v1" = "http://your-proxy/api/v1/remote/terraform-registry/" - # } - # } - # releases_remote must match the name of the hashicorp-releases remote below, - # so download_url / shasums_url in per-version download info are rewritten. 
- releases_remote: "hashicorp-releases" - immutable_patterns: - - "[^/]+/[^/]+/[^/]+/download/[^/]+/[^/]+$" - cache: - immutable_ttl: 0 # Per-version download info cached indefinitely - mutable_ttl: 300 # Provider versions list refreshed after 5 minutes - - hashicorp-releases: - base_url: "https://releases.hashicorp.com" - package: "generic" - description: "HashiCorp releases CDN — provider zips, SHA256SUMS, and signatures" - immutable_patterns: - - ".*\\.zip$" - - ".*SHA256SUMS(\\.sig)?$" - cache: - immutable_ttl: 0 # Release artifacts cached indefinitely - mutable_ttl: 0 - - -virtuals: - helm-all: - package: "helm" - description: "Virtual repository merging all helm remotes — member order is priority order for duplicate chart+version" - members: - - hashicorp-helm - - metallb - - jetstack - - rancher-stable - - purelb - - istio - - cnpg - - ceph-csi - - external-dns - - intel-helm - - elastic - - k8up-io - - victoriametrics - - grafana - - helm-openldap - - woodpecker - - stakater - - jfrog - - openvox - -locals: - local-generic: - package: "generic" - description: "Local generic file repository" - cache: - immutable_ttl: 0 # Files cached indefinitely - mutable_ttl: 0 diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 8be2f2d..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,60 +0,0 @@ -[project] -name = "artifactapi" -dynamic = ["version"] -description = "Generic artifact caching system with support for various package managers" - -dependencies = [ - "fastapi>=0.104.0", - "uvicorn[standard]>=0.24.0", - "httpx>=0.25.0", - "redis>=5.0.0", - "boto3>=1.29.0", - "psycopg2-binary>=2.9.0", - "pyyaml>=6.0", - "lxml>=4.9.0", - "prometheus-client>=0.19.0", - "python-multipart>=0.0.6", - "msgpack>=1.0.0", -] -requires-python = ">=3.11" -readme = "README.md" -license = {text = "MIT"} - -[project.scripts] -artifactapi = "artifactapi.main:main" - -[build-system] -requires = ["hatchling", "hatch-vcs"] -build-backend = "hatchling.build" - 
-[tool.hatch.version] -source = "vcs" - -[tool.hatch.metadata] -allow-direct-references = true - -[tool.hatch.build.targets.wheel] -packages = ["src/artifactapi"] - -[project.optional-dependencies] -dev = [ - "pytest>=7.4.0", - "pytest-asyncio>=0.21.0", - "black>=23.9.0", - "isort>=5.12.0", - "mypy>=1.6.0", - "ruff>=0.4.0", - "tox>=4.0.0", - "pre-commit>=3.0.0", -] - -[tool.pytest.ini_options] -asyncio_mode = "auto" -testpaths = ["tests"] - -[tool.ruff] -line-length = 140 - -[tool.ruff.lint] -select = ["E", "F", "I", "UP"] -ignore = ["E501"] diff --git a/src/artifactapi/__init__.py b/src/artifactapi/__init__.py deleted file mode 100644 index 551d7c3..0000000 --- a/src/artifactapi/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Artifact API package diff --git a/src/artifactapi/artifact/__init__.py b/src/artifactapi/artifact/__init__.py deleted file mode 100644 index 9a52d4e..0000000 --- a/src/artifactapi/artifact/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from . import discovery, docker, flush, local, proxy - -__all__ = ["discovery", "docker", "flush", "local", "proxy"] diff --git a/src/artifactapi/artifact/discovery.py b/src/artifactapi/artifact/discovery.py deleted file mode 100644 index 786ccb5..0000000 --- a/src/artifactapi/artifact/discovery.py +++ /dev/null @@ -1,82 +0,0 @@ -import logging -import re -from typing import Any -from urllib.parse import urlparse - -import httpx -from fastapi import HTTPException - -from .proxy import cache_single_artifact - -logger = logging.getLogger(__name__) - - -async def _discover_github_releases(remote: str, include_pattern: str) -> list[str]: - match = re.match(r"github\.com/([^/]+)/([^/]+)", remote) - if not match: - raise HTTPException(status_code=400, detail="Invalid GitHub remote format") - - owner, repo = match.groups() - - async with httpx.AsyncClient(follow_redirects=True) as client: - response = await client.get(f"https://api.github.com/repos/{owner}/{repo}/releases") - if response.status_code != 200: - raise 
HTTPException(status_code=response.status_code, detail=f"Failed to fetch releases: {response.text}") - - releases = response.json() - regex = re.compile(include_pattern.replace("*", ".*")) - return [ - asset["browser_download_url"] - for release in releases - for asset in release.get("assets", []) - if regex.search(asset["browser_download_url"]) - ] - - -async def _discover(remote: str, include_pattern: str) -> list[str]: - if "github.com" in remote: - return await _discover_github_releases(remote, include_pattern) - raise HTTPException(status_code=400, detail=f"Unsupported remote: {remote}") - - -async def cache_artifacts(remote: str, include_pattern: str, storage) -> dict[str, Any]: - try: - matching_urls = await _discover(remote, include_pattern) - - if not matching_urls: - return {"message": "No matching artifacts found", "cached_count": 0, "artifacts": []} - - cached_artifacts = [] - for url in matching_urls: - result = await cache_single_artifact(url, "", "", storage, {}) - cached_artifacts.append(result) - - cached_count = sum(1 for a in cached_artifacts if a["status"] in ["cached", "already_cached"]) - return { - "message": f"Processed {len(matching_urls)} artifacts, {cached_count} successfully cached", - "cached_count": cached_count, - "artifacts": cached_artifacts, - } - except Exception as e: - raise HTTPException(status_code=500, detail=str(e)) - - -async def list_artifacts(remote: str, include_pattern: str, storage) -> dict[str, Any]: - try: - matching_urls = await _discover(remote, include_pattern) - cached_artifacts = [] - for url in matching_urls: - parsed = urlparse(url) - key = storage.get_object_key(remote, parsed.path) - if storage.exists(key): - cached_artifacts.append({"url": url, "cached_url": storage.get_url(key), "key": key}) - - return { - "remote": remote, - "pattern": include_pattern, - "total_found": len(matching_urls), - "cached_count": len(cached_artifacts), - "artifacts": cached_artifacts, - } - except Exception as e: - raise 
HTTPException(status_code=500, detail=str(e)) diff --git a/src/artifactapi/artifact/docker.py b/src/artifactapi/artifact/docker.py deleted file mode 100644 index 79e4eee..0000000 --- a/src/artifactapi/artifact/docker.py +++ /dev/null @@ -1,138 +0,0 @@ -import asyncio -import hashlib -import json -import logging -import re - -from fastapi import HTTPException, Request, Response - -from . import proxy as _proxy - -logger = logging.getLogger(__name__) - - -def ping() -> Response: - return Response( - content="{}", - media_type="application/json", - headers={"Docker-Distribution-Api-Version": "registry/2.0"}, - ) - - -async def proxy(request: Request, remote_name: str, path: str, storage, cache, config, metrics) -> Response: - remote_config = config.get_remote_config(remote_name) - if not remote_config: - raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured") - if remote_config.get("package") != "docker": - raise HTTPException(status_code=400, detail=f"Remote '{remote_name}' is not a docker remote") - - patterns = config.get_immutable_patterns(remote_name, "") - if patterns: - path_parts = path.split("/") - image_name = "/".join(path_parts[:2]) if len(path_parts) >= 2 else path - if not any(re.search(p, path) or re.search(p, image_name) for p in patterns): - logger.info(f"PATTERN BLOCKED: {remote_name}/{path}") - raise HTTPException(status_code=403, detail="Image not allowed by configuration patterns") - - if remote_config.get("ban_tags_enabled", False): - ban_tags = remote_config.get("ban_tags", []) - if ban_tags: - tag_match = re.search(r"/manifests/([^/]+)$", path) - if tag_match: - tag = tag_match.group(1) - if not tag.startswith("sha256:") and tag in ban_tags: - logger.info(f"TAG BANNED: {remote_name}/{path} (tag: {tag})") - raise HTTPException(status_code=403, detail=f"Tag '{tag}' is not permitted on this remote") - - base_url = remote_config.get("base_url", "").rstrip("/") - remote_url = f"{base_url}/v2/{path}" - - cached_key = 
storage.get_object_key(remote_name, path) - if not storage.exists(cached_key): - cached_key = None - - is_mutable = cache.is_mutable_file(path, config.get_mutable_patterns(remote_name)) - - if cached_key and is_mutable: - if not cache.is_index_valid(remote_name, path): - if not await _proxy.handle_expired_mutable(remote_name, path, remote_url, config, cache, storage): - cached_key = None - - lock_acquired = False - if not cached_key: - lock_acquired = cache.acquire_fetch_lock(remote_name, path) - if not lock_acquired: - # Another pod is already fetching — poll storage briefly before issuing a duplicate upstream request - for _ in range(10): - await asyncio.sleep(0.5) - probe_key = storage.get_object_key(remote_name, path) - if storage.exists(probe_key): - cached_key = probe_key - break - - if not cached_key: - logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}") - try: - result = await _proxy.cache_single_artifact(remote_url, remote_name, path, storage, remote_config) - if result["status"] == "error": - raise HTTPException(status_code=502, detail=f"Failed to fetch: {result['error']}") - if result["status"] == "cached" and is_mutable: - cache_config = config.get_cache_config(remote_name) - mutable_ttl = cache_config.get("mutable_ttl", 3600) - cache.mark_index_cached(remote_name, path, mutable_ttl) - logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)") - if result.get("etag") or result.get("last_modified"): - cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified")) - if not is_mutable: - published = result.get("last_modified") - if published: - cache.store_artifact_published(remote_name, path, published) - _proxy._check_quarantine(remote_name, published, config) - finally: - if lock_acquired: - cache.release_fetch_lock(remote_name, path) - elif not is_mutable: - published = cache.get_artifact_published(remote_name, path) - if not published: - published = await 
_proxy._fetch_last_modified(remote_url, remote_config) - if published: - cache.store_artifact_published(remote_name, path, published) - _proxy._check_quarantine(remote_name, published, config) - - artifact_data = storage.download_object(storage.get_object_key(remote_name, path)) - - is_blob = "/blobs/" in path - if is_blob: - content_type = "application/octet-stream" - else: - try: - manifest_json = json.loads(artifact_data) - content_type = manifest_json.get("mediaType") - if not content_type: - if "manifests" in manifest_json: - content_type = "application/vnd.oci.image.index.v1+json" - else: - content_type = "application/vnd.oci.image.manifest.v1+json" - except Exception: - content_type = "application/vnd.oci.image.manifest.v1+json" - - digest = f"sha256:{hashlib.sha256(artifact_data).hexdigest()}" - - # Cross-link tag manifests to their sha256 digest key so digest-addressed pulls hit cache - if is_mutable and "/manifests/" in path: - digest_path = re.sub(r"/manifests/[^/]+$", f"/manifests/{digest}", path) - digest_key = storage.get_object_key(remote_name, digest_path) - if not storage.exists(digest_key): - storage.upload(digest_key, artifact_data) - - headers = { - "Docker-Distribution-Api-Version": "registry/2.0", - "Docker-Content-Digest": digest, - "Content-Length": str(len(artifact_data)), - } - - if request.method == "HEAD": - return Response(status_code=200, headers=headers, media_type=content_type) - - metrics.record_cache_hit(remote_name, len(artifact_data)) - return Response(content=artifact_data, media_type=content_type, headers=headers) diff --git a/src/artifactapi/artifact/flush.py b/src/artifactapi/artifact/flush.py deleted file mode 100644 index c446066..0000000 --- a/src/artifactapi/artifact/flush.py +++ /dev/null @@ -1,66 +0,0 @@ -import logging - -from fastapi import HTTPException - -logger = logging.getLogger(__name__) - - -def handle(remote: str | None, cache_type: str, cache, storage) -> dict: - try: - result = {"remote": remote, 
"cache_type": cache_type, "flushed": {"redis_keys": 0, "s3_objects": 0, "operations": []}} - - if cache_type in ["all", "index", "metrics"] and cache.available and cache.client: - patterns = [] - - if cache_type in ["all", "index"]: - if remote: - patterns += [f"index:{remote}:*", f"mutable:meta:{remote}:*"] - else: - patterns += ["index:*", "mutable:meta:*"] - - if cache_type in ["all", "metrics"]: - patterns.append(f"metrics:*:{remote}" if remote else "metrics:*") - - for pattern in patterns: - keys = cache.client.keys(pattern) - if keys: - cache.client.delete(*keys) - result["flushed"]["redis_keys"] += len(keys) - logger.info(f"Cache flush: deleted {len(keys)} Redis keys matching '{pattern}'") - - if result["flushed"]["redis_keys"] > 0: - result["flushed"]["operations"].append(f"Deleted {result['flushed']['redis_keys']} Redis keys") - - if cache_type in ["all", "files"]: - try: - list_params = {"Bucket": storage.bucket} - if remote: - list_params["Prefix"] = f"{remote}/" - - response = storage.client.list_objects_v2(**list_params) - if "Contents" in response: - objects_to_delete = [obj["Key"] for obj in response["Contents"]] - for key in objects_to_delete: - try: - storage.client.delete_object(Bucket=storage.bucket, Key=key) - result["flushed"]["s3_objects"] += 1 - except Exception as e: - logger.warning(f"Failed to delete S3 object {key}: {e}") - - if objects_to_delete: - scope = f" for remote '{remote}'" if remote else "" - result["flushed"]["operations"].append(f"Deleted {len(objects_to_delete)} S3 objects{scope}") - logger.info(f"Cache flush: deleted {len(objects_to_delete)} S3 objects{scope}") - - except Exception as e: - result["flushed"]["operations"].append(f"S3 flush failed: {str(e)}") - logger.error(f"Cache flush S3 error: {e}") - - if not result["flushed"]["operations"]: - result["flushed"]["operations"].append("No cache entries found to flush") - - return result - - except Exception as e: - logger.error(f"Cache flush error: {e}") - raise 
HTTPException(status_code=500, detail=f"Cache flush failed: {str(e)}") diff --git a/src/artifactapi/artifact/local.py b/src/artifactapi/artifact/local.py deleted file mode 100644 index ab978e8..0000000 --- a/src/artifactapi/artifact/local.py +++ /dev/null @@ -1,113 +0,0 @@ -import hashlib -import logging -import os - -from fastapi import HTTPException, Response, UploadFile -from fastapi.responses import JSONResponse - -logger = logging.getLogger(__name__) - - -def download(remote_name: str, path: str, storage, database, config) -> Response: - if not config.get_local_config(remote_name): - raise HTTPException(status_code=404, detail=f"Local repository '{remote_name}' not configured") - metadata = database.get_local_file_metadata(remote_name, path) - if not metadata: - raise HTTPException(status_code=404, detail="File not found") - content = storage.download_object(metadata["s3_key"]) - return Response( - content=content, - media_type=metadata.get("content_type", "application/octet-stream"), - headers={"Content-Disposition": f"attachment; filename={os.path.basename(path)}"}, - ) - - -async def upload(remote_name: str, path: str, file: UploadFile, storage, database, config) -> JSONResponse: - if not config.get_local_config(remote_name): - raise HTTPException(status_code=404, detail=f"Local repository '{remote_name}' not configured") - - try: - content = await file.read() - sha256_sum = hashlib.sha256(content).hexdigest() - - if database.file_exists(remote_name, path): - raise HTTPException(status_code=409, detail="File already exists") - - s3_key = f"local/{remote_name}/{path}" - content_type = file.content_type or "application/octet-stream" - - try: - storage.upload(s3_key, content) - except Exception as e: - raise HTTPException(status_code=500, detail=f"Upload failed: {e}") - - success = database.add_local_file( - repository_name=remote_name, - file_path=path, - s3_key=s3_key, - size_bytes=len(content), - sha256_sum=sha256_sum, - content_type=content_type, - ) - - 
if not success: - storage.delete_object(s3_key) - raise HTTPException(status_code=500, detail="Failed to save file metadata") - - return JSONResponse( - { - "message": "File uploaded successfully", - "file_path": path, - "size_bytes": len(content), - "sha256_sum": sha256_sum, - "content_type": content_type, - } - ) - - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}") - - -def check_exists(remote_name: str, path: str, database, config) -> Response: - if not config.get_local_config(remote_name): - raise HTTPException(status_code=404, detail=f"Local repository '{remote_name}' not configured") - - try: - metadata = database.get_local_file_metadata(remote_name, path) - if not metadata: - raise HTTPException(status_code=404, detail="File not found") - - return Response( - headers={ - "Content-Length": str(metadata["size_bytes"]), - "Content-Type": metadata.get("content_type", "application/octet-stream"), - "X-SHA256": metadata["sha256_sum"], - "X-Created-At": metadata["created_at"].isoformat() if metadata["created_at"] else "", - "X-Uploaded-At": metadata["uploaded_at"].isoformat() if metadata["uploaded_at"] else "", - } - ) - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=500, detail=f"Check failed: {str(e)}") - - -def delete(remote_name: str, path: str, storage, database, config) -> JSONResponse: - if not config.get_local_config(remote_name): - raise HTTPException(status_code=404, detail=f"Local repository '{remote_name}' not configured") - - try: - s3_key = database.delete_local_file(remote_name, path) - if not s3_key: - raise HTTPException(status_code=404, detail="File not found") - - if not storage.delete_object(s3_key): - logger.warning(f"Failed to delete S3 object {s3_key} after database removal") - - return JSONResponse({"message": "File deleted successfully"}) - except HTTPException: - raise - except Exception as e: - raise 
HTTPException(status_code=500, detail=f"Delete failed: {str(e)}") diff --git a/src/artifactapi/artifact/proxy.py b/src/artifactapi/artifact/proxy.py deleted file mode 100644 index 8d3facd..0000000 --- a/src/artifactapi/artifact/proxy.py +++ /dev/null @@ -1,327 +0,0 @@ -import base64 -import logging -import os -import re -from datetime import UTC, datetime, timedelta -from email.utils import parsedate_to_datetime - -import httpx -from fastapi import HTTPException, Request, Response - -from ..auth import get_docker_token_for_response -from ..remote import helm as _helm -from ..remote import npm as _npm -from ..remote import puppet as _puppet -from ..remote import python as _pypi -from ..remote import terraform as _terraform -from ..remote.base import get_content_type - -logger = logging.getLogger(__name__) - - -class UpstreamUnreachable(Exception): - """Raised when the upstream backend cannot be contacted (network or timeout error).""" - - -def _check_quarantine(remote_name: str, last_modified_str: str | None, config) -> None: - """Raise HTTP 404 if the artifact is within the per-remote quarantine window. - - Fails open (allows the request) when the publish date cannot be determined. 
- """ - enabled, days = config.get_quarantine_config(remote_name) - if not enabled or not days: - return - if not last_modified_str: - return # cannot determine age → allow - try: - publish_date = parsedate_to_datetime(last_modified_str) - except Exception: - return # unparseable → allow - cutoff = datetime.now(UTC) - timedelta(days=days) - if publish_date > cutoff: - available_on = (publish_date + timedelta(days=days)).date() - raise HTTPException( - status_code=404, - detail=( - f"Package quarantined: published {publish_date.date()}, available after {available_on} ({days}-day new-release quarantine)" - ), - ) - - -async def _fetch_last_modified(remote_url: str, remote_cfg: dict) -> str | None: - """HEAD the upstream URL and return the Last-Modified header, or None on any failure.""" - auth = _basic_auth_header(remote_cfg) - try: - async with httpx.AsyncClient(follow_redirects=True) as client: - response = await client.head(remote_url, headers=auth, timeout=10.0) - return response.headers.get("Last-Modified") - except Exception: - return None - - -def _basic_auth_header(remote_cfg: dict) -> dict[str, str]: - username = remote_cfg.get("username") - password = remote_cfg.get("password") - if username and password: - token = base64.b64encode(f"{username}:{password}".encode()).decode() - return {"Authorization": f"Basic {token}"} - return {} - - -def _resolve_content( - data: bytes, - path: str, - filename: str, - remote_config: dict, - request: Request, - remote_name: str = "", -) -> tuple[bytes, str]: - package = remote_config.get("package") - proxy_base = str(request.base_url).rstrip("/") - base_url = remote_config.get("base_url", "").rstrip("/") - - if package == "pypi": - return _pypi.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name) - if package == "npm": - return _npm.resolve_content(data, path, filename, remote_config.get("immutable_patterns", []), base_url, proxy_base, remote_name) - if 
package == "helm": - return _helm.resolve_content(data, path, filename, base_url, proxy_base, remote_name) - if package == "puppet": - return _puppet.resolve_content(data, path, filename, base_url, proxy_base, remote_name) - if package == "terraform": - releases_remote = remote_config.get("releases_remote") - return _terraform.resolve_content(data, path, filename, base_url, proxy_base, remote_name, releases_remote) - return data, get_content_type(filename) - - -def construct_url(remote_config: dict, path: str) -> str: - base_url = remote_config.get("base_url", "").rstrip("/") - if remote_config.get("package") == "docker": - return f"{base_url}/v2/{path}" - if remote_config.get("package") == "pypi": - return _pypi.construct_url(base_url, path) - if remote_config.get("package") == "terraform": - return _terraform.construct_url(base_url, path) - return f"{base_url}/{path}" - - -async def cache_single_artifact(url: str, remote_name: str, path: str, storage, remote_config: dict) -> dict: - key = storage.get_object_key(remote_name, path) - - if storage.exists(key): - logger.info(f"Cache ALREADY EXISTS: {url} (key: {key})") - return {"url": url, "cached_url": storage.get_url(key), "status": "already_cached"} - - try: - is_docker = remote_config.get("package") == "docker" or "/v2/" in url - headers = {} - username = remote_config.get("username") - password = remote_config.get("password") - - if is_docker: - if "/manifests/" in url: - headers["Accept"] = ( - "application/vnd.docker.distribution.manifest.v2+json," - "application/vnd.oci.image.manifest.v1+json," - "application/vnd.oci.image.index.v1+json," - "application/vnd.docker.distribution.manifest.list.v2+json" - ) - elif "/blobs/" in url: - headers["Accept"] = "application/octet-stream" - elif username and password: - headers["Authorization"] = "Basic " + base64.b64encode(f"{username}:{password}".encode()).decode() - - async with httpx.AsyncClient(follow_redirects=True) as client: - response = await client.get(url, 
headers=headers) - - if response.status_code == 401 and is_docker: - www_auth = response.headers.get("WWW-Authenticate", "") - token = await get_docker_token_for_response(www_auth, username, password) - if token: - headers["Authorization"] = f"Bearer {token}" - response = await client.get(url, headers=headers) - - response.raise_for_status() - storage.upload(key, response.content) - logger.info(f"Cache ADD SUCCESS: {url} (size: {len(response.content)} bytes, key: {key})") - - return { - "url": url, - "cached_url": storage.get_url(key), - "storage_path": f"s3://{storage.bucket}/{key}", - "size": len(response.content), - "status": "cached", - "etag": response.headers.get("ETag"), - "last_modified": response.headers.get("Last-Modified"), - } - - except Exception as e: - return {"url": url, "status": "error", "error": str(e)} - - -async def _upstream_reachable(url: str, auth_headers: dict | None = None) -> bool: - try: - async with httpx.AsyncClient(follow_redirects=True) as client: - await client.head(url, headers=auth_headers or {}, timeout=10.0) - return True - except (httpx.NetworkError, httpx.TimeoutException): - return False - except Exception: - return True - - -async def check_upstream_changed(remote_url: str, remote_name: str, path: str, cache, auth_headers: dict | None = None) -> bool: - meta = cache.get_mutable_meta(remote_name, path) - if not meta: - return True - - headers = dict(auth_headers or {}) - if meta.get("etag"): - headers["If-None-Match"] = meta["etag"] - if meta.get("last_modified"): - headers["If-Modified-Since"] = meta["last_modified"] - if not (meta.get("etag") or meta.get("last_modified")): - return True - - try: - async with httpx.AsyncClient(follow_redirects=True) as client: - response = await client.head(remote_url, headers=headers) - return response.status_code != 304 - except (httpx.NetworkError, httpx.TimeoutException) as exc: - raise UpstreamUnreachable(str(exc)) from exc - - -async def handle_expired_mutable(remote_name: str, path: 
str, remote_url: str, config, cache, storage) -> bool: - """Handle an expired mutable file. Returns True if the cached copy is still valid.""" - mutable_ttl = config.get_cache_config(remote_name).get("mutable_ttl", 3600) - remote_cfg = config.get_remote_config(remote_name) or {} - auth = _basic_auth_header(remote_cfg) - check_updates = remote_cfg.get("check_mutable_updates", False) - user_mutable = check_updates and cache.is_mutable_file(path, config.get_user_mutable_patterns(remote_name)) - - if user_mutable: - try: - changed = await check_upstream_changed(remote_url, remote_name, path, cache, auth) - except UpstreamUnreachable: - cache.mark_index_cached(remote_name, path, mutable_ttl) - logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)") - return True - if not changed: - cache.mark_index_cached(remote_name, path, mutable_ttl) - logger.info(f"Mutable file UNCHANGED: {remote_name}/{path} - TTL refreshed ({mutable_ttl}s)") - return True - logger.info(f"Mutable file CHANGED: {remote_name}/{path} - re-downloading") - else: - if not await _upstream_reachable(remote_url, auth): - cache.mark_index_cached(remote_name, path, mutable_ttl) - logger.warning(f"Mutable STALE (backend unreachable): {remote_name}/{path} - TTL extended ({mutable_ttl}s)") - return True - logger.info(f"Mutable file EXPIRED: {remote_name}/{path} - removing from cache") - - cache.cleanup_expired_index(storage, remote_name, path) - return False - - -async def handle(request: Request, remote_name: str, path: str, storage, cache, config, database, metrics) -> Response: - remote_config = config.get_remote_config(remote_name) - if not remote_config: - raise HTTPException(status_code=404, detail=f"Remote '{remote_name}' not configured") - - path_parts = path.split("/") - if len(path_parts) >= 2: - repo_path = f"{path_parts[0]}/{path_parts[1]}" - file_path = "/".join(path_parts[2:]) - else: - repo_path = path - file_path = path - - mutable_patterns = 
config.get_mutable_patterns(remote_name) - if not cache.is_mutable_file(file_path, mutable_patterns) and not cache.is_mutable_file(path, mutable_patterns): - patterns = config.get_immutable_patterns(remote_name, repo_path) - if patterns and not any(re.search(p, file_path) or re.search(p, path) for p in patterns): - logger.info(f"PATTERN BLOCKED: {remote_name}/{path} - not matching include patterns") - raise HTTPException(status_code=403, detail="Artifact not allowed by configuration patterns") - - remote_url = construct_url(remote_config, path) - if not remote_config.get("base_url"): - raise HTTPException(status_code=500, detail=f"No base_url configured for remote '{remote_name}'") - - cached_key = storage.get_object_key(remote_name, path) - if not storage.exists(cached_key): - cached_key = None - - filename = os.path.basename(path) - is_mutable = cache.is_mutable_file(path, mutable_patterns) - - if cached_key and is_mutable: - if not cache.is_index_valid(remote_name, path): - if not await handle_expired_mutable(remote_name, path, remote_url, config, cache, storage): - cached_key = None - - if cached_key: - if not is_mutable: - published = cache.get_artifact_published(remote_name, path) - if not published: - published = await _fetch_last_modified(remote_url, remote_config) - if published: - cache.store_artifact_published(remote_name, path, published) - _check_quarantine(remote_name, published, config) - - try: - artifact_data = storage.download_object(cached_key) - artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name) - logger.info(f"Cache HIT: {remote_name}/{path} (size: {len(artifact_data)} bytes, key: {cached_key})") - metrics.record_cache_hit(remote_name, len(artifact_data)) - database.record_artifact_mapping(cached_key, remote_name, path, len(artifact_data)) - return Response( - content=artifact_data, - media_type=content_type, - headers={ - "Content-Disposition": f"attachment; filename={filename}", 
- "X-Artifact-Source": "cache", - "X-Artifact-Size": str(len(artifact_data)), - }, - ) - except HTTPException: - raise - except Exception as e: - raise HTTPException(status_code=500, detail=f"Error retrieving cached artifact: {str(e)}") - - logger.info(f"Cache MISS: {remote_name}/{path} - fetching from remote: {remote_url}") - result = await cache_single_artifact(remote_url, remote_name, path, storage, remote_config) - - if result["status"] == "error": - logger.error(f"Cache ADD FAILED: {remote_name}/{path} - {result['error']}") - raise HTTPException(status_code=502, detail=f"Failed to fetch artifact: {result['error']}") - - if result["status"] == "cached" and is_mutable: - cache_config = config.get_cache_config(remote_name) - mutable_ttl = cache_config.get("mutable_ttl", 3600) - cache.mark_index_cached(remote_name, path, mutable_ttl) - logger.info(f"Mutable file cached with TTL: {remote_name}/{path} (ttl: {mutable_ttl}s)") - if result.get("etag") or result.get("last_modified"): - cache.store_mutable_meta(remote_name, path, result.get("etag"), result.get("last_modified")) - - if not is_mutable: - published = result.get("last_modified") - if published: - cache.store_artifact_published(remote_name, path, published) - _check_quarantine(remote_name, published, config) - - try: - cache_key = storage.get_object_key(remote_name, path) - artifact_data = storage.download_object(cache_key) - artifact_data, content_type = _resolve_content(artifact_data, path, filename, remote_config, request, remote_name) - metrics.record_cache_miss(remote_name, len(artifact_data)) - database.record_artifact_mapping(cache_key, remote_name, path, len(artifact_data)) - return Response( - content=artifact_data, - media_type=content_type, - headers={ - "Content-Disposition": f"attachment; filename={filename}", - "X-Artifact-Source": "remote", - "X-Artifact-Size": str(len(artifact_data)), - }, - ) - except Exception as e: - raise HTTPException(status_code=500, detail=f"Error serving artifact: 
{str(e)}") diff --git a/src/artifactapi/artifact/virtual.py b/src/artifactapi/artifact/virtual.py deleted file mode 100644 index 3f4c88e..0000000 --- a/src/artifactapi/artifact/virtual.py +++ /dev/null @@ -1,317 +0,0 @@ -import asyncio -import base64 -import logging -import time -from datetime import UTC, date, datetime -from typing import Protocol, runtime_checkable - -import httpx -import msgpack as _msgpack -import yaml -from fastapi import HTTPException, Request, Response - -logger = logging.getLogger(__name__) - -try: - _YamlLoader = yaml.CSafeLoader - _YamlDumperBase = yaml.CDumper -except AttributeError: - _YamlLoader = yaml.SafeLoader - _YamlDumperBase = yaml.Dumper - - -class _HelmDumper(_YamlDumperBase): - """YAML dumper that serializes datetime/date objects back to ISO 8601 strings. - - yaml.safe_load converts timestamp-shaped YAML scalars (e.g. chart `created` - fields) to Python datetime objects. Without a custom representer, yaml.dump - would render them as "2022-12-16 11:08:49+00:00" (space, not T), which - Go's YAML parser cannot unmarshal into time.Time. 
- """ - - -def _repr_datetime(dumper: yaml.Dumper, data: datetime) -> yaml.ScalarNode: - s = data.strftime("%Y-%m-%dT%H:%M:%S.%f") + ("Z" if data.tzinfo else "") - return dumper.represent_scalar("tag:yaml.org,2002:str", s) - - -def _repr_date(dumper: yaml.Dumper, data: date) -> yaml.ScalarNode: - return dumper.represent_scalar("tag:yaml.org,2002:str", data.isoformat()) - - -_HelmDumper.add_representer(datetime, _repr_datetime) -_HelmDumper.add_representer(date, _repr_date) - - -def _entries_to_msgpack_safe(entries: dict) -> dict: - """Convert datetime/date values to ISO strings for msgpack serialization.""" - result = {} - for chart, versions in entries.items(): - safe_versions = [] - for v in versions: - safe_v = {} - for k, val in v.items(): - if isinstance(val, datetime): - safe_v[k] = val.isoformat() - elif isinstance(val, date): - safe_v[k] = val.isoformat() - else: - safe_v[k] = val - safe_versions.append(safe_v) - result[chart] = safe_versions - return result - - -async def _get_member_index( - member_name: str, - member_cfg: dict, - path: str, - storage, - cache, -) -> tuple[str, dict, int, bytes | None, dict | None]: - """Fetch or retrieve cached index.yaml for one member remote. - - Returns (member_name, member_cfg, ttl, raw_bytes, parsed_entries). - raw_bytes is None if the member is unreachable and not in S3. - parsed_entries is the pre-parsed entries dict (from msgpack cache), or None. 
- """ - member_ttl = member_cfg.get("cache", {}).get("mutable_ttl", 3600) - s3_key = storage.get_object_key(member_name, path) - msgpack_key = storage.get_object_key(member_name, "index.msgpack") - raw_data: bytes | None = None - parsed_entries: dict | None = None - - if storage.exists(s3_key) and cache.is_index_valid(member_name, path): - try: - raw_data = storage.download_object(s3_key) - logger.info(f"Virtual: cache hit for member '{member_name}'") - except Exception: - raw_data = None - if raw_data is not None and storage.exists(msgpack_key): - try: - packed = storage.download_object(msgpack_key) - parsed_entries = _msgpack.unpackb(packed, raw=False) - logger.debug(f"Virtual: msgpack hit for member '{member_name}'") - except Exception: - parsed_entries = None - - if raw_data is None: - base_url = member_cfg.get("base_url", "").rstrip("/") - upstream_url = f"{base_url}/index.yaml" - headers = {} - username = member_cfg.get("username") - password = member_cfg.get("password") - if username and password: - token = base64.b64encode(f"{username}:{password}".encode()).decode() - headers["Authorization"] = f"Basic {token}" - try: - async with httpx.AsyncClient(follow_redirects=True) as client: - response = await client.get(upstream_url, headers=headers, timeout=30.0) - response.raise_for_status() - raw_data = response.content - except Exception as e: - logger.warning(f"Virtual: failed to fetch index.yaml from member '{member_name}': {e}") - return member_name, member_cfg, member_ttl, None, None - try: - storage.upload(s3_key, raw_data) - cache.mark_index_cached(member_name, path, member_ttl) - except Exception as e: - logger.warning(f"Virtual: failed to cache index.yaml for member '{member_name}': {e}") - - if parsed_entries is None and raw_data is not None: - try: - index = yaml.load(raw_data, Loader=_YamlLoader) - safe_entries = _entries_to_msgpack_safe(index.get("entries") or {}) - storage.upload(msgpack_key, _msgpack.packb(safe_entries, use_bin_type=True)) - 
parsed_entries = safe_entries - except Exception as e: - logger.warning(f"Virtual: failed to build msgpack cache for '{member_name}': {e}") - - return member_name, member_cfg, member_ttl, raw_data, parsed_entries - - -def _rewrite_urls(urls: list, base_url: str, proxy_base: str, member_name: str) -> list: - proxy_remote = f"{proxy_base}/api/v1/remote/{member_name}" - rewritten = [] - for url in urls: - if url.startswith(("http://", "https://")): - if base_url and url.startswith(base_url): - url = proxy_remote + url[len(base_url) :] - else: - url = f"{proxy_remote}/{url.lstrip('/')}" - rewritten.append(url) - return rewritten - - -def _merge_helm_indexes( - raw_indexes: list[bytes], - parsed_entries_list: list[dict | None], - member_names: list[str], - member_configs: list[dict], - proxy_base: str, -) -> bytes: - """Merge helm index.yaml files with per-member URL rewriting. - - Priority is determined by position in member_names: earlier members win - when the same chart name + version appears in multiple remotes. - Uses pre-parsed msgpack entries when available to skip YAML parsing. 
- """ - merged_entries: dict[str, list] = {} - - for raw_data, pre_parsed, member_name, member_cfg in zip(raw_indexes, parsed_entries_list, member_names, member_configs): - base_url = member_cfg.get("base_url", "").rstrip("/") - - if pre_parsed is not None: - entries = pre_parsed - else: - try: - index = yaml.load(raw_data, Loader=_YamlLoader) - except Exception as e: - logger.warning(f"Virtual: failed to parse index.yaml from member '{member_name}': {e}") - continue - entries = index.get("entries") or {} - - for chart_name, versions in entries.items(): - for version_entry in versions: - version_entry["urls"] = _rewrite_urls( - version_entry.get("urls") or [], - base_url, - proxy_base, - member_name, - ) - if chart_name not in merged_entries: - merged_entries[chart_name] = list(versions) - else: - existing = {(v.get("name"), v.get("version")) for v in merged_entries[chart_name]} - for version_entry in versions: - key = (version_entry.get("name"), version_entry.get("version")) - if key not in existing: - merged_entries[chart_name].append(version_entry) - existing.add(key) - - merged = { - "apiVersion": "v1", - "entries": merged_entries, - "generated": datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%S.000Z"), - } - return yaml.dump(merged, Dumper=_HelmDumper, default_flow_style=False, allow_unicode=True).encode() - - -@runtime_checkable -class _VirtualHandler(Protocol): - def accepts_path(self, path: str) -> bool: ... - def merge( - self, - raw_indexes: list[bytes], - parsed_entries: list[dict | None], - member_names: list[str], - member_configs: list[dict], - proxy_base: str, - ) -> bytes: ... - def path_error(self) -> str: ... 
- - -class _HelmHandler: - def accepts_path(self, path: str) -> bool: - return path == "index.yaml" - - def merge( - self, - raw_indexes: list[bytes], - parsed_entries: list[dict | None], - member_names: list[str], - member_configs: list[dict], - proxy_base: str, - ) -> bytes: - return _merge_helm_indexes(raw_indexes, parsed_entries, member_names, member_configs, proxy_base) - - def path_error(self) -> str: - return "Virtual helm repositories only serve index.yaml; chart tarballs are served directly by member remotes" - - -_HANDLERS: dict[str, _VirtualHandler] = { - "helm": _HelmHandler(), -} - - -async def handle(request: Request, virtual_name: str, path: str, storage, cache, config) -> Response: - virtual_cfg = config.get_virtual_config(virtual_name) - if not virtual_cfg: - raise HTTPException(status_code=404, detail=f"Virtual repository '{virtual_name}' not configured") - - package = virtual_cfg.get("package") - handler = _HANDLERS.get(package) - if handler is None: - raise HTTPException(status_code=400, detail=f"Virtual repositories with package '{package}' are not yet supported") - - if not handler.accepts_path(path): - raise HTTPException(status_code=404, detail=handler.path_error()) - - members = virtual_cfg.get("members", []) - if not members: - raise HTTPException(status_code=500, detail=f"Virtual repository '{virtual_name}' has no members configured") - - virtual_key = storage.get_object_key(virtual_name, path) - - if cache.is_index_valid(virtual_name, path) and storage.exists(virtual_key): - data = storage.download_object(virtual_key) - logger.info(f"Virtual HIT: {virtual_name}/{path}") - return Response(content=data, media_type="text/yaml") - - # Resolve configs first (config reads are sync/cheap) - member_entries = [] - for member_name in members: - member_cfg = config.get_remote_config(member_name) - if not member_cfg: - logger.warning(f"Virtual '{virtual_name}': member '{member_name}' not found in config, skipping") - continue - 
member_entries.append((member_name, member_cfg)) - - # Fetch all member indexes in parallel; asyncio.gather preserves input order - proxy_base = str(request.base_url).rstrip("/") - t_fetch = time.perf_counter() - results = await asyncio.gather(*[_get_member_index(name, cfg, path, storage, cache) for name, cfg in member_entries]) - fetch_ms = int((time.perf_counter() - t_fetch) * 1000) - - raw_indexes: list[bytes] = [] - used_parsed: list[dict | None] = [] - used_members: list[str] = [] - used_configs: list[dict] = [] - min_ttl: int | None = None - - for member_name, member_cfg, member_ttl, raw_data, parsed_entries in results: - if min_ttl is None or member_ttl < min_ttl: - min_ttl = member_ttl - if raw_data is None: - logger.warning(f"Virtual '{virtual_name}': skipping unreachable member '{member_name}'") - continue - raw_indexes.append(raw_data) - used_parsed.append(parsed_entries) - used_members.append(member_name) - used_configs.append(member_cfg) - - if not raw_indexes: - raise HTTPException(status_code=502, detail=f"Virtual repository '{virtual_name}': no member indices could be fetched") - - if min_ttl is None: - min_ttl = 3600 - - t_merge = time.perf_counter() - merged = await asyncio.to_thread(handler.merge, raw_indexes, used_parsed, used_members, used_configs, proxy_base) - merge_ms = int((time.perf_counter() - t_merge) * 1000) - - try: - t_store = time.perf_counter() - storage.upload(virtual_key, merged) - cache.mark_index_cached(virtual_name, path, min_ttl) - store_ms = int((time.perf_counter() - t_store) * 1000) - msgpack_hits = sum(1 for p in used_parsed if p is not None) - logger.info( - f"Virtual MISS: {virtual_name}/{path} rebuilt from {used_members} " - f"(fetch={fetch_ms}ms merge={merge_ms}ms store={store_ms}ms ttl={min_ttl}s " - f"msgpack={msgpack_hits}/{len(used_members)})" - ) - except Exception as e: - logger.warning(f"Virtual: failed to store merged index for '{virtual_name}': {e}") - - return Response(content=merged, media_type="text/yaml") 
diff --git a/src/artifactapi/auth/__init__.py b/src/artifactapi/auth/__init__.py deleted file mode 100644 index faffd6e..0000000 --- a/src/artifactapi/auth/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .docker import fetch_token, get_docker_token_for_response, parse_www_authenticate - -__all__ = ["fetch_token", "get_docker_token_for_response", "parse_www_authenticate"] diff --git a/src/artifactapi/auth/docker.py b/src/artifactapi/auth/docker.py deleted file mode 100644 index b781a7f..0000000 --- a/src/artifactapi/auth/docker.py +++ /dev/null @@ -1,96 +0,0 @@ -import logging -import re -import time - -import httpx - -logger = logging.getLogger(__name__) - -# In-memory token cache: key -> (token, expires_at) -_token_cache: dict[str, tuple[str, float]] = {} - -_WWW_AUTH_RE = re.compile( - r'Bearer\s+realm="(?P[^"]+)"' - r'(?:,service="(?P[^"]*)")?' - r'(?:,scope="(?P[^"]*)")?', - re.IGNORECASE, -) - - -def _cache_key(realm: str, service: str, scope: str, username: str | None) -> str: - return f"{realm}|{service}|{scope}|{username or ''}" - - -def _get_cached_token(key: str) -> str | None: - entry = _token_cache.get(key) - if entry and entry[1] > time.time(): - return entry[0] - _token_cache.pop(key, None) - return None - - -def _store_token(key: str, token: str, expires_in: int) -> None: - # Expire 30s early to avoid using a token right as it expires - _token_cache[key] = (token, time.time() + max(expires_in - 30, 10)) - - -async def fetch_token( - realm: str, - service: str, - scope: str, - username: str | None = None, - password: str | None = None, -) -> str | None: - """Fetch a Bearer token from a Docker registry auth server.""" - key = _cache_key(realm, service, scope, username) - cached = _get_cached_token(key) - if cached: - return cached - - params: dict[str, str] = {} - if service: - params["service"] = service - if scope: - params["scope"] = scope - - auth = (username, password) if username and password else None - - try: - async with 
httpx.AsyncClient(follow_redirects=True) as client: - response = await client.get(realm, params=params, auth=auth) - response.raise_for_status() - data = response.json() - except Exception as e: - logger.warning(f"Docker token fetch failed ({realm}): {e}") - return None - - token = data.get("token") or data.get("access_token") - if not token: - logger.warning(f"Docker token response missing token field: {data}") - return None - - expires_in = int(data.get("expires_in", 300)) - _store_token(key, token, expires_in) - logger.debug(f"Docker token obtained (realm={realm}, service={service}, scope={scope}, expires_in={expires_in}s)") - return token - - -def parse_www_authenticate(header: str) -> tuple[str, str, str] | None: - """Parse WWW-Authenticate: Bearer header. Returns (realm, service, scope) or None.""" - m = _WWW_AUTH_RE.search(header) - if not m: - return None - return m.group("realm"), m.group("service") or "", m.group("scope") or "" - - -async def get_docker_token_for_response( - www_authenticate: str, - username: str | None = None, - password: str | None = None, -) -> str | None: - """Given a WWW-Authenticate header value, fetch and return a Bearer token.""" - parsed = parse_www_authenticate(www_authenticate) - if not parsed: - return None - realm, service, scope = parsed - return await fetch_token(realm, service, scope, username, password) diff --git a/src/artifactapi/cache/__init__.py b/src/artifactapi/cache/__init__.py deleted file mode 100644 index 7f06ae6..0000000 --- a/src/artifactapi/cache/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .redis import RedisCache - -__all__ = ["RedisCache"] diff --git a/src/artifactapi/cache/redis.py b/src/artifactapi/cache/redis.py deleted file mode 100644 index 8c7534b..0000000 --- a/src/artifactapi/cache/redis.py +++ /dev/null @@ -1,143 +0,0 @@ -import hashlib -import re -import time - -import redis - - -class RedisCache: - def __init__(self, redis_url: str): - self.redis_url = redis_url - - try: - self.client = 
redis.from_url(self.redis_url, decode_responses=True) - self.client.ping() - self.available = True - except Exception as e: - print(f"Redis not available: {e}") - self.client = None - self.available = False - - def is_mutable_file(self, file_path: str, patterns: list[str] | None = None) -> bool: - if patterns is None: - patterns = [] - return any(re.search(p, file_path) for p in patterns) - - def get_index_cache_key(self, remote_name: str, path: str) -> str: - return f"index:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}" - - def get_mutable_meta_key(self, remote_name: str, path: str) -> str: - return f"mutable:meta:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}" - - def is_index_valid(self, remote_name: str, path: str) -> bool: - if not self.available: - return False - try: - key = self.get_index_cache_key(remote_name, path) - return self.client.exists(key) > 0 - except Exception: - return False - - def mark_index_cached(self, remote_name: str, path: str, ttl: int = 300) -> None: - if not self.available: - return - try: - key = self.get_index_cache_key(remote_name, path) - self.client.setex(key, ttl, str(int(time.time()))) - except Exception: - pass - - def store_mutable_meta(self, remote_name: str, path: str, etag: str | None, last_modified: str | None) -> None: - if not self.available: - return - data = {} - if etag: - data["etag"] = etag - if last_modified: - data["last_modified"] = last_modified - if not data: - return - try: - self.client.hset(self.get_mutable_meta_key(remote_name, path), mapping=data) - except Exception: - pass - - def get_mutable_meta(self, remote_name: str, path: str) -> dict: - if not self.available: - return {} - try: - return self.client.hgetall(self.get_mutable_meta_key(remote_name, path)) or {} - except Exception: - return {} - - def delete_mutable_meta(self, remote_name: str, path: str) -> None: - if not self.available: - return - try: - self.client.delete(self.get_mutable_meta_key(remote_name, path)) 
- except Exception: - pass - - def get_artifact_published_key(self, remote_name: str, path: str) -> str: - return f"pkg:published:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}" - - def store_artifact_published(self, remote_name: str, path: str, last_modified: str) -> None: - """Persist the upstream Last-Modified header for a (typically immutable) artifact.""" - if not self.available: - return - try: - self.client.set(self.get_artifact_published_key(remote_name, path), last_modified) - except Exception: - pass - - def get_artifact_published(self, remote_name: str, path: str) -> str | None: - """Return the stored Last-Modified string for an artifact, or None.""" - if not self.available: - return None - try: - return self.client.get(self.get_artifact_published_key(remote_name, path)) - except Exception: - return None - - def acquire_fetch_lock(self, remote_name: str, path: str, ttl: int = 30) -> bool: - """Try to acquire a short-lived fetch lock. Returns True if acquired, False if held by another caller.""" - if not self.available: - return True # fail open: no Redis → behave as if we always hold the lock - key = f"fetchlock:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}" - try: - return bool(self.client.set(key, 1, nx=True, ex=ttl)) - except Exception: - return True - - def release_fetch_lock(self, remote_name: str, path: str) -> None: - if not self.available: - return - key = f"fetchlock:{remote_name}:{hashlib.sha256(path.encode()).hexdigest()[:16]}" - try: - self.client.delete(key) - except Exception: - pass - - def cleanup_expired_index(self, storage, remote_name: str, path: str) -> None: - if not self.available: - return - - try: - import os - - from ..config import ConfigManager - - config_path = os.environ.get("CONFIG_PATH") - if config_path: - config = ConfigManager(config_path) - remote_config = config.get_remote_config(remote_name) - if remote_config: - base_url = remote_config.get("base_url") - if base_url: - s3_key = 
storage.get_object_key(remote_name, path) - if storage.exists(s3_key): - storage.client.delete_object(Bucket=storage.bucket, Key=s3_key) - except Exception: - pass - - self.delete_mutable_meta(remote_name, path) diff --git a/src/artifactapi/config.py b/src/artifactapi/config.py deleted file mode 100644 index e1d59f1..0000000 --- a/src/artifactapi/config.py +++ /dev/null @@ -1,246 +0,0 @@ -import glob -import json -import os - -import yaml - -_PACKAGE_MUTABLE_PATTERNS: dict[str, list[str]] = { - "alpine": [ - r"APKINDEX\.tar\.gz$", - ], - "rpm": [ - r"repomd\.xml$", - r"repodata/.*\.(xml|xml\.gz|xml\.bz2|xml\.xz|xml\.zck|xml\.zst" - r"|sqlite|sqlite\.gz|sqlite\.bz2|sqlite\.xz|sqlite\.zck|sqlite\.zst" - r"|yaml\.xz|yaml\.gz|yaml\.bz2|yaml\.zst|asc|txt)$", - r"Packages\.gz$", - ], - "docker": [ - r"/manifests/(?!sha256:)[^/]+$", - r"/tags/list$", - ], - "pypi": [ - r"simple/", # Per-package and top-level simple index pages - ], - "npm": [], - "helm": [ - r"index\.yaml$", - ], - "puppet": [ - r"^v3/modules/", - r"^v3/releases", - ], - "terraform": [ - r"[^/]+/[^/]+/versions$", - ], - "generic": [], -} - - -class ConfigManager: - def __init__(self, config_path: str = "remotes.yaml"): - self.config_path = config_path - self._config_dir: str | None = None - self._last_modified: float = 0.0 - self.config = self._load_config() - - def _load_single_file(self, path: str) -> dict: - try: - with open(path) as f: - if path.endswith((".yaml", ".yml")): - return yaml.safe_load(f) or {} - return json.load(f) - except FileNotFoundError: - return {} - - @staticmethod - def _merge(base: dict, overlay: dict) -> dict: - result = {**base} - for key, value in overlay.items(): - if key in ("remotes", "virtuals", "locals") and isinstance(base.get(key), dict) and isinstance(value, dict): - result[key] = {**base.get(key, {}), **value} - else: - result[key] = value - return result - - def _load_from_dir(self, dir_path: str) -> dict: - merged: dict = {} - files = 
sorted(glob.glob(os.path.join(dir_path, "*.yaml")) + glob.glob(os.path.join(dir_path, "*.yml"))) - for path in files: - merged = self._merge(merged, self._load_single_file(path)) - return merged - - def _load_config(self) -> dict: - self._config_dir = None - - if os.path.isdir(self.config_path): - return self._load_from_dir(self.config_path) or {"remotes": {}, "virtuals": {}, "locals": {}} - - config = self._load_single_file(self.config_path) - if not config: - return {"remotes": {}, "virtuals": {}, "locals": {}} - - config_dir = config.pop("config_dir", None) - if config_dir: - if not os.path.isabs(config_dir): - config_dir = os.path.join(os.path.dirname(os.path.abspath(self.config_path)), config_dir) - self._config_dir = config_dir - config = self._merge(config, self._load_from_dir(config_dir)) - - return config - - def _file_mtimes(self) -> list[float]: - mtimes: list[float] = [] - if os.path.isdir(self.config_path): - for f in glob.glob(os.path.join(self.config_path, "*.yaml")) + glob.glob(os.path.join(self.config_path, "*.yml")): - try: - mtimes.append(os.path.getmtime(f)) - except OSError: - pass - else: - try: - mtimes.append(os.path.getmtime(self.config_path)) - except OSError: - pass - - if self._config_dir and os.path.isdir(self._config_dir): - for f in glob.glob(os.path.join(self._config_dir, "*.yaml")) + glob.glob(os.path.join(self._config_dir, "*.yml")): - try: - mtimes.append(os.path.getmtime(f)) - except OSError: - pass - - return mtimes - - def _check_reload(self) -> None: - try: - current_modified = max(self._file_mtimes(), default=0.0) - if current_modified > self._last_modified: - self._last_modified = current_modified - self.config = self._load_config() - print(f"Config reloaded from {self.config_path}") - except OSError: - pass - - def get_remote_config(self, remote_name: str) -> dict | None: - self._check_reload() - return self.config.get("remotes", {}).get(remote_name) - - def get_virtual_config(self, virtual_name: str) -> dict | None: - 
self._check_reload() - return self.config.get("virtuals", {}).get(virtual_name) - - def get_local_config(self, local_name: str) -> dict | None: - self._check_reload() - return self.config.get("locals", {}).get(local_name) - - def get_immutable_patterns(self, remote_name: str, repo_path: str = "") -> list[str]: - remote_config = self.get_remote_config(remote_name) - if not remote_config: - return [] - - repositories = remote_config.get("repositories", {}) - - if isinstance(repositories, dict): - repo_config = repositories.get(repo_path) - if repo_config: - patterns = repo_config.get("immutable_patterns", []) - else: - patterns = remote_config.get("immutable_patterns", []) - else: - patterns = remote_config.get("immutable_patterns", []) - - return patterns - - def get_s3_config(self) -> dict: - """Get S3 configuration from environment variables""" - endpoint = os.getenv("MINIO_ENDPOINT") - access_key = os.getenv("MINIO_ACCESS_KEY") - secret_key = os.getenv("MINIO_SECRET_KEY") - bucket = os.getenv("MINIO_BUCKET") - - if not endpoint: - raise ValueError("MINIO_ENDPOINT environment variable is required") - if not access_key: - raise ValueError("MINIO_ACCESS_KEY environment variable is required") - if not secret_key: - raise ValueError("MINIO_SECRET_KEY environment variable is required") - if not bucket: - raise ValueError("MINIO_BUCKET environment variable is required") - - return { - "endpoint": endpoint, - "access_key": access_key, - "secret_key": secret_key, - "bucket": bucket, - "secure": os.getenv("MINIO_SECURE", "false").lower() == "true", - } - - def get_redis_config(self) -> dict: - """Get Redis configuration from environment variables""" - redis_url = os.getenv("REDIS_URL") - if not redis_url: - raise ValueError("REDIS_URL environment variable is required") - - return {"url": redis_url} - - def get_database_config(self) -> dict: - """Get database configuration from environment variables""" - db_host = os.getenv("DBHOST") - db_port = os.getenv("DBPORT") - 
db_user = os.getenv("DBUSER") - db_pass = os.getenv("DBPASS") - db_name = os.getenv("DBNAME") - - if not all([db_host, db_port, db_user, db_pass, db_name]): - missing = [ - var - for var, val in [("DBHOST", db_host), ("DBPORT", db_port), ("DBUSER", db_user), ("DBPASS", db_pass), ("DBNAME", db_name)] - if not val - ] - raise ValueError(f"All database environment variables are required: {', '.join(missing)}") - - db_url = f"postgresql://{db_user}:{db_pass}@{db_host}:{db_port}/{db_name}" - return {"url": db_url} - - def get_user_mutable_patterns(self, remote_name: str) -> list[str]: - """Return only user-configured mutable_patterns, excluding package-type defaults.""" - remote_config = self.get_remote_config(remote_name) - if not remote_config: - return [] - return remote_config.get("mutable_patterns", []) - - def get_mutable_patterns(self, remote_name: str) -> list[str]: - """Return mutable-file patterns for a remote (TTL is configured per-remote in cache.index_ttl). - - Merges the package-level defaults with any extra patterns listed under - ``mutable_patterns`` in the remote's config. - """ - remote_config = self.get_remote_config(remote_name) - if not remote_config: - return [] - package = remote_config.get("package", "generic") - defaults = _PACKAGE_MUTABLE_PATTERNS.get(package, []) - extra = remote_config.get("mutable_patterns", []) - return defaults + [p for p in extra if p not in defaults] - - def get_cache_config(self, remote_name: str) -> dict: - """Get cache configuration for a specific remote""" - remote_config = self.get_remote_config(remote_name) - if not remote_config: - return {} - - return remote_config.get("cache", {}) - - def get_quarantine_config(self, remote_name: str) -> tuple[bool, int]: - """Return (enabled, quarantine_days) for a remote. - - When enabled=True and quarantine_days>0, immutable artifacts published - within the last quarantine_days days are blocked with a 404. 
- """ - remote_config = self.get_remote_config(remote_name) - if not remote_config: - return False, 0 - enabled = bool(remote_config.get("quarantine_new", False)) - days = int(remote_config.get("quarantine_days", 0)) - return enabled, days diff --git a/src/artifactapi/database/__init__.py b/src/artifactapi/database/__init__.py deleted file mode 100644 index 2a4fd1c..0000000 --- a/src/artifactapi/database/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .postgres import DatabaseManager - -__all__ = ["DatabaseManager"] diff --git a/src/artifactapi/database/postgres.py b/src/artifactapi/database/postgres.py deleted file mode 100644 index 733c131..0000000 --- a/src/artifactapi/database/postgres.py +++ /dev/null @@ -1,258 +0,0 @@ -import psycopg2 -from psycopg2.extras import RealDictCursor - - -class DatabaseManager: - def __init__(self, db_url: str): - self.db_url = db_url - self.available = False - self._init_database() - - def _init_database(self): - try: - self.connection = psycopg2.connect(self.db_url) - self.connection.autocommit = True - self._create_schema() - self.available = True - print("Database connection established") - except Exception as e: - print(f"Database not available: {e}") - self.available = False - - def _create_schema(self): - try: - with self.connection.cursor() as cursor: - cursor.execute(""" - CREATE TABLE IF NOT EXISTS artifact_mappings ( - id SERIAL PRIMARY KEY, - s3_key VARCHAR(255) UNIQUE NOT NULL, - remote_name VARCHAR(100) NOT NULL, - file_path TEXT NOT NULL, - size_bytes BIGINT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - """) - - cursor.execute(""" - CREATE TABLE IF NOT EXISTS local_files ( - id SERIAL PRIMARY KEY, - repository_name VARCHAR(100) NOT NULL, - file_path TEXT NOT NULL, - s3_key VARCHAR(255) UNIQUE NOT NULL, - size_bytes BIGINT NOT NULL, - sha256_sum VARCHAR(64) NOT NULL, - content_type VARCHAR(100), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - uploaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - 
UNIQUE(repository_name, file_path) - ) - """) - - cursor.execute("CREATE INDEX IF NOT EXISTS idx_s3_key ON artifact_mappings (s3_key)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_remote_name ON artifact_mappings (remote_name)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_repo_path ON local_files (repository_name, file_path)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_local_s3_key ON local_files (s3_key)") - print("Database schema initialized") - except Exception as e: - print(f"Error creating schema: {e}") - - def record_artifact_mapping(self, s3_key: str, remote_name: str, file_path: str, size_bytes: int): - if not self.available: - return - - try: - with self.connection.cursor() as cursor: - cursor.execute( - """ - INSERT INTO artifact_mappings (s3_key, remote_name, file_path, size_bytes) - VALUES (%s, %s, %s, %s) - ON CONFLICT (s3_key) - DO UPDATE SET - remote_name = EXCLUDED.remote_name, - file_path = EXCLUDED.file_path, - size_bytes = EXCLUDED.size_bytes - """, - (s3_key, remote_name, file_path, size_bytes), - ) - except Exception as e: - print(f"Error recording artifact mapping: {e}") - - def get_storage_by_remote(self) -> dict[str, int]: - if not self.available: - return {} - - try: - with self.connection.cursor(cursor_factory=RealDictCursor) as cursor: - cursor.execute(""" - SELECT remote_name, SUM(size_bytes) as total_size - FROM artifact_mappings - GROUP BY remote_name - """) - results = cursor.fetchall() - return {row["remote_name"]: row["total_size"] or 0 for row in results} - except Exception as e: - print(f"Error getting storage by remote: {e}") - return {} - - def get_remote_for_s3_key(self, s3_key: str) -> str | None: - if not self.available: - return None - - try: - with self.connection.cursor() as cursor: - cursor.execute( - "SELECT remote_name FROM artifact_mappings WHERE s3_key = %s", - (s3_key,), - ) - result = cursor.fetchone() - return result[0] if result else None - except Exception as e: - print(f"Error getting 
remote for S3 key: {e}") - return None - - def add_local_file( - self, - repository_name: str, - file_path: str, - s3_key: str, - size_bytes: int, - sha256_sum: str, - content_type: str = None, - ): - if not self.available: - return False - - try: - with self.connection.cursor() as cursor: - cursor.execute( - """ - INSERT INTO local_files (repository_name, file_path, s3_key, size_bytes, sha256_sum, content_type) - VALUES (%s, %s, %s, %s, %s, %s) - """, - ( - repository_name, - file_path, - s3_key, - size_bytes, - sha256_sum, - content_type, - ), - ) - self.connection.commit() - return True - except Exception as e: - print(f"Error adding local file: {e}") - return False - - def get_local_file_metadata(self, repository_name: str, file_path: str): - if not self.available: - return None - - try: - with self.connection.cursor() as cursor: - cursor.execute( - """ - SELECT repository_name, file_path, s3_key, size_bytes, sha256_sum, content_type, created_at, uploaded_at - FROM local_files - WHERE repository_name = %s AND file_path = %s - """, - (repository_name, file_path), - ) - result = cursor.fetchone() - if result: - return { - "repository_name": result[0], - "file_path": result[1], - "s3_key": result[2], - "size_bytes": result[3], - "sha256_sum": result[4], - "content_type": result[5], - "created_at": result[6], - "uploaded_at": result[7], - } - return None - except Exception as e: - print(f"Error getting local file metadata: {e}") - return None - - def list_local_files(self, repository_name: str, prefix: str = ""): - if not self.available: - return [] - - try: - with self.connection.cursor() as cursor: - if prefix: - cursor.execute( - """ - SELECT file_path, size_bytes, sha256_sum, content_type, created_at, uploaded_at - FROM local_files - WHERE repository_name = %s AND file_path LIKE %s - ORDER BY file_path - """, - (repository_name, f"{prefix}%"), - ) - else: - cursor.execute( - """ - SELECT file_path, size_bytes, sha256_sum, content_type, created_at, uploaded_at - 
FROM local_files - WHERE repository_name = %s - ORDER BY file_path - """, - (repository_name,), - ) - - results = cursor.fetchall() - return [ - { - "file_path": result[0], - "size_bytes": result[1], - "sha256_sum": result[2], - "content_type": result[3], - "created_at": result[4], - "uploaded_at": result[5], - } - for result in results - ] - except Exception as e: - print(f"Error listing local files: {e}") - return [] - - def delete_local_file(self, repository_name: str, file_path: str): - if not self.available: - return False - - try: - with self.connection.cursor() as cursor: - cursor.execute( - """ - DELETE FROM local_files - WHERE repository_name = %s AND file_path = %s - RETURNING s3_key - """, - (repository_name, file_path), - ) - result = cursor.fetchone() - self.connection.commit() - return result[0] if result else None - except Exception as e: - print(f"Error deleting local file: {e}") - return None - - def file_exists(self, repository_name: str, file_path: str): - if not self.available: - return False - - try: - with self.connection.cursor() as cursor: - cursor.execute( - """ - SELECT 1 FROM local_files - WHERE repository_name = %s AND file_path = %s - """, - (repository_name, file_path), - ) - return cursor.fetchone() is not None - except Exception as e: - print(f"Error checking file existence: {e}") - return False diff --git a/src/artifactapi/docker_auth.py b/src/artifactapi/docker_auth.py deleted file mode 100644 index c331c3f..0000000 --- a/src/artifactapi/docker_auth.py +++ /dev/null @@ -1,19 +0,0 @@ -from .auth.docker import ( - _cache_key, - _get_cached_token, - _store_token, - _token_cache, - fetch_token, - get_docker_token_for_response, - parse_www_authenticate, -) - -__all__ = [ - "_cache_key", - "_get_cached_token", - "_store_token", - "_token_cache", - "fetch_token", - "get_docker_token_for_response", - "parse_www_authenticate", -] diff --git a/src/artifactapi/main.py b/src/artifactapi/main.py deleted file mode 100644 index ae9d566..0000000 
--- a/src/artifactapi/main.py +++ /dev/null @@ -1,145 +0,0 @@ -import logging -import os - -from fastapi import FastAPI, File, Query, Request, UploadFile -from fastapi.responses import PlainTextResponse -from prometheus_client import CONTENT_TYPE_LATEST, generate_latest -from pydantic import BaseModel - -try: - from importlib.metadata import version - - __version__ = version("artifactapi") -except ImportError: - __version__ = "dev" - -from .artifact import discovery, flush, local, proxy, virtual -from .artifact import docker as docker_handler -from .cache import RedisCache -from .config import ConfigManager -from .database import DatabaseManager -from .metrics import MetricsManager -from .storage import S3Storage - -logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") -logger = logging.getLogger(__name__) - -app = FastAPI(title="Artifact Storage API", version=__version__) - -config_path = os.environ.get("CONFIG_PATH") -if not config_path: - raise ValueError("CONFIG_PATH environment variable is required") -config = ConfigManager(config_path) - -s3_config = config.get_s3_config() -redis_config = config.get_redis_config() -db_config = config.get_database_config() - -storage = S3Storage(**s3_config) -cache = RedisCache(redis_config["url"]) -database = DatabaseManager(db_config["url"]) -metrics = MetricsManager(cache, database) - - -class ArtifactRequest(BaseModel): - remote: str - include_pattern: str - - -@app.get("/") -def read_root(): - config._check_reload() - return { - "message": "Artifact Storage API", - "version": app.version, - "remotes": list(config.config.get("remotes", {}).keys()), - "virtuals": list(config.config.get("virtuals", {}).keys()), - "locals": list(config.config.get("locals", {}).keys()), - } - - -@app.get("/health") -def health_check(): - return {"status": "healthy"} - - -@app.get("/config") -def get_config(): - return config.config - - -@app.get("/metrics") -def get_metrics(json: bool | None = 
Query(False, description="Return JSON format instead of Prometheus")): - config._check_reload() - if json: - return metrics.get_metrics(storage, config) - metrics.get_metrics(storage, config) - return PlainTextResponse(generate_latest().decode("utf-8"), media_type=CONTENT_TYPE_LATEST) - - -@app.put("/cache/flush") -def flush_cache( - remote: str = Query(default=None, description="Specific remote to flush (optional)"), - cache_type: str = Query(default="all", description="Type to flush: 'all', 'index', 'files', 'metrics'"), -): - return flush.handle(remote, cache_type, cache, storage) - - -@app.get("/v2/") -async def docker_v2_ping(): - return docker_handler.ping() - - -@app.api_route("/v2/{remote_name}/{path:path}", methods=["GET", "HEAD"]) -async def docker_v2_proxy(request: Request, remote_name: str, path: str): - return await docker_handler.proxy(request, remote_name, path, storage, cache, config, metrics) - - -@app.get("/api/v1/virtual/{virtual_name}/{path:path}") -async def get_virtual_artifact(request: Request, virtual_name: str, path: str): - return await virtual.handle(request, virtual_name, path, storage, cache, config) - - -@app.get("/api/v1/remote/{remote_name}/{path:path}") -async def get_artifact(request: Request, remote_name: str, path: str): - return await proxy.handle(request, remote_name, path, storage, cache, config, database, metrics) - - -@app.get("/api/v1/local/{local_name}/{path:path}") -def get_local_artifact(local_name: str, path: str): - return local.download(local_name, path, storage, database, config) - - -@app.put("/api/v1/local/{local_name}/{path:path}") -async def upload_local_file(local_name: str, path: str, file: UploadFile = File(...)): - return await local.upload(local_name, path, file, storage, database, config) - - -@app.head("/api/v1/local/{local_name}/{path:path}") -def check_local_file_exists(local_name: str, path: str): - return local.check_exists(local_name, path, database, config) - - 
-@app.delete("/api/v1/local/{local_name}/{path:path}") -def delete_local_file(local_name: str, path: str): - return local.delete(local_name, path, storage, database, config) - - -@app.post("/api/v1/artifacts/cache") -async def cache_artifact(request: ArtifactRequest): - return await discovery.cache_artifacts(request.remote, request.include_pattern, storage) - - -@app.get("/api/v1/artifacts/{remote:path}") -async def list_cached_artifacts(remote: str, include_pattern: str = ".*"): - return await discovery.list_artifacts(remote, include_pattern, storage) - - -def main(): - import uvicorn - - uvicorn.run(app, host="0.0.0.0", port=8000) - - -if __name__ == "__main__": - main() diff --git a/src/artifactapi/metrics.py b/src/artifactapi/metrics.py deleted file mode 100644 index 04473fb..0000000 --- a/src/artifactapi/metrics.py +++ /dev/null @@ -1,202 +0,0 @@ -from datetime import datetime -from typing import Any - -from prometheus_client import Counter, Gauge - -# Prometheus metrics -request_counter = Counter("artifact_requests_total", "Total artifact requests", ["remote", "status"]) -cache_hit_counter = Counter("artifact_cache_hits_total", "Total cache hits", ["remote"]) -cache_miss_counter = Counter("artifact_cache_misses_total", "Total cache misses", ["remote"]) -bandwidth_saved_counter = Counter("artifact_bandwidth_saved_bytes_total", "Total bandwidth saved", ["remote"]) -storage_size_gauge = Gauge("artifact_storage_size_bytes", "Storage size by remote", ["remote"]) -redis_keys_gauge = Gauge("artifact_redis_keys_total", "Total Redis keys") - - -class MetricsManager: - def __init__(self, redis_client=None, database_manager=None): - self.redis_client = redis_client - self.database_manager = database_manager - self.start_time = datetime.now() - - def record_cache_hit(self, remote_name: str, size_bytes: int): - """Record a cache hit with size for bandwidth calculation""" - # Update Prometheus metrics - request_counter.labels(remote=remote_name, status="cache_hit").inc() - 
cache_hit_counter.labels(remote=remote_name).inc() - bandwidth_saved_counter.labels(remote=remote_name).inc(size_bytes) - - # Update Redis for persistence across instances - if self.redis_client and self.redis_client.available: - try: - # Increment global counters - self.redis_client.client.incr("metrics:cache_hits") - self.redis_client.client.incr("metrics:total_requests") - self.redis_client.client.incrby("metrics:bandwidth_saved", size_bytes) - - # Increment per-remote counters - self.redis_client.client.incr(f"metrics:cache_hits:{remote_name}") - self.redis_client.client.incr(f"metrics:total_requests:{remote_name}") - self.redis_client.client.incrby(f"metrics:bandwidth_saved:{remote_name}", size_bytes) - except Exception: - pass - - def record_cache_miss(self, remote_name: str, size_bytes: int): - """Record a cache miss (new download)""" - # Update Prometheus metrics - request_counter.labels(remote=remote_name, status="cache_miss").inc() - cache_miss_counter.labels(remote=remote_name).inc() - - # Update Redis for persistence across instances - if self.redis_client and self.redis_client.available: - try: - # Increment global counters - self.redis_client.client.incr("metrics:cache_misses") - self.redis_client.client.incr("metrics:total_requests") - - # Increment per-remote counters - self.redis_client.client.incr(f"metrics:cache_misses:{remote_name}") - self.redis_client.client.incr(f"metrics:total_requests:{remote_name}") - except Exception: - pass - - def get_redis_key_count(self) -> int: - """Get total number of keys in Redis""" - if self.redis_client and self.redis_client.available: - try: - return self.redis_client.client.dbsize() - except Exception: - return 0 - return 0 - - def get_s3_total_size(self, storage) -> int: - """Get total size of all objects in S3 bucket""" - try: - total_size = 0 - paginator = storage.client.get_paginator("list_objects_v2") - for page in paginator.paginate(Bucket=storage.bucket): - if "Contents" in page: - for obj in 
page["Contents"]: - total_size += obj["Size"] - return total_size - except Exception: - return 0 - - def get_s3_size_by_remote(self, storage, config_manager) -> dict[str, int]: - """Get size of stored data per remote using database mappings""" - if self.database_manager and self.database_manager.available: - # Get from database if available - db_sizes = self.database_manager.get_storage_by_remote() - if db_sizes: - # Initialize all configured remotes and locals to 0 - remote_sizes = {} - all_names = list(config_manager.config.get("remotes", {}).keys()) + list(config_manager.config.get("locals", {}).keys()) - for remote in all_names: - remote_sizes[remote] = db_sizes.get(remote, 0) - - # Update Prometheus gauges - for remote, size in remote_sizes.items(): - storage_size_gauge.labels(remote=remote).set(size) - - return remote_sizes - - # Fallback to S3 scanning if database not available - try: - remote_sizes = {} - all_names = list(config_manager.config.get("remotes", {}).keys()) + list(config_manager.config.get("locals", {}).keys()) - - # Initialize all remotes and locals to 0 - for remote in all_names: - remote_sizes[remote] = 0 - - paginator = storage.client.get_paginator("list_objects_v2") - for page in paginator.paginate(Bucket=storage.bucket): - if "Contents" in page: - for obj in page["Contents"]: - key = obj["Key"] - # Try to map from database first - remote = None - if self.database_manager: - remote = self.database_manager.get_remote_for_s3_key(key) - - # Fallback to key parsing - if not remote: - remote = key.split("/")[0] if "/" in key else "unknown" - - if remote in remote_sizes: - remote_sizes[remote] += obj["Size"] - else: - remote_sizes.setdefault("unknown", 0) - remote_sizes["unknown"] += obj["Size"] - - # Update Prometheus gauges - for remote, size in remote_sizes.items(): - if remote != "unknown": # Don't set gauge for unknown - storage_size_gauge.labels(remote=remote).set(size) - - return remote_sizes - except Exception: - return {} - - def 
get_metrics(self, storage, config_manager) -> dict[str, Any]: - """Get comprehensive metrics""" - # Update Redis keys gauge - redis_key_count = self.get_redis_key_count() - redis_keys_gauge.set(redis_key_count) - - metrics = { - "timestamp": datetime.now().isoformat(), - "uptime_seconds": int((datetime.now() - self.start_time).total_seconds()), - "redis": {"total_keys": redis_key_count}, - "storage": { - "total_size_bytes": self.get_s3_total_size(storage), - "size_by_remote": self.get_s3_size_by_remote(storage, config_manager), - }, - "requests": { - "cache_hits": 0, - "cache_misses": 0, - "total_requests": 0, - "cache_hit_ratio": 0.0, - }, - "bandwidth": {"saved_bytes": 0}, - "per_remote": {}, - } - - if self.redis_client and self.redis_client.available: - try: - # Get global metrics - cache_hits = int(self.redis_client.client.get("metrics:cache_hits") or 0) - cache_misses = int(self.redis_client.client.get("metrics:cache_misses") or 0) - total_requests = cache_hits + cache_misses - bandwidth_saved = int(self.redis_client.client.get("metrics:bandwidth_saved") or 0) - - metrics["requests"]["cache_hits"] = cache_hits - metrics["requests"]["cache_misses"] = cache_misses - metrics["requests"]["total_requests"] = total_requests - metrics["requests"]["cache_hit_ratio"] = cache_hits / total_requests if total_requests > 0 else 0.0 - metrics["bandwidth"]["saved_bytes"] = bandwidth_saved - - # Get per-repo metrics - all_repos = { - **config_manager.config.get("remotes", {}), - **config_manager.config.get("virtuals", {}), - **config_manager.config.get("locals", {}), - } - for remote in all_repos.keys(): - remote_cache_hits = int(self.redis_client.client.get(f"metrics:cache_hits:{remote}") or 0) - remote_cache_misses = int(self.redis_client.client.get(f"metrics:cache_misses:{remote}") or 0) - remote_total = remote_cache_hits + remote_cache_misses - remote_bandwidth_saved = int(self.redis_client.client.get(f"metrics:bandwidth_saved:{remote}") or 0) - - 
metrics["per_remote"][remote] = { - "cache_hits": remote_cache_hits, - "cache_misses": remote_cache_misses, - "total_requests": remote_total, - "cache_hit_ratio": remote_cache_hits / remote_total if remote_total > 0 else 0.0, - "bandwidth_saved_bytes": remote_bandwidth_saved, - "storage_size_bytes": metrics["storage"]["size_by_remote"].get(remote, 0), - } - - except Exception: - pass - - return metrics diff --git a/src/artifactapi/remote/__init__.py b/src/artifactapi/remote/__init__.py deleted file mode 100644 index 225f8c5..0000000 --- a/src/artifactapi/remote/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from . import generic, helm, npm, puppet, python, rpm, terraform -from .base import get_content_type - -__all__ = ["generic", "helm", "npm", "puppet", "python", "rpm", "terraform", "get_content_type"] diff --git a/src/artifactapi/remote/base.py b/src/artifactapi/remote/base.py deleted file mode 100644 index ce5f523..0000000 --- a/src/artifactapi/remote/base.py +++ /dev/null @@ -1,16 +0,0 @@ -def get_content_type(filename: str) -> str: - if filename.endswith((".tar.gz", ".tgz")): - return "application/gzip" - if filename.endswith(".zip") or filename.endswith(".whl"): - return "application/zip" - if filename.endswith(".exe"): - return "application/x-msdownload" - if filename.endswith(".rpm"): - return "application/x-rpm" - if filename.endswith(".xml"): - return "application/xml" - if filename.endswith((".xml.gz", ".xml.bz2", ".xml.xz")): - return "application/gzip" - if filename.endswith((".yaml", ".yml")): - return "text/yaml" - return "application/octet-stream" diff --git a/src/artifactapi/remote/generic.py b/src/artifactapi/remote/generic.py deleted file mode 100644 index 3a41962..0000000 --- a/src/artifactapi/remote/generic.py +++ /dev/null @@ -1,3 +0,0 @@ -from .base import get_content_type - -__all__ = ["get_content_type"] diff --git a/src/artifactapi/remote/helm.py b/src/artifactapi/remote/helm.py deleted file mode 100644 index dc0aa79..0000000 --- 
a/src/artifactapi/remote/helm.py +++ /dev/null @@ -1,18 +0,0 @@ -from .base import get_content_type - - -def resolve_content( - data: bytes, - path: str, - filename: str, - base_url: str, - proxy_url: str, - remote_name: str, -) -> tuple[bytes, str]: - if filename == "index.yaml": - data = data.replace( - base_url.encode(), - f"{proxy_url}/api/v1/remote/{remote_name}".encode(), - ) - return data, "text/yaml" - return data, get_content_type(filename) diff --git a/src/artifactapi/remote/npm.py b/src/artifactapi/remote/npm.py deleted file mode 100644 index 3547b2d..0000000 --- a/src/artifactapi/remote/npm.py +++ /dev/null @@ -1,21 +0,0 @@ -import re - -from .base import get_content_type - - -def resolve_content( - data: bytes, - path: str, - filename: str, - immutable_patterns: list[str], - base_url: str, - proxy_url: str, - remote_name: str, -) -> tuple[bytes, str]: - if not any(re.search(p, path) for p in immutable_patterns): - data = data.replace( - base_url.encode(), - f"{proxy_url}/api/v1/remote/{remote_name}".encode(), - ) - return data, "application/json" - return data, get_content_type(filename) diff --git a/src/artifactapi/remote/puppet.py b/src/artifactapi/remote/puppet.py deleted file mode 100644 index 758bbf0..0000000 --- a/src/artifactapi/remote/puppet.py +++ /dev/null @@ -1,24 +0,0 @@ -from .base import get_content_type - - -def resolve_content( - data: bytes, - path: str, - filename: str, - base_url: str, - proxy_url: str, - remote_name: str, -) -> tuple[bytes, str]: - if not path.startswith("v3/files/"): - proxy_remote_url = f"{proxy_url}/api/v1/remote/{remote_name}" - # Rewrite any absolute forge API URLs - data = data.replace(base_url.encode(), proxy_remote_url.encode()) - # Rewrite relative file_uri paths ("/v3/files/...") to absolute proxy URLs. - # g10k resolves file_uri against only the forge host, so a relative path - # would drop our /api/v1/remote/ prefix. 
- data = data.replace( - b'"/v3/files/', - f'"{proxy_remote_url}/v3/files/'.encode(), - ) - return data, "application/json" - return data, get_content_type(filename) diff --git a/src/artifactapi/remote/python.py b/src/artifactapi/remote/python.py deleted file mode 100644 index bed8d2d..0000000 --- a/src/artifactapi/remote/python.py +++ /dev/null @@ -1,32 +0,0 @@ -import re - -from .base import get_content_type - - -def construct_url(base_url: str, path: str) -> str: - """Build the upstream URL for a PyPI request. - - PyPI splits simple/ index pages (pypi.org) from file downloads - (files.pythonhosted.org), so simple/ requests are redirected to pypi.org. - """ - if base_url.rstrip("/") == "https://files.pythonhosted.org" and "simple/" in path: - return f"https://pypi.org/{path}" - return f"{base_url}/{path}" - - -def resolve_content( - data: bytes, - path: str, - filename: str, - immutable_patterns: list[str], - base_url: str, - proxy_url: str, - remote_name: str, -) -> tuple[bytes, str]: - if not any(re.search(p, path) for p in immutable_patterns): - data = data.replace( - base_url.encode(), - f"{proxy_url}/api/v1/remote/{remote_name}".encode(), - ) - return data, "text/html; charset=utf-8" - return data, get_content_type(filename) diff --git a/src/artifactapi/remote/rpm.py b/src/artifactapi/remote/rpm.py deleted file mode 100644 index 3a41962..0000000 --- a/src/artifactapi/remote/rpm.py +++ /dev/null @@ -1,3 +0,0 @@ -from .base import get_content_type - -__all__ = ["get_content_type"] diff --git a/src/artifactapi/remote/terraform.py b/src/artifactapi/remote/terraform.py deleted file mode 100644 index ab89ff8..0000000 --- a/src/artifactapi/remote/terraform.py +++ /dev/null @@ -1,36 +0,0 @@ -import json -import re -from urllib.parse import urlparse - -from .base import get_content_type - -_DOWNLOAD_PATH = re.compile(r"^[^/]+/[^/]+/[^/]+/download/[^/]+/[^/]+$") - - -def construct_url(base_url: str, path: str) -> str: - return f"{base_url}/v1/providers/{path}" - - 
-def resolve_content( - data: bytes, - path: str, - filename: str, - _base_url: str, - proxy_url: str, - _remote_name: str, - releases_remote: str | None = None, -) -> tuple[bytes, str]: - if filename.endswith((".zip", ".sig")): - return data, get_content_type(filename) - if releases_remote and _DOWNLOAD_PATH.match(path): - releases_proxy = f"{proxy_url}/api/v1/remote/{releases_remote}" - try: - obj = json.loads(data) - for field in ("download_url", "shasums_url", "shasums_signature_url"): - if field in obj: - parsed = urlparse(obj[field]) - obj[field] = f"{releases_proxy}{parsed.path}" - data = json.dumps(obj).encode() - except (json.JSONDecodeError, KeyError): - pass - return data, "application/json" diff --git a/src/artifactapi/storage/__init__.py b/src/artifactapi/storage/__init__.py deleted file mode 100644 index 64272bd..0000000 --- a/src/artifactapi/storage/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .s3 import S3Storage - -__all__ = ["S3Storage"] diff --git a/src/artifactapi/storage/s3.py b/src/artifactapi/storage/s3.py deleted file mode 100644 index b2bcbc6..0000000 --- a/src/artifactapi/storage/s3.py +++ /dev/null @@ -1,114 +0,0 @@ -import hashlib -import os - -import boto3 -from botocore.config import Config -from botocore.exceptions import ClientError -from fastapi import HTTPException - - -class S3Storage: - def __init__( - self, - endpoint: str, - access_key: str, - secret_key: str, - bucket: str, - secure: bool = False, - ): - self.endpoint = endpoint - self.access_key = access_key - self.secret_key = secret_key - self.bucket = bucket - self.secure = secure - - ca_bundle = os.environ.get("REQUESTS_CA_BUNDLE") or os.environ.get("SSL_CERT_FILE") - config_kwargs = {"request_checksum_calculation": "when_required", "response_checksum_validation": "when_required"} - client_kwargs = { - "endpoint_url": f"http{'s' if self.secure else ''}://{self.endpoint}", - "aws_access_key_id": self.access_key, - "aws_secret_access_key": self.secret_key, - "config": 
Config(**config_kwargs), - } - - if ca_bundle and os.path.exists(ca_bundle): - client_kwargs["verify"] = ca_bundle - print(f"Debug: Using CA bundle: {ca_bundle}") - else: - print( - f"Debug: No CA bundle found. REQUESTS_CA_BUNDLE={os.environ.get('REQUESTS_CA_BUNDLE')}, SSL_CERT_FILE={os.environ.get('SSL_CERT_FILE')}" - ) - - self.client = boto3.client("s3", **client_kwargs) - - try: - self._ensure_bucket_exists() - except Exception as e: - print(f"Warning: Could not ensure bucket exists during initialization: {e}") - print("Bucket creation will be attempted on first use") - - def _ensure_bucket_exists(self): - try: - self.client.head_bucket(Bucket=self.bucket) - except ClientError: - self.client.create_bucket(Bucket=self.bucket) - - def get_object_key(self, remote_name: str, path: str) -> str: - clean_path = path.lstrip("/") - filename = os.path.basename(clean_path) - directory_path = os.path.dirname(clean_path) - - # Docker blobs are keyed by digest for deduplication across images - if "/blobs/sha256:" in clean_path: - parts = clean_path.split("/blobs/sha256:") - if len(parts) == 2: - digest = parts[1] - return f"{remote_name}/blobs/sha256/{digest}" - - if directory_path: - path_hash = hashlib.sha256(directory_path.encode()).hexdigest()[:16] - return f"{remote_name}/{path_hash}/{filename}" - else: - return f"{remote_name}/{filename}" - - def exists(self, key: str) -> bool: - try: - self._ensure_bucket_exists() - self.client.head_object(Bucket=self.bucket, Key=key) - return True - except ClientError: - return False - - def upload(self, key: str, data: bytes) -> str: - self._ensure_bucket_exists() - self.client.put_object(Bucket=self.bucket, Key=key, Body=data) - return f"s3://{self.bucket}/{key}" - - def get_url(self, key: str) -> str: - return f"http://{self.endpoint}/{self.bucket}/{key}" - - def get_presigned_url(self, key: str, expiration: int = 3600) -> str: - try: - return self.client.generate_presigned_url( - "get_object", - Params={"Bucket": self.bucket, 
"Key": key}, - ExpiresIn=expiration, - ) - except Exception: - return self.get_url(key) - - def download_object(self, key: str) -> bytes: - try: - self._ensure_bucket_exists() - response = self.client.get_object(Bucket=self.bucket, Key=key) - return response["Body"].read() - except ClientError: - raise HTTPException(status_code=404, detail="Artifact not found") - - def delete_object(self, key: str) -> bool: - try: - self._ensure_bucket_exists() - self.client.delete_object(Bucket=self.bucket, Key=key) - return True - except ClientError: - return False diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 2659f14..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,220 +0,0 @@ -""" -Pytest configuration and shared fixtures. - -Module-level setup (env vars + connection patches) runs before any test -module is imported, so the FastAPI app initialises against mocks rather -than real S3 / Redis / PostgreSQL services. 
-""" - -import os -import tempfile -from unittest.mock import MagicMock, patch - -import yaml - -# --------------------------------------------------------------------------- -# Test remote configuration -# --------------------------------------------------------------------------- - -TEST_REMOTES = { - "remotes": { - "alpine-test": { - "base_url": "https://dl-cdn.alpinelinux.org", - "package": "alpine", - "immutable_patterns": [".*/x86_64/.*\\.apk$"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 3600}, - }, - "rpm-test": { - "base_url": "https://example.com/rpm", - "package": "rpm", - "immutable_patterns": [".*/x86_64/.*\\.rpm$", ".*/repodata/.*$"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 3600}, - }, - "docker-test": { - "base_url": "https://registry.example.com", - "package": "docker", - "cache": {"immutable_ttl": 0, "mutable_ttl": 300}, - }, - "docker-restricted": { - "base_url": "https://registry.example.com", - "package": "docker", - "immutable_patterns": ["^library/nginx"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 300}, - }, - "docker-bantags-test": { - "base_url": "https://registry.example.com", - "package": "docker", - "ban_tags_enabled": True, - "ban_tags": ["latest", "edge"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 300}, - }, - "generic-test": { - "base_url": "https://releases.example.com", - "package": "generic", - "immutable_patterns": [".*\\.tar\\.gz$"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 0}, - }, - "custom-index-test": { - "base_url": "https://example.com", - "package": "generic", - "mutable_patterns": ["metadata\\.json$"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, - }, - "check-mutable-test": { - "base_url": "https://example.com", - "package": "generic", - "mutable_patterns": ["metadata\\.json$"], - "check_mutable_updates": True, - "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, - }, - "pypi-test": { - "base_url": "https://files.pythonhosted.org", - "package": "pypi", - "immutable_patterns": [ - 
r"packages/.*\.whl$", - r"packages/.*\.whl\.metadata$", - r"packages/.*\.tar\.gz$", - ], - "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, - }, - "npm-test": { - "base_url": "https://registry.npmjs.org", - "package": "npm", - "immutable_patterns": [r"\.tgz$"], - "mutable_patterns": [r"^(?!.*\.tgz$).*"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, - }, - "helm-test": { - "base_url": "https://helm.releases.hashicorp.com", - "package": "helm", - "immutable_patterns": [r"\.tgz$"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 3600}, - }, - "quarantine-test": { - "base_url": "https://releases.example.com", - "package": "generic", - "immutable_patterns": [r".*\.tar\.gz$"], - "quarantine_new": True, - "quarantine_days": 3, - "cache": {"immutable_ttl": 0, "mutable_ttl": 0}, - }, - "quarantine-disabled": { - "base_url": "https://releases.example.com", - "package": "generic", - "immutable_patterns": [r".*\.tar\.gz$"], - "quarantine_new": False, - "quarantine_days": 3, - "cache": {"immutable_ttl": 0, "mutable_ttl": 0}, - }, - "helm-member-2": { - "base_url": "https://charts.example.com", - "package": "helm", - "immutable_patterns": [r"\.tgz$"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 1800}, - }, - "puppet-test": { - "base_url": "https://forgeapi.puppet.com", - "package": "puppet", - "immutable_patterns": [r"^v3/files/.*\.tar\.gz$"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 600}, - }, - "terraform-registry-test": { - "base_url": "https://registry.terraform.io", - "package": "terraform", - "immutable_patterns": [ - r"[^/]+/[^/]+/[^/]+/download/[^/]+/[^/]+$", - ], - "releases_remote": "hashicorp-releases-test", - "cache": {"immutable_ttl": 0, "mutable_ttl": 300}, - }, - "hashicorp-releases-test": { - "base_url": "https://releases.hashicorp.com", - "package": "generic", - "immutable_patterns": [r".*\.zip$", r".*SHA256SUMS(\.sig)?$"], - "cache": {"immutable_ttl": 0, "mutable_ttl": 0}, - }, - }, - "locals": { - "local-test": { - "package": "generic", - 
"cache": {"immutable_ttl": 0, "mutable_ttl": 0}, - }, - }, - "virtuals": { - "helm-virtual-test": { - "package": "helm", - "members": ["helm-test", "helm-member-2"], - }, - "unsupported-virtual-test": { - "package": "rpm", - "members": ["rpm-test"], - }, - "empty-virtual-test": { - "package": "helm", - "members": [], - }, - }, -} - -# --------------------------------------------------------------------------- -# Write temp config and set env vars BEFORE importing the package -# --------------------------------------------------------------------------- - -_tmpdir = tempfile.mkdtemp() -_config_path = os.path.join(_tmpdir, "remotes.yaml") -with open(_config_path, "w") as _f: - yaml.dump(TEST_REMOTES, _f) - -os.environ.update( - { - "CONFIG_PATH": _config_path, - "MINIO_ENDPOINT": "localhost:9000", - "MINIO_ACCESS_KEY": "testkey", - "MINIO_SECRET_KEY": "testsecret", - "MINIO_BUCKET": "testbucket", - "REDIS_URL": "redis://localhost:6379/0", - "DBHOST": "localhost", - "DBPORT": "5432", - "DBUSER": "test", - "DBPASS": "test", - "DBNAME": "test", - } -) - -# Patch external service connections before the package is imported. -# These stay active for the whole session (process exits after tests finish). 
-_boto3_patch = patch("boto3.client", return_value=MagicMock()) -_redis_patch = patch("redis.from_url", return_value=MagicMock()) -_psycopg2_patch = patch("psycopg2.connect", return_value=MagicMock()) -_boto3_patch.start() -_redis_patch.start() -_psycopg2_patch.start() - -# --------------------------------------------------------------------------- -# Shared fixtures -# --------------------------------------------------------------------------- - -import pytest # noqa: E402 -from fastapi.testclient import TestClient # noqa: E402 - - -@pytest.fixture(scope="session") -def app(): - from artifactapi.main import app as fastapi_app - - return fastapi_app - - -@pytest.fixture(scope="session") -def client(app): - return TestClient(app) - - -@pytest.fixture -def config_path(): - return _config_path - - -@pytest.fixture -def test_remotes(): - return TEST_REMOTES diff --git a/tests/test_cache.py b/tests/test_cache.py deleted file mode 100644 index 2c19593..0000000 --- a/tests/test_cache.py +++ /dev/null @@ -1,400 +0,0 @@ -"""Tests for RedisCache, focusing on is_mutable_file with configurable patterns.""" - -import hashlib -from unittest.mock import ANY, MagicMock, patch - -import pytest - -from artifactapi.cache import RedisCache -from artifactapi.config import _PACKAGE_MUTABLE_PATTERNS - - -@pytest.fixture -def bare_cache(): - """RedisCache instance bypassing __init__ (no Redis needed for pure-logic tests).""" - return RedisCache.__new__(RedisCache) - - -@pytest.fixture -def unavailable_cache(): - """RedisCache where Redis is not reachable.""" - with patch("redis.from_url", side_effect=Exception("connection refused")): - return RedisCache("redis://localhost:6379/0") - - -@pytest.fixture -def mock_redis_client(): - return MagicMock() - - -@pytest.fixture -def cache_with_redis(mock_redis_client): - """RedisCache backed by a MagicMock Redis client.""" - with patch("redis.from_url", return_value=mock_redis_client): - c = RedisCache("redis://localhost:6379/0") - c.client = 
mock_redis_client - c.available = True - return c - - -# --------------------------------------------------------------------------- -# is_mutable_file — alpine patterns -# --------------------------------------------------------------------------- - - -class TestIsMutableFileAlpine: - def test_apkindex_tarball_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] - assert bare_cache.is_mutable_file("alpine/v3.18/x86_64/APKINDEX.tar.gz", patterns) - - def test_nested_apkindex_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] - assert bare_cache.is_mutable_file("mirrors/dl-cdn/alpine/v3.19/community/x86_64/APKINDEX.tar.gz", patterns) - - def test_apk_package_is_not_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] - assert not bare_cache.is_mutable_file("alpine/v3.18/x86_64/musl-1.2.4-r2.apk", patterns) - - def test_random_tarball_is_not_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] - assert not bare_cache.is_mutable_file("some/path/archive.tar.gz", patterns) - - def test_apkindex_signature_file_is_not_index(self, bare_cache): - # Signature file adjacent to the index should not be treated as an index - patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] - assert not bare_cache.is_mutable_file("alpine/v3.18/x86_64/APKINDEX.tar.gz.sig", patterns) - - def test_apkindex_tmp_file_is_not_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["alpine"] - assert not bare_cache.is_mutable_file("alpine/v3.18/x86_64/APKINDEX.tar.gz.tmp", patterns) - - -# --------------------------------------------------------------------------- -# is_mutable_file — rpm patterns -# --------------------------------------------------------------------------- - - -class TestIsMutableFileRpm: - def test_repomd_xml_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] - assert bare_cache.is_mutable_file("almalinux/9/x86_64/repomd.xml", patterns) - - def 
test_repodata_primary_xml_gz_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] - assert bare_cache.is_mutable_file("repo/repodata/primary.xml.gz", patterns) - - def test_repodata_sqlite_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] - assert bare_cache.is_mutable_file("repo/repodata/primary.sqlite", patterns) - - def test_repodata_sqlite_bz2_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] - assert bare_cache.is_mutable_file("repo/repodata/other.sqlite.bz2", patterns) - - def test_repodata_yaml_xz_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] - assert bare_cache.is_mutable_file("repo/repodata/comps.yaml.xz", patterns) - - def test_packages_gz_pattern_matches_any_path(self, bare_cache): - # The Packages.gz$ regex is a carryover from the original hardcoded logic and - # deliberately matches any path ending in Packages.gz — including Debian-style paths. - # This test documents that intentional behaviour. 
- patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] - assert bare_cache.is_mutable_file("debian/dists/stable/main/binary-amd64/Packages.gz", patterns) - - def test_rpm_package_is_not_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] - assert not bare_cache.is_mutable_file("almalinux/9/x86_64/Packages/bash-5.1.8.x86_64.rpm", patterns) - - def test_arbitrary_xml_outside_repodata_is_not_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["rpm"] - assert not bare_cache.is_mutable_file("some/path/config.xml", patterns) - - -# --------------------------------------------------------------------------- -# is_mutable_file — docker patterns -# --------------------------------------------------------------------------- - - -class TestIsMutableFileDocker: - def test_tag_manifest_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] - assert bare_cache.is_mutable_file("library/nginx/manifests/latest", patterns) - - def test_version_tag_manifest_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] - assert bare_cache.is_mutable_file("library/nginx/manifests/1.25.3", patterns) - - def test_hyphenated_tag_manifest_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] - assert bare_cache.is_mutable_file("library/nginx/manifests/latest-rc", patterns) - - def test_numeric_date_tag_manifest_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] - assert bare_cache.is_mutable_file("library/nginx/manifests/20240101", patterns) - - def test_digest_manifest_is_not_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] - digest = "sha256:" + "a" * 64 - assert not bare_cache.is_mutable_file(f"library/nginx/manifests/{digest}", patterns) - - def test_tags_list_is_index(self, bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] - assert bare_cache.is_mutable_file("library/nginx/tags/list", patterns) - - def test_blob_is_not_index(self, 
bare_cache): - patterns = _PACKAGE_MUTABLE_PATTERNS["docker"] - assert not bare_cache.is_mutable_file("library/nginx/blobs/sha256:abc123", patterns) - - -# --------------------------------------------------------------------------- -# is_mutable_file — edge cases -# --------------------------------------------------------------------------- - - -class TestIsMutableFileEdgeCases: - def test_empty_patterns_nothing_is_index(self, bare_cache): - assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", []) - assert not bare_cache.is_mutable_file("repomd.xml", []) - assert not bare_cache.is_mutable_file("library/nginx/manifests/latest", []) - - def test_none_patterns_nothing_is_index(self, bare_cache): - assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", None) - assert not bare_cache.is_mutable_file("repomd.xml", None) - - def test_custom_patterns_match(self, bare_cache): - patterns = [r"metadata\.json$", r"index\.yaml$"] - assert bare_cache.is_mutable_file("repo/metadata.json", patterns) - assert bare_cache.is_mutable_file("repo/subdir/index.yaml", patterns) - assert not bare_cache.is_mutable_file("repo/data.tar.gz", patterns) - - def test_custom_pattern_does_not_match_standard_index(self, bare_cache): - patterns = [r"metadata\.json$"] - assert not bare_cache.is_mutable_file("APKINDEX.tar.gz", patterns) - - -# --------------------------------------------------------------------------- -# get_index_cache_key -# --------------------------------------------------------------------------- - - -class TestGetIndexCacheKey: - def test_key_format_is_deterministic(self, bare_cache): - # Assert against a pre-computed value to pin the hash algorithm, - # truncation length, and format string in one assertion. 
- path = "alpine/v3.18/x86_64/APKINDEX.tar.gz" - expected_hash = hashlib.sha256(path.encode()).hexdigest()[:16] - key = bare_cache.get_index_cache_key("alpine-test", path) - assert key == f"index:alpine-test:{expected_hash}" - - def test_different_paths_produce_different_keys(self, bare_cache): - k1 = bare_cache.get_index_cache_key("alpine-test", "alpine/v3.18/x86_64/APKINDEX.tar.gz") - k2 = bare_cache.get_index_cache_key("alpine-test", "alpine/v3.19/x86_64/APKINDEX.tar.gz") - assert k1 != k2 - - def test_different_remotes_produce_different_keys(self, bare_cache): - k1 = bare_cache.get_index_cache_key("remote-a", "path/to/APKINDEX.tar.gz") - k2 = bare_cache.get_index_cache_key("remote-b", "path/to/APKINDEX.tar.gz") - assert k1 != k2 - - def test_key_starts_with_index_prefix_and_remote(self, bare_cache): - key = bare_cache.get_index_cache_key("myremote", "some/path") - assert key.startswith("index:myremote:") - - def test_key_hash_segment_is_16_chars(self, bare_cache): - key = bare_cache.get_index_cache_key("myremote", "some/path/file.xml") - # Format: index::<16-char hash> — the fixed length matters for key-space hygiene - parts = key.split(":") - assert len(parts) == 3 - assert len(parts[2]) == 16 - - -# --------------------------------------------------------------------------- -# mark_index_cached / is_index_valid -# --------------------------------------------------------------------------- - - -class TestIndexValidity: - def test_mark_index_cached_calls_setex_with_correct_ttl(self, cache_with_redis, mock_redis_client): - cache_with_redis.mark_index_cached("remote", "path/APKINDEX.tar.gz", 300) - expected_key = cache_with_redis.get_index_cache_key("remote", "path/APKINDEX.tar.gz") - mock_redis_client.setex.assert_called_once_with(expected_key, 300, ANY) - - def test_present_key_is_valid(self, cache_with_redis, mock_redis_client): - mock_redis_client.exists.return_value = 1 - assert cache_with_redis.is_index_valid("remote", "path/APKINDEX.tar.gz") - - def 
test_missing_key_is_not_valid(self, cache_with_redis, mock_redis_client): - mock_redis_client.exists.return_value = 0 - assert not cache_with_redis.is_index_valid("remote", "path/APKINDEX.tar.gz") - - def test_unavailable_redis_is_not_valid(self, unavailable_cache): - assert not unavailable_cache.is_index_valid("remote", "some/path") - - def test_mark_cached_no_op_when_unavailable(self, unavailable_cache): - # client is None when Redis is unavailable — setex cannot be called - assert unavailable_cache.client is None - unavailable_cache.mark_index_cached("remote", "some/path", 300) # must not raise - - -# --------------------------------------------------------------------------- -# mutable meta (ETag / Last-Modified storage) -# --------------------------------------------------------------------------- - - -class TestMutableMeta: - def test_meta_key_format(self, bare_cache): - path = "repo/metadata.json" - expected_hash = hashlib.sha256(path.encode()).hexdigest()[:16] - assert bare_cache.get_mutable_meta_key("myremote", path) == f"mutable:meta:myremote:{expected_hash}" - - def test_meta_key_hash_is_16_chars(self, bare_cache): - key = bare_cache.get_mutable_meta_key("remote", "some/path/file.json") - assert len(key.split(":")[-1]) == 16 - - def test_store_and_retrieve_etag(self, cache_with_redis, mock_redis_client): - mock_redis_client.hgetall.return_value = {"etag": '"abc123"'} - cache_with_redis.store_mutable_meta("remote", "path/meta.json", '"abc123"', None) - mock_redis_client.hset.assert_called_once() - meta = cache_with_redis.get_mutable_meta("remote", "path/meta.json") - assert meta["etag"] == '"abc123"' - - def test_store_and_retrieve_last_modified(self, cache_with_redis, mock_redis_client): - lm = "Mon, 01 Jan 2024 00:00:00 GMT" - mock_redis_client.hgetall.return_value = {"last_modified": lm} - cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, lm) - meta = cache_with_redis.get_mutable_meta("remote", "path/meta.json") - assert 
meta["last_modified"] == lm - - def test_store_no_op_when_both_none(self, cache_with_redis, mock_redis_client): - cache_with_redis.store_mutable_meta("remote", "path/meta.json", None, None) - mock_redis_client.hset.assert_not_called() - - def test_store_no_op_when_unavailable(self, unavailable_cache): - unavailable_cache.store_mutable_meta("remote", "path", "etag", None) # must not raise - - def test_get_returns_empty_when_unavailable(self, unavailable_cache): - assert unavailable_cache.get_mutable_meta("remote", "path") == {} - - def test_delete_removes_meta_key(self, cache_with_redis, mock_redis_client): - expected_key = cache_with_redis.get_mutable_meta_key("remote", "path/meta.json") - cache_with_redis.delete_mutable_meta("remote", "path/meta.json") - mock_redis_client.delete.assert_called_once_with(expected_key) - - def test_delete_no_op_when_unavailable(self, unavailable_cache): - unavailable_cache.delete_mutable_meta("remote", "path") # must not raise - - -# --------------------------------------------------------------------------- -# artifact published date (quarantine support) -# --------------------------------------------------------------------------- - - -class TestArtifactPublished: - def test_key_format_is_deterministic(self, bare_cache): - path = "some/path/package-1.0.tar.gz" - expected_hash = hashlib.sha256(path.encode()).hexdigest()[:16] - assert bare_cache.get_artifact_published_key("myremote", path) == f"pkg:published:myremote:{expected_hash}" - - def test_key_hash_is_16_chars(self, bare_cache): - key = bare_cache.get_artifact_published_key("remote", "path/to/file.whl") - assert len(key.split(":")[-1]) == 16 - - def test_different_paths_produce_different_keys(self, bare_cache): - k1 = bare_cache.get_artifact_published_key("remote", "pkg-1.0.tar.gz") - k2 = bare_cache.get_artifact_published_key("remote", "pkg-2.0.tar.gz") - assert k1 != k2 - - def test_store_calls_set_with_correct_value(self, cache_with_redis, mock_redis_client): - lm = "Mon, 
01 Jan 2024 00:00:00 GMT" - cache_with_redis.store_artifact_published("remote", "path/pkg.tar.gz", lm) - expected_key = cache_with_redis.get_artifact_published_key("remote", "path/pkg.tar.gz") - mock_redis_client.set.assert_called_once_with(expected_key, lm) - - def test_get_returns_stored_value(self, cache_with_redis, mock_redis_client): - lm = "Tue, 15 Mar 2022 12:00:00 GMT" - mock_redis_client.get.return_value = lm - result = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz") - assert result == lm - - def test_get_returns_none_when_not_stored(self, cache_with_redis, mock_redis_client): - mock_redis_client.get.return_value = None - result = cache_with_redis.get_artifact_published("remote", "path/pkg.tar.gz") - assert result is None - - def test_store_no_op_when_unavailable(self, unavailable_cache): - unavailable_cache.store_artifact_published("remote", "path", "Mon, 01 Jan 2024 00:00:00 GMT") - - def test_get_returns_none_when_unavailable(self, unavailable_cache): - assert unavailable_cache.get_artifact_published("remote", "path") is None - - -# --------------------------------------------------------------------------- -# fetch lock (thundering-herd deduplication) -# --------------------------------------------------------------------------- - - -class TestFetchLock: - def test_acquire_returns_true_when_lock_obtained(self, cache_with_redis, mock_redis_client): - mock_redis_client.set.return_value = True - result = cache_with_redis.acquire_fetch_lock("myremote", "library/nginx/manifests/latest") - assert result is True - - def test_acquire_calls_set_nx_with_ttl(self, cache_with_redis, mock_redis_client): - mock_redis_client.set.return_value = True - cache_with_redis.acquire_fetch_lock("myremote", "library/nginx/manifests/latest", ttl=15) - _, kwargs = mock_redis_client.set.call_args - assert kwargs.get("nx") is True - assert kwargs.get("ex") == 15 - - def test_acquire_returns_false_when_lock_already_held(self, cache_with_redis, 
mock_redis_client): - mock_redis_client.set.return_value = None # Redis SET NX → None when key exists - result = cache_with_redis.acquire_fetch_lock("myremote", "library/nginx/manifests/latest") - assert result is False - - def test_acquire_fails_open_when_unavailable(self, unavailable_cache): - # caller must be allowed to proceed when Redis is down - assert unavailable_cache.acquire_fetch_lock("myremote", "some/path") is True - - def test_acquire_fails_open_on_redis_exception(self, cache_with_redis, mock_redis_client): - mock_redis_client.set.side_effect = Exception("connection reset") - assert cache_with_redis.acquire_fetch_lock("myremote", "some/path") is True - - def test_lock_key_embeds_path_hash(self, cache_with_redis, mock_redis_client): - mock_redis_client.set.return_value = True - path = "library/nginx/manifests/latest" - cache_with_redis.acquire_fetch_lock("myremote", path) - args, _ = mock_redis_client.set.call_args - expected_hash = hashlib.sha256(path.encode()).hexdigest()[:16] - assert args[0] == f"fetchlock:myremote:{expected_hash}" - - def test_lock_key_hash_is_16_chars(self, cache_with_redis, mock_redis_client): - mock_redis_client.set.return_value = True - cache_with_redis.acquire_fetch_lock("myremote", "some/long/path/file.tar.gz") - args, _ = mock_redis_client.set.call_args - # key format: fetchlock:<remote>:<16-char hash> - parts = args[0].split(":") - assert len(parts) == 3 - assert len(parts[2]) == 16 - - def test_different_paths_produce_different_lock_keys(self, cache_with_redis, mock_redis_client): - mock_redis_client.set.return_value = True - cache_with_redis.acquire_fetch_lock("myremote", "path/a/manifests/latest") - key_a = mock_redis_client.set.call_args[0][0] - mock_redis_client.set.reset_mock() - cache_with_redis.acquire_fetch_lock("myremote", "path/b/manifests/latest") - key_b = mock_redis_client.set.call_args[0][0] - assert key_a != key_b - - def test_release_deletes_correct_key(self, cache_with_redis, mock_redis_client): - path =
"library/nginx/manifests/latest" - cache_with_redis.release_fetch_lock("myremote", path) - expected_hash = hashlib.sha256(path.encode()).hexdigest()[:16] - mock_redis_client.delete.assert_called_once_with(f"fetchlock:myremote:{expected_hash}") - - def test_release_no_op_when_unavailable(self, unavailable_cache): - unavailable_cache.release_fetch_lock("myremote", "some/path") # must not raise - - def test_release_no_op_on_redis_exception(self, cache_with_redis, mock_redis_client): - mock_redis_client.delete.side_effect = Exception("timeout") - cache_with_redis.release_fetch_lock("myremote", "some/path") # must not raise diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index bb719b7..0000000 --- a/tests/test_config.py +++ /dev/null @@ -1,540 +0,0 @@ -"""Tests for ConfigManager, focusing on get_mutable_patterns and get_immutable_patterns.""" - -import os - -import pytest -import yaml - -from artifactapi.config import ConfigManager - - -@pytest.fixture -def make_config(tmp_path): - """Factory: write a remotes dict to a temp YAML and return a ConfigManager.""" - - def _make(remotes_dict): - cfg_file = tmp_path / "remotes.yaml" - cfg_file.write_text(yaml.dump({"remotes": remotes_dict})) - return ConfigManager(str(cfg_file)) - - return _make - - -# --------------------------------------------------------------------------- -# get_mutable_patterns -# --------------------------------------------------------------------------- - - -class TestGetMutablePatterns: - def test_alpine_returns_package_defaults(self, make_config): - cfg = make_config({"r": {"package": "alpine", "base_url": "https://x.com"}}) - patterns = cfg.get_mutable_patterns("r") - assert r"APKINDEX\.tar\.gz$" in patterns - - def test_rpm_returns_package_defaults(self, make_config): - cfg = make_config({"r": {"package": "rpm", "base_url": "https://x.com"}}) - patterns = cfg.get_mutable_patterns("r") - assert r"repomd\.xml$" in patterns - assert any("repodata" in p for p in 
patterns) - - def test_docker_returns_package_defaults(self, make_config): - cfg = make_config({"r": {"package": "docker", "base_url": "https://x.com"}}) - patterns = cfg.get_mutable_patterns("r") - assert any("manifests" in p for p in patterns) - assert any("tags/list" in p for p in patterns) - - def test_generic_returns_empty_list(self, make_config): - cfg = make_config({"r": {"package": "generic", "base_url": "https://x.com"}}) - assert cfg.get_mutable_patterns("r") == [] - - def test_unknown_remote_returns_empty_list(self, make_config): - cfg = make_config({}) - assert cfg.get_mutable_patterns("nonexistent") == [] - - def test_missing_package_field_defaults_to_generic(self, make_config): - cfg = make_config({"r": {"base_url": "https://x.com"}}) - assert cfg.get_mutable_patterns("r") == [] - - def test_unknown_package_type_returns_empty_list(self, make_config): - # A mis-spelled package type silently returns [] — this is a known footgun - cfg = make_config({"r": {"package": "deb", "base_url": "https://x.com"}}) - assert cfg.get_mutable_patterns("r") == [] - - def test_extra_patterns_appended_after_defaults(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "alpine", - "base_url": "https://x.com", - "mutable_patterns": [r"custom\.json$"], - } - } - ) - patterns = cfg.get_mutable_patterns("r") - assert r"APKINDEX\.tar\.gz$" in patterns - assert r"custom\.json$" in patterns - # Defaults come first - assert patterns.index(r"APKINDEX\.tar\.gz$") < patterns.index(r"custom\.json$") - - def test_explicit_empty_extra_patterns_returns_defaults(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "alpine", - "base_url": "https://x.com", - "mutable_patterns": [], - } - } - ) - assert r"APKINDEX\.tar\.gz$" in cfg.get_mutable_patterns("r") - - def test_duplicate_extra_pattern_not_added_twice(self, make_config): - existing = r"APKINDEX\.tar\.gz$" - cfg = make_config( - { - "r": { - "type": "remote", - 
"package": "alpine", - "base_url": "https://x.com", - "mutable_patterns": [existing], - } - } - ) - patterns = cfg.get_mutable_patterns("r") - assert patterns.count(existing) == 1 - - def test_generic_with_only_extra_patterns(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "generic", - "base_url": "https://x.com", - "mutable_patterns": [r"meta\.json$", r"index\.yaml$"], - } - } - ) - assert cfg.get_mutable_patterns("r") == [r"meta\.json$", r"index\.yaml$"] - - def test_rpm_extra_patterns_merged(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "rpm", - "base_url": "https://x.com", - "mutable_patterns": [r"custom-meta\.xml$"], - } - } - ) - patterns = cfg.get_mutable_patterns("r") - assert r"repomd\.xml$" in patterns - assert r"custom-meta\.xml$" in patterns - - def test_npm_has_no_package_defaults(self, make_config): - cfg = make_config({"r": {"package": "npm", "base_url": "https://x.com"}}) - assert cfg.get_mutable_patterns("r") == [] - - def test_npm_explicit_mutable_pattern_matches_metadata(self, make_config): - import re - - cfg = make_config( - { - "r": { - "type": "remote", - "package": "npm", - "base_url": "https://x.com", - "mutable_patterns": [r"^(?!.*\.tgz$).*"], - } - } - ) - patterns = cfg.get_mutable_patterns("r") - assert any(re.search(p, "express") for p in patterns) - assert any(re.search(p, "@babel/core") for p in patterns) - - def test_helm_returns_index_yaml_as_mutable(self, make_config): - cfg = make_config({"r": {"package": "helm", "base_url": "https://helm.example.com"}}) - patterns = cfg.get_mutable_patterns("r") - assert r"index\.yaml$" in patterns - - def test_helm_chart_tarballs_not_mutable_by_default(self, make_config): - import re - - cfg = make_config({"r": {"package": "helm", "base_url": "https://helm.example.com"}}) - patterns = cfg.get_mutable_patterns("r") - # Only index.yaml is mutable; .tgz chart tarballs are not - assert not any(re.search(p, 
"vault-0.29.1.tgz") for p in patterns) - assert not any(re.search(p, "consul-1.5.0.tgz") for p in patterns) - - def test_npm_explicit_mutable_pattern_excludes_tarballs(self, make_config): - import re - - cfg = make_config( - { - "r": { - "type": "remote", - "package": "npm", - "base_url": "https://x.com", - "mutable_patterns": [r"^(?!.*\.tgz$).*"], - } - } - ) - patterns = cfg.get_mutable_patterns("r") - assert not any(re.search(p, "express-4.18.2.tgz") for p in patterns) - assert not any(re.search(p, "express/-/express-4.18.2.tgz") for p in patterns) - - -# --------------------------------------------------------------------------- -# get_immutable_patterns -# --------------------------------------------------------------------------- - - -class TestGetImmutablePatterns: - def test_returns_immutable_patterns(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "generic", - "base_url": "https://x.com", - "immutable_patterns": [r".*\.tar\.gz$"], - } - } - ) - assert cfg.get_immutable_patterns("r") == [r".*\.tar\.gz$"] - - def test_returns_empty_for_missing_remote(self, make_config): - cfg = make_config({}) - assert cfg.get_immutable_patterns("nonexistent") == [] - - def test_returns_empty_when_no_patterns_configured(self, make_config): - cfg = make_config({"r": {"package": "generic", "base_url": "https://x.com"}}) - assert cfg.get_immutable_patterns("r") == [] - - def test_multiple_patterns_returned(self, make_config): - patterns = [r".*\.rpm$", r".*/repodata/.*$"] - cfg = make_config( - { - "r": { - "type": "remote", - "package": "rpm", - "base_url": "https://x.com", - "immutable_patterns": patterns, - } - } - ) - assert cfg.get_immutable_patterns("r") == patterns - - def test_dict_keyed_repositories_returns_per_repo_patterns(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "generic", - "base_url": "https://x.com", - "immutable_patterns": [r".*\.tar\.gz$"], - "repositories": { - 
"/path/to/repo": {"immutable_patterns": [r".*\.rpm$"]}, - }, - } - } - ) - assert cfg.get_immutable_patterns("r", "/path/to/repo") == [r".*\.rpm$"] - - def test_dict_keyed_repositories_falls_back_to_remote_patterns(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "generic", - "base_url": "https://x.com", - "immutable_patterns": [r".*\.tar\.gz$"], - "repositories": { - "/path/to/repo": {"immutable_patterns": [r".*\.rpm$"]}, - }, - } - } - ) - assert cfg.get_immutable_patterns("r", "/unknown/path") == [r".*\.tar\.gz$"] - - -# --------------------------------------------------------------------------- -# get_user_mutable_patterns -# --------------------------------------------------------------------------- - - -class TestGetUserMutablePatterns: - def test_returns_only_user_patterns(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "alpine", - "base_url": "https://x.com", - "mutable_patterns": [r"custom\.json$"], - } - } - ) - assert cfg.get_user_mutable_patterns("r") == [r"custom\.json$"] - - def test_excludes_package_defaults(self, make_config): - # Package defaults (APKINDEX etc.) 
must NOT appear here - cfg = make_config({"r": {"package": "alpine", "base_url": "https://x.com"}}) - assert cfg.get_user_mutable_patterns("r") == [] - - def test_returns_empty_for_missing_remote(self, make_config): - cfg = make_config({}) - assert cfg.get_user_mutable_patterns("nonexistent") == [] - - def test_returns_empty_when_key_absent(self, make_config): - cfg = make_config({"r": {"package": "generic", "base_url": "https://x.com"}}) - assert cfg.get_user_mutable_patterns("r") == [] - - -# --------------------------------------------------------------------------- -# get_cache_config -# --------------------------------------------------------------------------- - - -class TestGetCacheConfig: - def test_returns_cache_section(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "generic", - "base_url": "https://x.com", - "cache": {"immutable_ttl": 0, "mutable_ttl": 7200}, - } - } - ) - assert cfg.get_cache_config("r") == {"immutable_ttl": 0, "mutable_ttl": 7200} - - def test_returns_empty_dict_for_missing_remote(self, make_config): - cfg = make_config({}) - assert cfg.get_cache_config("nonexistent") == {} - - def test_returns_empty_dict_when_no_cache_key(self, make_config): - cfg = make_config({"r": {"package": "generic", "base_url": "https://x.com"}}) - assert cfg.get_cache_config("r") == {} - - -# --------------------------------------------------------------------------- -# Config file reload -# --------------------------------------------------------------------------- - - -class TestConfigReload: - def test_reloads_when_file_mtime_advances(self, tmp_path): - cfg_file = tmp_path / "remotes.yaml" - cfg_file.write_text(yaml.dump({"remotes": {"repo-a": {"package": "generic", "base_url": "https://x.com"}}})) - cfg = ConfigManager(str(cfg_file)) - assert "repo-a" in cfg.config["remotes"] - - cfg_file.write_text(yaml.dump({"remotes": {"repo-b": {"package": "generic", "base_url": "https://y.com"}}})) - future_mtime = 
cfg._last_modified + 1 - os.utime(str(cfg_file), (future_mtime, future_mtime)) - - cfg._check_reload() - - assert "repo-b" in cfg.config["remotes"] - assert "repo-a" not in cfg.config["remotes"] - - def test_no_reload_when_file_unchanged(self, tmp_path): - cfg_file = tmp_path / "remotes.yaml" - cfg_file.write_text(yaml.dump({"remotes": {"repo-a": {"package": "generic", "base_url": "https://x.com"}}})) - cfg = ConfigManager(str(cfg_file)) - - # Call check_reload without touching the file — should not reload - cfg._check_reload() - - assert "repo-a" in cfg.config["remotes"] - - -# --------------------------------------------------------------------------- -# get_quarantine_config -# --------------------------------------------------------------------------- - - -class TestGetQuarantineConfig: - def test_returns_false_zero_when_not_configured(self, make_config): - cfg = make_config({"r": {"package": "generic", "base_url": "https://x.com"}}) - enabled, days = cfg.get_quarantine_config("r") - assert enabled is False - assert days == 0 - - def test_returns_false_zero_for_missing_remote(self, make_config): - cfg = make_config({}) - enabled, days = cfg.get_quarantine_config("nonexistent") - assert enabled is False - assert days == 0 - - def test_enabled_true_and_days_returned(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "generic", - "base_url": "https://x.com", - "quarantine_new": True, - "quarantine_days": 7, - } - } - ) - enabled, days = cfg.get_quarantine_config("r") - assert enabled is True - assert days == 7 - - def test_quarantine_new_false_returns_disabled(self, make_config): - cfg = make_config( - { - "r": { - "type": "remote", - "package": "generic", - "base_url": "https://x.com", - "quarantine_new": False, - "quarantine_days": 7, - } - } - ) - enabled, days = cfg.get_quarantine_config("r") - assert enabled is False - assert days == 7 - - def test_enabled_with_zero_days_returns_zero(self, make_config): - cfg = make_config( 
- { - "r": { - "type": "remote", - "package": "generic", - "base_url": "https://x.com", - "quarantine_new": True, - "quarantine_days": 0, - } - } - ) - enabled, days = cfg.get_quarantine_config("r") - assert enabled is True - assert days == 0 - - -# --------------------------------------------------------------------------- -# Directory mode (CONFIG_PATH points to a directory) -# --------------------------------------------------------------------------- - - -def _remote(base_url: str = "https://x.com") -> dict: - return {"package": "generic", "base_url": base_url} - - -class TestConfigDirMode: - def test_loads_all_yaml_files(self, tmp_path): - (tmp_path / "a.yaml").write_text(yaml.dump({"remotes": {"repo-a": _remote()}})) - (tmp_path / "b.yaml").write_text(yaml.dump({"remotes": {"repo-b": _remote("https://y.com")}})) - cfg = ConfigManager(str(tmp_path)) - assert "repo-a" in cfg.config["remotes"] - assert "repo-b" in cfg.config["remotes"] - - def test_later_file_overrides_earlier_on_same_key(self, tmp_path): - (tmp_path / "a.yaml").write_text(yaml.dump({"remotes": {"r": _remote("https://first.com")}})) - (tmp_path / "b.yaml").write_text(yaml.dump({"remotes": {"r": _remote("https://second.com")}})) - cfg = ConfigManager(str(tmp_path)) - assert cfg.config["remotes"]["r"]["base_url"] == "https://second.com" - - def test_empty_directory_returns_empty_remotes(self, tmp_path): - cfg = ConfigManager(str(tmp_path)) - assert cfg.config == {"remotes": {}, "virtuals": {}, "locals": {}} - - def test_ignores_non_yaml_files(self, tmp_path): - (tmp_path / "notes.txt").write_text("not yaml") - (tmp_path / "a.yaml").write_text(yaml.dump({"remotes": {"repo-a": _remote()}})) - cfg = ConfigManager(str(tmp_path)) - assert list(cfg.config["remotes"].keys()) == ["repo-a"] - - def test_reload_picks_up_new_file(self, tmp_path): - (tmp_path / "a.yaml").write_text(yaml.dump({"remotes": {"repo-a": _remote()}})) - cfg = ConfigManager(str(tmp_path)) - assert "repo-a" in cfg.config["remotes"] - 
assert "repo-b" not in cfg.config["remotes"] - - new_file = tmp_path / "b.yaml" - new_file.write_text(yaml.dump({"remotes": {"repo-b": _remote("https://y.com")}})) - future_mtime = cfg._last_modified + 1 - os.utime(str(new_file), (future_mtime, future_mtime)) - - cfg._check_reload() - - assert "repo-a" in cfg.config["remotes"] - assert "repo-b" in cfg.config["remotes"] - - -# --------------------------------------------------------------------------- -# config_dir key (main file contains a config_dir pointer) -# --------------------------------------------------------------------------- - - -class TestConfigDirKey: - def test_merges_remotes_from_config_dir(self, tmp_path): - conf_d = tmp_path / "conf.d" - conf_d.mkdir() - (conf_d / "remotes.yaml").write_text(yaml.dump({"remotes": {"repo-extra": _remote("https://extra.com")}})) - main = tmp_path / "config.yaml" - main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {"repo-main": _remote()}})) - cfg = ConfigManager(str(main)) - assert "repo-main" in cfg.config["remotes"] - assert "repo-extra" in cfg.config["remotes"] - - def test_relative_config_dir_resolved_from_main_file(self, tmp_path): - conf_d = tmp_path / "conf.d" - conf_d.mkdir() - (conf_d / "r.yaml").write_text(yaml.dump({"remotes": {"repo-a": _remote()}})) - main = tmp_path / "config.yaml" - main.write_text(yaml.dump({"config_dir": "conf.d", "remotes": {}})) - cfg = ConfigManager(str(main)) - assert "repo-a" in cfg.config["remotes"] - - def test_config_dir_key_not_present_in_loaded_config(self, tmp_path): - conf_d = tmp_path / "conf.d" - conf_d.mkdir() - main = tmp_path / "config.yaml" - main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {}})) - cfg = ConfigManager(str(main)) - assert "config_dir" not in cfg.config - - def test_dir_remote_overrides_main_file_remote(self, tmp_path): - conf_d = tmp_path / "conf.d" - conf_d.mkdir() - (conf_d / "override.yaml").write_text(yaml.dump({"remotes": {"r": _remote("https://new.com")}})) - main 
= tmp_path / "config.yaml" - main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {"r": _remote("https://old.com")}})) - cfg = ConfigManager(str(main)) - assert cfg.config["remotes"]["r"]["base_url"] == "https://new.com" - - def test_empty_config_dir_uses_main_file_only(self, tmp_path): - conf_d = tmp_path / "conf.d" - conf_d.mkdir() - main = tmp_path / "config.yaml" - main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {"repo-main": _remote()}})) - cfg = ConfigManager(str(main)) - assert list(cfg.config["remotes"].keys()) == ["repo-main"] - - def test_reload_picks_up_changed_dir_file(self, tmp_path): - conf_d = tmp_path / "conf.d" - conf_d.mkdir() - dir_file = conf_d / "r.yaml" - dir_file.write_text(yaml.dump({"remotes": {"repo-v1": _remote()}})) - main = tmp_path / "config.yaml" - main.write_text(yaml.dump({"config_dir": str(conf_d), "remotes": {}})) - cfg = ConfigManager(str(main)) - assert "repo-v1" in cfg.config["remotes"] - - dir_file.write_text(yaml.dump({"remotes": {"repo-v2": _remote("https://v2.com")}})) - future_mtime = cfg._last_modified + 1 - os.utime(str(dir_file), (future_mtime, future_mtime)) - - cfg._check_reload() - - assert "repo-v2" in cfg.config["remotes"] - assert "repo-v1" not in cfg.config["remotes"] diff --git a/tests/test_docker_auth.py b/tests/test_docker_auth.py deleted file mode 100644 index 77bf48d..0000000 --- a/tests/test_docker_auth.py +++ /dev/null @@ -1,273 +0,0 @@ -"""Tests for docker_auth: WWW-Authenticate parsing and token caching.""" - -import time -from unittest.mock import AsyncMock, MagicMock, patch - -import httpx -import pytest - -from artifactapi import docker_auth -from artifactapi.docker_auth import ( - _cache_key, - _get_cached_token, - _store_token, - fetch_token, - get_docker_token_for_response, - parse_www_authenticate, -) - - -@pytest.fixture(autouse=True) -def clear_token_cache(): - """Isolate tests: wipe the module-level token cache before and after each test.""" - 
docker_auth._token_cache.clear() - yield - docker_auth._token_cache.clear() - - -# --------------------------------------------------------------------------- -# parse_www_authenticate -# --------------------------------------------------------------------------- - - -class TestParseWwwAuthenticate: - def test_full_bearer_header(self): - header = 'Bearer realm="https://auth.docker.io/token",service="registry.docker.io",scope="repository:library/nginx:pull"' - result = parse_www_authenticate(header) - assert result is not None - realm, service, scope = result - assert realm == "https://auth.docker.io/token" - assert service == "registry.docker.io" - assert scope == "repository:library/nginx:pull" - - def test_realm_only(self): - header = 'Bearer realm="https://auth.example.com/token"' - result = parse_www_authenticate(header) - assert result is not None - realm, service, scope = result - assert realm == "https://auth.example.com/token" - assert service == "" - assert scope == "" - - def test_realm_and_service_only(self): - header = 'Bearer realm="https://auth.example.com",service="registry.example.com"' - result = parse_www_authenticate(header) - assert result is not None - _, service, scope = result - assert service == "registry.example.com" - assert scope == "" - - def test_invalid_scheme_returns_none(self): - assert parse_www_authenticate('Basic realm="example"') is None - - def test_empty_header_returns_none(self): - assert parse_www_authenticate("") is None - - def test_case_insensitive_bearer_parses_realm(self): - header = 'bearer realm="https://auth.example.com/token"' - result = parse_www_authenticate(header) - assert result is not None - realm, _, _ = result - assert realm == "https://auth.example.com/token" - - def test_field_order_scope_before_service_drops_service(self): - # The regex requires realm,service,scope order; scope before service - # results in service being silently dropped. This test documents the known limitation. 
- header = 'Bearer realm="https://auth.example.com",scope="repo:pull",service="svc"' - result = parse_www_authenticate(header) - assert result is not None - realm, service, scope = result - assert realm == "https://auth.example.com" - assert scope == "repo:pull" - assert service == "" # silently dropped when out of order - - -# --------------------------------------------------------------------------- -# _cache_key -# --------------------------------------------------------------------------- - - -class TestCacheKey: - def test_key_contains_all_components(self): - key = _cache_key("https://realm.com", "svc", "scope", "user") - assert "https://realm.com" in key - assert "svc" in key - assert "scope" in key - assert "user" in key - - def test_none_username_uses_empty_string(self): - key = _cache_key("https://realm.com", "svc", "scope", None) - assert key.endswith("|") - - def test_different_services_give_different_keys(self): - k1 = _cache_key("realm", "svc1", "scope", None) - k2 = _cache_key("realm", "svc2", "scope", None) - assert k1 != k2 - - def test_different_scopes_give_different_keys(self): - k1 = _cache_key("realm", "svc", "scope:read", None) - k2 = _cache_key("realm", "svc", "scope:write", None) - assert k1 != k2 - - def test_pipe_in_field_value_can_collide_with_adjacent_fields(self): - # The "|" separator is not escaped, so a pipe embedded in one field - # produces the same key as the same pipe appearing as a separator boundary. - # This is a known limitation: _cache_key("a|b","c","d",None) == - # _cache_key("a","b|c","d",None). Documents the behaviour, not a claim it's correct. 
- k1 = _cache_key("a|b", "c", "d", None) - k2 = _cache_key("a", "b|c", "d", None) - assert k1 == k2 - - -# --------------------------------------------------------------------------- -# _get_cached_token / _store_token -# --------------------------------------------------------------------------- - - -class TestTokenCaching: - def test_get_returns_none_when_not_cached(self): - assert _get_cached_token("no-such-key") is None - - def test_get_returns_token_when_valid(self): - _store_token("mykey", "tok-abc", 300) - assert _get_cached_token("mykey") == "tok-abc" - - def test_get_returns_none_when_expired(self): - docker_auth._token_cache["mykey"] = ("old-token", time.time() - 1) - assert _get_cached_token("mykey") is None - - def test_expired_entry_is_removed_from_cache(self): - docker_auth._token_cache["mykey"] = ("old-token", time.time() - 1) - _get_cached_token("mykey") - assert "mykey" not in docker_auth._token_cache - - def test_store_expires_30s_before_stated_time(self): - before = time.time() - _store_token("mykey", "tok", 100) - _, expires_at = docker_auth._token_cache["mykey"] - # expires_in - 30 = 70; allow ±2 s clock wiggle - assert before + 68 <= expires_at <= before + 72 - - def test_store_enforces_minimum_10s_expiry(self): - before = time.time() - _store_token("mykey", "tok", 5) # expires_in - 30 would be negative - _, expires_at = docker_auth._token_cache["mykey"] - assert expires_at >= before + 10 - - -# --------------------------------------------------------------------------- -# fetch_token (async, mocks httpx) -# --------------------------------------------------------------------------- - - -def _make_mock_http_client(token_payload: dict): - mock_response = MagicMock() - mock_response.raise_for_status = MagicMock() - mock_response.json.return_value = token_payload - - mock_client = AsyncMock() - mock_client.get = AsyncMock(return_value=mock_response) - - ctx = MagicMock() - ctx.__aenter__ = AsyncMock(return_value=mock_client) - ctx.__aexit__ = 
AsyncMock(return_value=False) - return ctx, mock_client - - -class TestFetchToken: - async def test_returns_token_field(self): - ctx, _ = _make_mock_http_client({"token": "bearer-tok", "expires_in": 300}) - with patch("httpx.AsyncClient", return_value=ctx): - token = await fetch_token("https://auth.example.com", "svc", "scope") - assert token == "bearer-tok" - - async def test_falls_back_to_access_token_field(self): - ctx, _ = _make_mock_http_client({"access_token": "access-tok", "expires_in": 300}) - with patch("httpx.AsyncClient", return_value=ctx): - token = await fetch_token("https://auth.example.com", "svc", "scope") - assert token == "access-tok" - - async def test_returns_none_when_response_missing_token_field(self): - ctx, _ = _make_mock_http_client({"not_token": "value", "expires_in": 300}) - with patch("httpx.AsyncClient", return_value=ctx): - token = await fetch_token("https://auth.example.com", "svc", "scope") - assert token is None - - async def test_defaults_expires_in_to_300_when_missing(self): - ctx, _ = _make_mock_http_client({"token": "tok"}) # no expires_in key - before = time.time() - with patch("httpx.AsyncClient", return_value=ctx): - token = await fetch_token("https://auth.example.com", "svc", "scope") - assert token == "tok" - key = _cache_key("https://auth.example.com", "svc", "scope", None) - _, expires_at = docker_auth._token_cache[key] - # Default expires_in=300, stored as time.time() + max(300-30, 10) = 270 - assert before + 268 <= expires_at <= before + 272 - - async def test_uses_cache_on_second_call_without_http(self): - ctx, mock_client = _make_mock_http_client({"token": "cached-tok", "expires_in": 300}) - with patch("httpx.AsyncClient", return_value=ctx): - await fetch_token("https://auth.example.com", "svc", "scope") - mock_client.get.reset_mock() - token = await fetch_token("https://auth.example.com", "svc", "scope") - mock_client.get.assert_not_called() - assert token == "cached-tok" - - async def 
test_returns_none_on_network_error(self): - mock_client = AsyncMock() - mock_client.get = AsyncMock(side_effect=Exception("connection refused")) - ctx = MagicMock() - ctx.__aenter__ = AsyncMock(return_value=mock_client) - ctx.__aexit__ = AsyncMock(return_value=False) - with patch("httpx.AsyncClient", return_value=ctx): - token = await fetch_token("https://auth.example.com", "svc", "scope") - assert token is None - - async def test_returns_none_on_http_status_error(self): - mock_response = MagicMock() - mock_response.raise_for_status.side_effect = httpx.HTTPStatusError("401 Unauthorized", request=MagicMock(), response=MagicMock()) - mock_client = AsyncMock() - mock_client.get = AsyncMock(return_value=mock_response) - ctx = MagicMock() - ctx.__aenter__ = AsyncMock(return_value=mock_client) - ctx.__aexit__ = AsyncMock(return_value=False) - with patch("httpx.AsyncClient", return_value=ctx): - token = await fetch_token("https://auth.example.com", "svc", "scope") - assert token is None - - async def test_passes_credentials_as_auth_tuple(self): - ctx, mock_client = _make_mock_http_client({"token": "authed-tok", "expires_in": 300}) - with patch("httpx.AsyncClient", return_value=ctx): - await fetch_token("https://auth.example.com", "svc", "scope", "user", "pass") - call_kwargs = mock_client.get.call_args.kwargs - assert call_kwargs.get("auth") == ("user", "pass") - - async def test_no_auth_when_no_credentials(self): - ctx, mock_client = _make_mock_http_client({"token": "anon-tok", "expires_in": 300}) - with patch("httpx.AsyncClient", return_value=ctx): - await fetch_token("https://auth.example.com", "svc", "scope") - call_kwargs = mock_client.get.call_args.kwargs - assert call_kwargs.get("auth") is None - - -# --------------------------------------------------------------------------- -# get_docker_token_for_response -# --------------------------------------------------------------------------- - - -class TestGetDockerTokenForResponse: - async def 
test_returns_none_for_non_bearer_header(self): - token = await get_docker_token_for_response('Basic realm="example"') - assert token is None - - async def test_end_to_end_parse_and_fetch(self): - """parse_www_authenticate → fetch_token wired together end-to-end.""" - header = 'Bearer realm="https://auth.example.com",service="svc",scope="repo:pull"' - ctx, mock_client = _make_mock_http_client({"token": "e2e-tok", "expires_in": 300}) - with patch("httpx.AsyncClient", return_value=ctx): - token = await get_docker_token_for_response(header, "user", "pass") - assert token == "e2e-tok" - call_kwargs = mock_client.get.call_args.kwargs - assert call_kwargs["params"]["service"] == "svc" - assert call_kwargs["params"]["scope"] == "repo:pull" - assert call_kwargs["auth"] == ("user", "pass") diff --git a/tests/test_routes.py b/tests/test_routes.py deleted file mode 100644 index 6195e59..0000000 --- a/tests/test_routes.py +++ /dev/null @@ -1,1528 +0,0 @@ -"""FastAPI route tests using TestClient with mocked service dependencies.""" - -import hashlib -import json -from datetime import UTC -from unittest.mock import ANY, AsyncMock, MagicMock, patch - -import pytest - -# --------------------------------------------------------------------------- -# Per-test service mocks (replace module-level globals in main.py) -# --------------------------------------------------------------------------- - - -@pytest.fixture -def mock_storage(): - m = MagicMock() - m.get_object_key.return_value = "test-remote/abc123/file.ext" - m.exists.return_value = False - m.download_object.return_value = b"fake content" - m.bucket = "testbucket" - m.client = MagicMock() - return m - - -@pytest.fixture -def mock_cache(): - m = MagicMock() - m.is_mutable_file.return_value = False - m.is_index_valid.return_value = True - m.available = False - m.client = None - return m - - -@pytest.fixture -def mock_database(): - m = MagicMock() - m.available = False - return m - - -@pytest.fixture -def mock_metrics(): - return 
MagicMock() - - -@pytest.fixture -def patched_deps(mock_storage, mock_cache, mock_database, mock_metrics): - """Swap the module-level service instances in main.py for the duration of a test.""" - import artifactapi.main as main_mod - - with ( - patch.object(main_mod, "storage", mock_storage), - patch.object(main_mod, "cache", mock_cache), - patch.object(main_mod, "database", mock_database), - patch.object(main_mod, "metrics", mock_metrics), - ): - yield { - "storage": mock_storage, - "cache": mock_cache, - "database": mock_database, - "metrics": mock_metrics, - } - - -# --------------------------------------------------------------------------- -# Basic / health endpoints -# --------------------------------------------------------------------------- - - -class TestBasicEndpoints: - def test_root_returns_remote_list(self, client): - response = client.get("/") - assert response.status_code == 200 - data = response.json() - assert "remotes" in data - assert isinstance(data["remotes"], list) - assert len(data["remotes"]) > 0 - - def test_root_contains_version(self, client): - response = client.get("/") - assert "version" in response.json() - - def test_health_check(self, client): - response = client.get("/health") - assert response.status_code == 200 - assert response.json()["status"] == "healthy" - - def test_docker_v2_ping(self, client): - response = client.get("/v2/") - assert response.status_code == 200 - assert response.headers.get("Docker-Distribution-Api-Version") == "registry/2.0" - assert response.json() == {} - - -# --------------------------------------------------------------------------- -# Docker proxy /v2/{remote}/{path} -# --------------------------------------------------------------------------- - - -class TestDockerProxy: - def test_unknown_remote_returns_404(self, client, patched_deps): - response = client.get("/v2/no-such-remote/library/nginx/manifests/latest") - assert response.status_code == 404 - - def test_non_docker_package_returns_400(self, 
client, patched_deps): - # alpine-test is package: alpine, not docker - response = client.get("/v2/alpine-test/library/nginx/manifests/latest") - assert response.status_code == 400 - - def test_pattern_blocked_returns_403(self, client, patched_deps): - # docker-restricted allows only "library/nginx" - response = client.get("/v2/docker-restricted/library/ubuntu/manifests/latest") - assert response.status_code == 403 - - def test_allowed_pattern_proceeds_to_cache(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps( - { - "mediaType": "application/vnd.oci.image.manifest.v1+json", - "layers": [], - } - ).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/v2/docker-restricted/library/nginx/manifests/latest") - assert response.status_code == 200 - - def test_cache_hit_manifest_returns_correct_content_type(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps( - { - "mediaType": "application/vnd.docker.distribution.manifest.v2+json", - "schemaVersion": 2, - "layers": [], - } - ).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - assert response.status_code == 200 - ct = response.headers["content-type"] - assert ct.startswith("application/vnd.docker.distribution.manifest.v2+json") - - def test_cache_hit_sets_docker_content_digest_header(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps( - { - "mediaType": "application/vnd.oci.image.manifest.v1+json", - "layers": [], - } - ).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = 
manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - expected = f"sha256:{hashlib.sha256(manifest).hexdigest()}" - assert response.headers["Docker-Content-Digest"] == expected - - def test_cache_hit_records_metrics(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = False - - client.get("/v2/docker-test/library/nginx/manifests/latest") - deps["metrics"].record_cache_hit.assert_called_once_with("docker-test", ANY) - - def test_head_request_returns_no_body(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps( - { - "mediaType": "application/vnd.oci.image.manifest.v1+json", - "layers": [], - } - ).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = False - - response = client.head("/v2/docker-test/library/nginx/manifests/latest") - assert response.status_code == 200 - assert response.content == b"" - - def test_cache_miss_calls_upstream_fetch(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps( - { - "mediaType": "application/vnd.oci.image.manifest.v1+json", - "layers": [], - } - ).encode() - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - - mock_fetch.assert_called_once() - assert response.status_code == 
200 - - def test_cache_miss_on_index_marks_index_cached(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps( - { - "mediaType": "application/vnd.oci.image.manifest.v1+json", - "layers": [], - } - ).encode() - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ): - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_called_once() - - def test_index_expired_triggers_refetch(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps( - { - "mediaType": "application/vnd.oci.image.manifest.v1+json", - "layers": [], - } - ).encode() - deps["storage"].exists.return_value = True # cached in S3 - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = False # but TTL expired - deps["storage"].download_object.return_value = manifest - - with patch("artifactapi.artifact.proxy._upstream_reachable", new_callable=AsyncMock, return_value=True): - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - - mock_fetch.assert_called_once() - assert response.status_code == 200 - - # --- Issue 1: sha256 digest cross-linking --- - - def test_tag_manifest_is_stored_under_digest_key_on_cache_hit(self, client, patched_deps): - # When serving a cached tag manifest the handler must also write the content - # under the sha256 digest key so subsequent sha256-addressed pulls hit cache. 
- deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - # First exists call (tag manifest): hit. Second (digest key): miss → triggers upload. - deps["storage"].exists.side_effect = [True, False] - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/v2/docker-test/library/nginx/manifests/v1.25.3") - - assert response.status_code == 200 - deps["storage"].upload.assert_called_once_with(deps["storage"].get_object_key.return_value, manifest) - - def test_tag_manifest_digest_key_not_written_when_already_exists(self, client, patched_deps): - # When the digest key already exists in storage upload must not be called. - deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - # Both the tag key and the digest key already present. - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - client.get("/v2/docker-test/library/nginx/manifests/v1.25.3") - - deps["storage"].upload.assert_not_called() - - def test_sha256_manifest_request_is_not_cross_linked(self, client, patched_deps): - # sha256-addressed manifests are immutable — the cross-link logic must not apply. 
- deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = False # sha256 manifest is immutable - - with patch("artifactapi.artifact.proxy._fetch_last_modified", new_callable=AsyncMock, return_value=None): - client.get("/v2/docker-test/library/nginx/manifests/sha256:" + "a" * 64) - - deps["storage"].upload.assert_not_called() - - # --- Issue 2: thundering herd distributed lock --- - - def test_lock_acquired_and_released_on_upstream_fetch(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - deps["storage"].exists.side_effect = [False, False] # initial miss; digest key also absent - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].acquire_fetch_lock.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ): - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - - deps["cache"].acquire_fetch_lock.assert_called_once() - deps["cache"].release_fetch_lock.assert_called_once() - assert response.status_code == 200 - - def test_lock_released_even_when_fetch_returns_error(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = False - deps["cache"].is_mutable_file.return_value = True - deps["cache"].acquire_fetch_lock.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "error", "error": "upstream down"}, - ): - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - - deps["cache"].release_fetch_lock.assert_called_once() 
- assert response.status_code == 502 - - def test_thundering_herd_polls_storage_when_lock_not_acquired(self, client, patched_deps): - # When the lock is held by another pod the handler must poll storage and serve - # from cache once the competing fetch completes, without issuing its own upstream request. - deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - # Initial cache check: miss. First poll iteration: another pod has written it. - # Third call is for the digest cross-link check (is_mutable=True path); digest key exists. - deps["storage"].exists.side_effect = [False, True, True] - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - deps["cache"].acquire_fetch_lock.return_value = False # lock held by peer - - with patch("artifactapi.artifact.docker.asyncio.sleep", new_callable=AsyncMock): - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - ) as mock_fetch: - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - - mock_fetch.assert_not_called() - assert response.status_code == 200 - - def test_thundering_herd_falls_through_to_fetch_if_poll_times_out(self, client, patched_deps): - # If the item never appears in storage during the poll window the handler must - # still issue its own upstream fetch as a fallback. - deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - # All exists calls return False — item never appears during polling. 
- deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].acquire_fetch_lock.return_value = False # lock held by peer - - with patch("artifactapi.artifact.docker.asyncio.sleep", new_callable=AsyncMock): - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - - mock_fetch.assert_called_once() - assert response.status_code == 200 - - -# --------------------------------------------------------------------------- -# Docker ban_tags feature -# --------------------------------------------------------------------------- - - -class TestDockerBanTags: - def test_banned_tag_returns_403(self, client, patched_deps): - response = client.get("/v2/docker-bantags-test/library/nginx/manifests/latest") - assert response.status_code == 403 - assert "latest" in response.json()["detail"] - - def test_second_banned_tag_returns_403(self, client, patched_deps): - response = client.get("/v2/docker-bantags-test/library/nginx/manifests/edge") - assert response.status_code == 403 - assert "edge" in response.json()["detail"] - - def test_allowed_tag_proceeds(self, client, patched_deps): - deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/v2/docker-bantags-test/library/nginx/manifests/1.25.3") - assert response.status_code == 200 - - def test_digest_pull_bypasses_ban(self, client, patched_deps): - # sha256-addressed pulls must never be blocked by the tag ban list - deps = patched_deps - manifest = 
json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = False - - digest = "sha256:" + "a" * 64 - with patch("artifactapi.artifact.proxy._fetch_last_modified", new_callable=AsyncMock, return_value=None): - response = client.get(f"/v2/docker-bantags-test/library/nginx/manifests/{digest}") - assert response.status_code == 200 - - def test_ban_tags_disabled_by_default(self, client, patched_deps): - # docker-test has no ban_tags_enabled — "latest" must pass through - deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/v2/docker-test/library/nginx/manifests/latest") - assert response.status_code == 200 - - def test_ban_tags_enabled_but_empty_list_allows_all(self, client, patched_deps): - # If ban_tags_enabled is true but ban_tags is empty nothing should be blocked. - # docker-test doesn't have ban_tags_enabled, but we can verify via the - # docker-bantags-test remote with an unlisted tag. 
- deps = patched_deps - manifest = json.dumps({"mediaType": "application/vnd.oci.image.manifest.v1+json", "layers": []}).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = manifest - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/v2/docker-bantags-test/library/nginx/manifests/stable") - assert response.status_code == 200 - - def test_ban_check_does_not_apply_to_blobs(self, client, patched_deps): - # Blob paths don't contain /manifests/ — the ban check must not interfere - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"\x00" * 100 - deps["cache"].is_mutable_file.return_value = False - - with patch("artifactapi.artifact.proxy._fetch_last_modified", new_callable=AsyncMock, return_value=None): - response = client.get("/v2/docker-bantags-test/library/nginx/blobs/sha256:" + "b" * 64) - assert response.status_code == 200 - - -# --------------------------------------------------------------------------- -# Generic artifact route /api/v1/remote/{remote}/{path} -# --------------------------------------------------------------------------- - - -class TestGenericArtifactRoute: - def test_unknown_remote_returns_404(self, client, patched_deps): - response = client.get("/api/v1/remote/nonexistent/path/to/file.tar.gz") - assert response.status_code == 404 - - def test_pattern_blocked_returns_403(self, client, patched_deps): - # generic-test only allows .tar.gz - response = client.get("/api/v1/remote/generic-test/some/path/file.rpm") - assert response.status_code == 403 - - def test_cache_hit_returns_200_with_source_header(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"tar content" - deps["cache"].is_mutable_file.return_value = False - - response = 
client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") - assert response.status_code == 200 - assert response.headers["X-Artifact-Source"] == "cache" - assert response.content == b"tar content" - - def test_cache_hit_sets_content_disposition(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") - disposition = response.headers["content-disposition"] - assert "attachment" in disposition - assert "archive.tar.gz" in disposition - - def test_cache_hit_sets_artifact_size_header(self, client, patched_deps): - deps = patched_deps - content = b"some artifact content bytes" - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = content - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") - assert response.headers["X-Artifact-Size"] == str(len(content)) - - def test_cache_hit_records_metrics(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - - client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") - deps["metrics"].record_cache_hit.assert_called_once_with("generic-test", ANY) - - def test_cache_hit_records_artifact_mapping(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - - client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") - deps["database"].record_artifact_mapping.assert_called_once() - - def test_cache_hit_rpm_returns_correct_content_type(self, client, 
patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"rpm bytes" - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/rpm-test/almalinux/9/x86_64/bash-5.1.8.x86_64.rpm") - assert response.status_code == 200 - assert "application/x-rpm" in response.headers["content-type"] - - def test_cache_hit_xml_returns_correct_content_type(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"" - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/rpm-test/repo/repodata/primary.xml") - assert response.status_code == 200 - assert "application/xml" in response.headers["content-type"] - - def test_cache_miss_fetches_upstream_and_returns_200(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = b"fresh content" - deps["cache"].is_mutable_file.return_value = False - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") - - mock_fetch.assert_called_once() - assert response.status_code == 200 - assert response.headers["X-Artifact-Source"] == "remote" - - def test_cache_miss_records_metrics(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = b"fresh content" - deps["cache"].is_mutable_file.return_value = False - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ): - client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") - - 
deps["metrics"].record_cache_miss.assert_called_once_with("generic-test", ANY) - - def test_cache_miss_on_index_marks_index_cached(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = b"APKINDEX content" - deps["cache"].is_mutable_file.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ): - response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz") - - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_called_once() - - def test_upstream_error_returns_502(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = False - deps["cache"].is_mutable_file.return_value = False - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "error", "error": "upstream unreachable"}, - ): - response = client.get("/api/v1/remote/generic-test/some/path/archive.tar.gz") - - assert response.status_code == 502 - - def test_mutable_file_bypasses_immutable_patterns(self, client, patched_deps): - """Mutable files must be served even when they don't match immutable_patterns.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"APKINDEX content" - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - # APKINDEX.tar.gz does not match alpine-test's immutable_patterns (.*.apk$), - # but since is_mutable_file returns True it must be allowed through. 
- response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz") - assert response.status_code == 200 - - def test_mutable_unchanged_refreshes_ttl_without_redownload(self, client, patched_deps): - """When check_mutable_updates=True and upstream says 304, TTL is refreshed in place.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"metadata content" - # File is mutable and its TTL has expired - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = False - deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'} - - with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock, return_value=False): - response = client.get("/api/v1/remote/check-mutable-test/metadata.json") - - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_called() - # S3 object must NOT have been deleted (no re-download) - deps["storage"].client.delete_object.assert_not_called() - - def test_mutable_changed_triggers_redownload(self, client, patched_deps): - """When check_mutable_updates=True and upstream says 200, cache is invalidated.""" - deps = patched_deps - deps["storage"].exists.return_value = False - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = False - deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'} - - with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock, return_value=True): - with patch("artifactapi.artifact.proxy.cache_single_artifact", new_callable=AsyncMock) as mock_cache: - mock_cache.return_value = {"status": "error", "error": "upstream gone"} - response = client.get("/api/v1/remote/check-mutable-test/metadata.json") - - assert response.status_code == 502 - - def test_mutable_changed_redownloads_successfully(self, client, patched_deps): - """When check_mutable_updates=True and upstream says 
200, fresh copy is fetched and served.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"fresh metadata" - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = False - deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'} - - with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock, return_value=True): - with patch("artifactapi.artifact.proxy.cache_single_artifact", new_callable=AsyncMock) as mock_cache: - mock_cache.return_value = {"status": "cached", "etag": '"def"', "last_modified": None} - response = client.get("/api/v1/remote/check-mutable-test/metadata.json") - - assert response.status_code == 200 - mock_cache.assert_called_once() - - def test_mutable_backend_unreachable_on_check_updates_keeps_stale(self, client, patched_deps): - """When check_mutable_updates=True and backend is unreachable, stale copy is kept and TTL refreshed.""" - from artifactapi.artifact.proxy import UpstreamUnreachable - - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"stale metadata" - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = False - deps["cache"].get_mutable_meta.return_value = {"etag": '"abc"'} - - with patch("artifactapi.artifact.proxy.check_upstream_changed", side_effect=UpstreamUnreachable("connection refused")): - response = client.get("/api/v1/remote/check-mutable-test/metadata.json") - - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_called() - deps["storage"].client.delete_object.assert_not_called() - - def test_mutable_backend_unreachable_on_expiry_keeps_stale(self, client, patched_deps): - """When a regular mutable file expires and backend is unreachable, stale copy is kept and TTL refreshed.""" - deps = patched_deps - deps["storage"].exists.return_value = True - 
deps["storage"].download_object.return_value = b"stale APKINDEX" - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = False - - with patch("artifactapi.artifact.proxy._upstream_reachable", new_callable=AsyncMock, return_value=False): - response = client.get("/api/v1/remote/alpine-test/alpine/v3.18/x86_64/APKINDEX.tar.gz") - - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_called() - deps["storage"].client.delete_object.assert_not_called() - - def test_mutable_flag_off_skips_conditional_check(self, client, patched_deps): - """When check_mutable_updates is not set, expired mutable files are always re-fetched.""" - deps = patched_deps - deps["storage"].exists.return_value = False - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = False - - with patch("artifactapi.artifact.proxy.check_upstream_changed", new_callable=AsyncMock) as mock_check: - with patch("artifactapi.artifact.proxy.cache_single_artifact", new_callable=AsyncMock) as mock_cache: - mock_cache.return_value = {"status": "error", "error": "upstream gone"} - client.get("/api/v1/remote/custom-index-test/metadata.json") - - mock_check.assert_not_called() - - def test_local_repo_file_not_found_returns_404(self, client, patched_deps): - deps = patched_deps - deps["database"].get_local_file_metadata.return_value = None - deps["database"].available = True - - response = client.get("/api/v1/local/local-test/path/to/nonexistent.bin") - assert response.status_code == 404 - - -# --------------------------------------------------------------------------- -# Upload route PUT /api/v1/local/{local}/{path} -# --------------------------------------------------------------------------- - - -class TestUploadRoute: - def test_unknown_local_returns_404(self, client, patched_deps): - response = client.put( - "/api/v1/local/nonexistent/path/to/file.tar.gz", - files={"file": ("file.tar.gz", b"content", 
"application/octet-stream")}, - ) - assert response.status_code == 404 - - -# --------------------------------------------------------------------------- -# HEAD route HEAD /api/v1/local/{local}/{path} -# --------------------------------------------------------------------------- - - -class TestHeadRoute: - def test_local_repo_file_not_found_returns_404(self, client, patched_deps): - deps = patched_deps - deps["database"].get_local_file_metadata.return_value = None - deps["database"].available = True - - response = client.head("/api/v1/local/local-test/path/to/nonexistent.bin") - assert response.status_code == 404 - - def test_unknown_local_returns_404(self, client, patched_deps): - response = client.head("/api/v1/local/nonexistent/path/to/file.bin") - assert response.status_code == 404 - - -# --------------------------------------------------------------------------- -# DELETE route DELETE /api/v1/local/{local}/{path} -# --------------------------------------------------------------------------- - - -class TestDeleteRoute: - def test_unknown_local_returns_404(self, client, patched_deps): - response = client.delete("/api/v1/local/nonexistent/path/to/file.tar.gz") - assert response.status_code == 404 - - -# --------------------------------------------------------------------------- -# Cache flush PUT /cache/flush -# --------------------------------------------------------------------------- - - -class TestCacheFlushEndpoint: - def test_flush_all_returns_flushed_structure(self, client, patched_deps): - deps = patched_deps - deps["cache"].available = False - deps["storage"].client.list_objects_v2.return_value = {} - - response = client.put("/cache/flush") - assert response.status_code == 200 - data = response.json() - assert "flushed" in data - assert "redis_keys" in data["flushed"] - assert "s3_objects" in data["flushed"] - - def test_flush_specific_remote_echoes_remote(self, client, patched_deps): - deps = patched_deps - deps["cache"].available = False - 
deps["storage"].client.list_objects_v2.return_value = {} - - response = client.put("/cache/flush?remote=alpine-test") - assert response.status_code == 200 - assert response.json()["remote"] == "alpine-test" - - def test_flush_all_deletes_redis_keys_when_cache_available(self, client, patched_deps): - deps = patched_deps - deps["cache"].available = True - redis_mock = MagicMock() - deps["cache"].client = redis_mock - # index:* returns keys; mutable:meta:* and metrics:* return nothing - redis_mock.keys.side_effect = [["index:test:abc", "index:test:def"], [], []] - deps["storage"].client.list_objects_v2.return_value = {} - - response = client.put("/cache/flush") - assert response.status_code == 200 - data = response.json() - assert data["flushed"]["redis_keys"] == 2 - redis_mock.delete.assert_called_once_with("index:test:abc", "index:test:def") - - -# --------------------------------------------------------------------------- -# Metrics endpoint GET /metrics -# --------------------------------------------------------------------------- - - -class TestMetricsEndpoint: - def test_returns_prometheus_text_by_default(self, client, patched_deps): - response = client.get("/metrics") - assert response.status_code == 200 - assert response.headers["content-type"].startswith("text/plain") - - -# --------------------------------------------------------------------------- -# Config endpoint GET /config -# --------------------------------------------------------------------------- - - -class TestConfigEndpoint: - def test_returns_config_with_remotes(self, client): - response = client.get("/config") - assert response.status_code == 200 - data = response.json() - assert "remotes" in data - assert "alpine-test" in data["remotes"] - - -# --------------------------------------------------------------------------- -# PyPI remote /api/v1/remote/pypi-test/... 
-# --------------------------------------------------------------------------- - - -class TestPyPIRemote: - def test_simple_index_is_mutable(self, client, patched_deps): - """simple/ paths are detected as mutable (package-type default).""" - deps = patched_deps - html = b"..." - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = html - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/pypi-test/simple/requests/") - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_not_called() - - def test_simple_index_urls_rewritten_to_proxy(self, client, patched_deps): - """files.pythonhosted.org URLs in a cached simple index are rewritten to our proxy.""" - deps = patched_deps - html = b"..." - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = html - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/pypi-test/simple/requests/") - assert response.status_code == 200 - assert b"files.pythonhosted.org" not in response.content - assert b"/api/v1/remote/pypi-test/packages/requests-2.31.0.tar.gz" in response.content - - def test_simple_index_content_type_is_html(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"" - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/pypi-test/simple/requests/") - assert response.status_code == 200 - assert "text/html" in response.headers["content-type"] - - def test_simple_index_cache_miss_fetches_upstream(self, client, patched_deps): - deps = patched_deps - html = b"..." 
- deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = html - deps["cache"].is_mutable_file.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/api/v1/remote/pypi-test/simple/requests/") - - mock_fetch.assert_called_once() - assert response.status_code == 200 - assert b"files.pythonhosted.org" not in response.content - - def test_wheel_file_immutable_returns_correct_content_type(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"PK wheel bytes" - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/pypi-test/packages/requests-2.31.0-py3-none-any.whl") - assert response.status_code == 200 - assert "application/zip" in response.headers["content-type"] - assert response.headers["X-Artifact-Source"] == "cache" - - def test_sdist_immutable_returns_correct_content_type(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"tar bytes" - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/pypi-test/packages/requests-2.31.0.tar.gz") - assert response.status_code == 200 - assert "application/gzip" in response.headers["content-type"] - - def test_unknown_extension_on_pypi_remote_returns_403(self, client, patched_deps): - """Paths that don't match immutable_patterns and aren't mutable are blocked.""" - response = client.get("/api/v1/remote/pypi-test/packages/requests.unknown") - assert response.status_code == 403 - - -# --------------------------------------------------------------------------- -# npm remote /api/v1/remote/npm-test/... 
-# --------------------------------------------------------------------------- - - -class TestNpmRemote: - def test_package_metadata_is_mutable(self, client, patched_deps): - """Top-level package metadata paths are detected as mutable.""" - deps = patched_deps - meta = b'{"name":"express","versions":{}}' - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = meta - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/npm-test/express") - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_not_called() - - def test_metadata_tarball_urls_rewritten_to_proxy(self, client, patched_deps): - """registry.npmjs.org tarball URLs in metadata JSON are rewritten to our proxy.""" - deps = patched_deps - meta = b'{"dist":{"tarball":"https://registry.npmjs.org/express/-/express-4.18.2.tgz"}}' - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = meta - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/npm-test/express") - assert response.status_code == 200 - assert b"registry.npmjs.org" not in response.content - assert b"/api/v1/remote/npm-test/express/-/express-4.18.2.tgz" in response.content - - def test_metadata_content_type_is_json(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b'{"name":"express"}' - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/npm-test/express") - assert response.status_code == 200 - assert "application/json" in response.headers["content-type"] - - def test_scoped_package_metadata_rewritten(self, client, patched_deps): - """@scope/package metadata URLs are also rewritten back to the 
same npm-test remote.""" - deps = patched_deps - meta = b'{"dist":{"tarball":"https://registry.npmjs.org/@babel/core/-/core-7.21.0.tgz"}}' - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = meta - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/npm-test/@babel/core") - assert response.status_code == 200 - assert b"registry.npmjs.org" not in response.content - assert b"/api/v1/remote/npm-test/@babel/core/-/core-7.21.0.tgz" in response.content - - def test_tarball_not_rewritten(self, client, patched_deps): - """Tarball requests (.tgz) bypass URL rewriting and return binary.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"\x1f\x8b tgz bytes" - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/npm-test/express/-/express-4.18.2.tgz") - assert response.status_code == 200 - assert "application/gzip" in response.headers["content-type"] - assert response.headers["X-Artifact-Source"] == "cache" - - def test_metadata_cache_miss_fetches_upstream(self, client, patched_deps): - deps = patched_deps - meta = b'{"dist":{"tarball":"https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz"}}' - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = meta - deps["cache"].is_mutable_file.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/api/v1/remote/npm-test/lodash") - - mock_fetch.assert_called_once() - assert response.status_code == 200 - assert b"registry.npmjs.org" not in response.content - - def test_tarball_immutable_allowed_on_npm_remote(self, client, patched_deps): - """Tarballs (.tgz) match immutable_patterns and are served without rewriting.""" - 
deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"tgz bytes" - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/npm-test/express/-/express-4.18.2.tgz") - assert response.status_code == 200 - assert "application/gzip" in response.headers["content-type"] - - -# --------------------------------------------------------------------------- -# Helm remote /api/v1/remote/helm-test/... -# --------------------------------------------------------------------------- - - -class TestHelmRemote: - def test_index_yaml_is_mutable(self, client, patched_deps): - """index.yaml is detected as mutable (package-type default).""" - deps = patched_deps - index = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n" - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = index - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/helm-test/index.yaml") - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_not_called() - - def test_index_yaml_urls_rewritten_to_proxy(self, client, patched_deps): - """base_url chart URLs in a cached index.yaml are rewritten to our proxy.""" - deps = patched_deps - index = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n" - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = index - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/helm-test/index.yaml") - assert response.status_code == 200 - assert b"helm.releases.hashicorp.com" not in response.content - assert b"/api/v1/remote/helm-test/vault-0.29.1.tgz" in response.content - - def 
test_index_yaml_content_type_is_yaml(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"apiVersion: v1\nentries: {}\n" - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/helm-test/index.yaml") - assert response.status_code == 200 - assert "text/yaml" in response.headers["content-type"] - - def test_chart_tarball_immutable_returns_gzip_content_type(self, client, patched_deps): - """Versioned chart tarballs match immutable_patterns and are served as binary.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"\x1f\x8b chart bytes" - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/helm-test/vault-0.29.1.tgz") - assert response.status_code == 200 - assert "application/gzip" in response.headers["content-type"] - assert response.headers["X-Artifact-Source"] == "cache" - - def test_index_yaml_cache_miss_fetches_upstream(self, client, patched_deps): - deps = patched_deps - index = b"apiVersion: v1\nentries:\n vault:\n - urls:\n - https://helm.releases.hashicorp.com/vault-0.29.1.tgz\n" - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = index - deps["cache"].is_mutable_file.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/api/v1/remote/helm-test/index.yaml") - - mock_fetch.assert_called_once() - assert response.status_code == 200 - assert b"helm.releases.hashicorp.com" not in response.content - - def test_non_tgz_non_yaml_path_blocked_by_pattern(self, client, patched_deps): - """Paths that don't match immutable_patterns and aren't mutable are blocked.""" - deps = patched_deps - 
deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/helm-test/vault.zip") - assert response.status_code == 403 - - -# --------------------------------------------------------------------------- -# Puppet Forge remote /api/v1/remote/puppet-test/... -# --------------------------------------------------------------------------- - - -class TestPuppetRemote: - def test_module_metadata_is_mutable(self, client, patched_deps): - """v3/modules/ paths are detected as mutable (package-type default).""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b'{"current_release":{"file_uri":"/v3/files/puppetlabs-stdlib-9.7.0.tar.gz"}}' - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/puppet-test/v3/modules/puppetlabs-stdlib") - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_not_called() - - def test_releases_path_is_mutable(self, client, patched_deps): - """v3/releases paths are detected as mutable (package-type default).""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b'{"file_uri":"/v3/files/puppetlabs-stdlib-9.7.0.tar.gz"}' - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/puppet-test/v3/releases/puppetlabs-stdlib-9.7.0") - assert response.status_code == 200 - - def test_relative_file_uri_rewritten_to_absolute_proxy_url(self, client, patched_deps): - """Relative /v3/files/ paths in JSON responses are rewritten to absolute proxy URLs.""" - deps = patched_deps - meta = b'{"current_release":{"file_uri":"/v3/files/puppetlabs-stdlib-9.7.0.tar.gz","version":"9.7.0"}}' - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = meta - 
deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/puppet-test/v3/modules/puppetlabs-stdlib") - assert response.status_code == 200 - assert b'"/v3/files/' not in response.content - assert b"/api/v1/remote/puppet-test/v3/files/puppetlabs-stdlib-9.7.0.tar.gz" in response.content - - def test_absolute_forge_url_rewritten_to_proxy(self, client, patched_deps): - """Absolute forgeapi.puppet.com URLs in JSON are rewritten to the proxy URL.""" - deps = patched_deps - meta = b'{"uri":"https://forgeapi.puppet.com/v3/modules/puppetlabs-stdlib"}' - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = meta - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/puppet-test/v3/modules/puppetlabs-stdlib") - assert response.status_code == 200 - assert b"forgeapi.puppet.com" not in response.content - assert b"/api/v1/remote/puppet-test" in response.content - - def test_metadata_content_type_is_json(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b'{"current_release":{}}' - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/puppet-test/v3/modules/puppetlabs-concat") - assert response.status_code == 200 - assert "application/json" in response.headers["content-type"] - - def test_tarball_served_without_rewriting(self, client, patched_deps): - """Module tarballs (v3/files/*.tar.gz) are served as binary without URL rewriting.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"\x1f\x8b tarball bytes" - deps["cache"].is_mutable_file.return_value = False - - response = 
client.get("/api/v1/remote/puppet-test/v3/files/puppetlabs-stdlib-9.7.0.tar.gz") - assert response.status_code == 200 - assert "application/gzip" in response.headers["content-type"] - assert response.headers["X-Artifact-Source"] == "cache" - - def test_tarball_not_blocked_by_immutable_pattern(self, client, patched_deps): - """v3/files/*.tar.gz matches the configured immutable_patterns and is allowed.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"\x1f\x8b tarball bytes" - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/puppet-test/v3/files/puppetlabs-inifile-6.2.0.tar.gz") - assert response.status_code == 200 - - def test_unknown_path_blocked(self, client, patched_deps): - """Paths outside v3/modules, v3/releases, and v3/files are blocked.""" - deps = patched_deps - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/puppet-test/v3/users/puppetlabs") - assert response.status_code == 403 - - def test_metadata_cache_miss_fetches_upstream(self, client, patched_deps): - deps = patched_deps - meta = b'{"current_release":{"file_uri":"/v3/files/puppetlabs-stdlib-9.7.0.tar.gz"}}' - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = meta - deps["cache"].is_mutable_file.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/api/v1/remote/puppet-test/v3/modules/puppetlabs-stdlib") - - mock_fetch.assert_called_once() - assert response.status_code == 200 - assert b'"/v3/files/' not in response.content - - -# --------------------------------------------------------------------------- -# Terraform registry remote (terraform-registry-test) -# --------------------------------------------------------------------------- - - -class 
TestTerraformRemote: - def test_versions_path_is_mutable(self, client, patched_deps): - """Provider versions listing is detected as mutable.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b'{"versions":[]}' - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/terraform-registry-test/hashicorp/vault/versions") - assert response.status_code == 200 - deps["cache"].mark_index_cached.assert_not_called() - - def test_versions_returns_json_content_type(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b'{"versions":[]}' - deps["cache"].is_mutable_file.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/remote/terraform-registry-test/hashicorp/vault/versions") - assert response.status_code == 200 - assert "application/json" in response.headers["content-type"] - - def test_download_info_download_url_rewritten(self, client, patched_deps): - """download_url in download-info JSON is rewritten to point to the releases proxy.""" - deps = patched_deps - download_info = json.dumps( - { - "os": "linux", - "arch": "amd64", - "filename": "terraform-provider-vault_0.28.0_linux_amd64.zip", - "download_url": "https://releases.hashicorp.com/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_linux_amd64.zip", - "shasums_url": "https://releases.hashicorp.com/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_SHA256SUMS", - "shasums_signature_url": "https://releases.hashicorp.com/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_SHA256SUMS.sig", - } - ).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = download_info - deps["cache"].is_mutable_file.return_value = False - - response = 
client.get("/api/v1/remote/terraform-registry-test/hashicorp/vault/0.28.0/download/linux/amd64") - assert response.status_code == 200 - data = response.json() - assert "releases.hashicorp.com" not in data["download_url"] - assert "/api/v1/remote/hashicorp-releases-test/" in data["download_url"] - - def test_download_info_shasums_url_rewritten(self, client, patched_deps): - """shasums_url is also rewritten to the releases proxy.""" - deps = patched_deps - download_info = json.dumps( - { - "os": "linux", - "arch": "amd64", - "filename": "terraform-provider-vault_0.28.0_linux_amd64.zip", - "download_url": "https://releases.hashicorp.com/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_linux_amd64.zip", - "shasums_url": "https://releases.hashicorp.com/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_SHA256SUMS", - "shasums_signature_url": "https://releases.hashicorp.com/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_SHA256SUMS.sig", - } - ).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = download_info - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/terraform-registry-test/hashicorp/vault/0.28.0/download/linux/amd64") - assert response.status_code == 200 - data = response.json() - assert "/api/v1/remote/hashicorp-releases-test/" in data["shasums_url"] - assert "/api/v1/remote/hashicorp-releases-test/" in data["shasums_signature_url"] - assert "releases.hashicorp.com" not in data["shasums_url"] - assert "releases.hashicorp.com" not in data["shasums_signature_url"] - - def test_download_info_path_preserved(self, client, patched_deps): - """The path portion of the upstream URL is preserved when rewriting.""" - deps = patched_deps - zip_path = "/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_linux_amd64.zip" - download_info = json.dumps( - { - "download_url": f"https://releases.hashicorp.com{zip_path}", - 
"shasums_url": "https://releases.hashicorp.com/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_SHA256SUMS", - "shasums_signature_url": "https://releases.hashicorp.com/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_SHA256SUMS.sig", - } - ).encode() - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = download_info - deps["cache"].is_mutable_file.return_value = False - - response = client.get("/api/v1/remote/terraform-registry-test/hashicorp/vault/0.28.0/download/linux/amd64") - assert response.status_code == 200 - data = response.json() - assert data["download_url"].endswith(zip_path) - - def test_zip_served_as_binary(self, client, patched_deps): - """Provider zip files are served as binary without JSON rewriting.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"PK\x03\x04 zip bytes" - deps["cache"].is_mutable_file.return_value = False - - response = client.get( - "/api/v1/remote/hashicorp-releases-test/terraform-provider-vault/0.28.0/terraform-provider-vault_0.28.0_linux_amd64.zip" - ) - assert response.status_code == 200 - assert response.headers["X-Artifact-Source"] == "cache" - - def test_construct_url_prepends_v1_providers(self, client, patched_deps): - """Upstream URL for the terraform package type prepends /v1/providers/.""" - deps = patched_deps - deps["storage"].exists.return_value = False - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - deps["storage"].download_object.return_value = b'{"versions":[]}' - deps["cache"].is_mutable_file.return_value = True - client.get("/api/v1/remote/terraform-registry-test/hashicorp/vault/versions") - - called_url = mock_fetch.call_args[0][0] - assert called_url == "https://registry.terraform.io/v1/providers/hashicorp/vault/versions" - - def 
test_versions_cache_miss_fetches_upstream(self, client, patched_deps): - deps = patched_deps - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = b'{"versions":[]}' - deps["cache"].is_mutable_file.return_value = True - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached"}, - ) as mock_fetch: - response = client.get("/api/v1/remote/terraform-registry-test/hashicorp/vault/versions") - - mock_fetch.assert_called_once() - assert response.status_code == 200 - - -# --------------------------------------------------------------------------- -# Quarantine (quarantine-test remote: quarantine_new=True, quarantine_days=3) -# --------------------------------------------------------------------------- - - -class TestQuarantine: - def _recent_date(self, days_ago=1): - """Return an HTTP-format date string N days in the past (within quarantine window).""" - from datetime import datetime, timedelta - from email.utils import format_datetime - - dt = datetime.now(UTC) - timedelta(days=days_ago) - return format_datetime(dt, usegmt=True) - - def _old_date(self, days_ago=10): - """Return an HTTP-format date string N days in the past (outside quarantine window).""" - from datetime import datetime, timedelta - from email.utils import format_datetime - - dt = datetime.now(UTC) - timedelta(days=days_ago) - return format_datetime(dt, usegmt=True) - - def test_cache_miss_recent_artifact_quarantined(self, client, patched_deps): - """Cache miss: artifact published within quarantine window → 404.""" - deps = patched_deps - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached", "last_modified": self._recent_date()}, - ): - response = 
client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz") - - assert response.status_code == 404 - assert "quarantined" in response.json()["detail"].lower() - - def test_cache_miss_old_artifact_allowed(self, client, patched_deps): - """Cache miss: artifact published outside quarantine window → 200.""" - deps = patched_deps - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached", "last_modified": self._old_date()}, - ): - response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz") - - assert response.status_code == 200 - - def test_cache_miss_no_last_modified_fails_open(self, client, patched_deps): - """Cache miss: no Last-Modified header → fail open (200, not quarantined).""" - deps = patched_deps - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached", "last_modified": None}, - ): - response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz") - - assert response.status_code == 200 - - def test_cache_hit_recent_artifact_quarantined(self, client, patched_deps): - """Cache hit: stored publish date within quarantine window → 404.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - deps["cache"].get_artifact_published.return_value = self._recent_date() - - response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz") - - assert response.status_code == 404 - assert "quarantined" in 
response.json()["detail"].lower() - - def test_cache_hit_old_artifact_allowed(self, client, patched_deps): - """Cache hit: stored publish date outside quarantine window → 200.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - deps["cache"].get_artifact_published.return_value = self._old_date() - - response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz") - - assert response.status_code == 200 - - def test_cache_hit_no_stored_date_fetches_upstream(self, client, patched_deps): - """Cache hit: no stored date → HEAD upstream to get Last-Modified.""" - deps = patched_deps - deps["storage"].exists.return_value = True - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - deps["cache"].get_artifact_published.return_value = None - - with patch( - "artifactapi.artifact.proxy._fetch_last_modified", - new_callable=AsyncMock, - return_value=self._old_date(), - ) as mock_fetch: - response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz") - - mock_fetch.assert_called_once() - assert response.status_code == 200 - - def test_quarantine_disabled_allows_recent_artifact(self, client, patched_deps): - """quarantine_new=False: recent artifacts are not blocked.""" - deps = patched_deps - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached", "last_modified": self._recent_date()}, - ): - response = client.get("/api/v1/remote/quarantine-disabled/some/path/package-1.0.tar.gz") - - assert response.status_code == 200 - - def test_quarantine_detail_includes_available_date(self, client, patched_deps): - """The 404 detail 
should include the date when the artifact becomes available.""" - deps = patched_deps - deps["storage"].exists.return_value = False - deps["storage"].download_object.return_value = b"content" - deps["cache"].is_mutable_file.return_value = False - - with patch( - "artifactapi.artifact.proxy.cache_single_artifact", - new_callable=AsyncMock, - return_value={"status": "cached", "last_modified": self._recent_date()}, - ): - response = client.get("/api/v1/remote/quarantine-test/some/path/package-1.0.tar.gz") - - assert response.status_code == 404 - detail = response.json()["detail"] - assert "available after" in detail - assert "3-day" in detail diff --git a/tests/test_storage.py b/tests/test_storage.py deleted file mode 100644 index e5b5c6a..0000000 --- a/tests/test_storage.py +++ /dev/null @@ -1,132 +0,0 @@ -"""Tests for S3Storage: get_object_key (pure logic) and I/O methods.""" - -import hashlib -from unittest.mock import MagicMock, patch - -import pytest -from botocore.exceptions import ClientError -from fastapi import HTTPException - -from artifactapi.storage import S3Storage - - -@pytest.fixture -def storage(): - """S3Storage with a mocked boto3 client.""" - with patch("boto3.client", return_value=MagicMock()): - s = S3Storage( - endpoint="localhost:9000", - access_key="testkey", - secret_key="testsecret", - bucket="testbucket", - secure=False, - ) - s.client = MagicMock() - return s - - -# --------------------------------------------------------------------------- -# get_object_key -# --------------------------------------------------------------------------- - - -class TestGetObjectKey: - def test_key_has_three_part_structure(self, storage): - # remote / hash-segment / filename - key = storage.get_object_key("myremote", "some/path/to/file.rpm") - parts = key.split("/") - assert len(parts) == 3 - assert parts[0] == "myremote" - assert parts[2] == "file.rpm" - assert len(parts[1]) == 16 # SHA-256 hex truncated to 16 chars - - def 
test_key_uses_sha256_of_directory_path(self, storage): - # Pin the hash algorithm, truncation length, and format in one assertion - key = storage.get_object_key("myremote", "some/path/to/file.rpm") - expected_hash = hashlib.sha256(b"some/path/to").hexdigest()[:16] - assert key == f"myremote/{expected_hash}/file.rpm" - - def test_different_remotes_give_different_keys(self, storage): - k1 = storage.get_object_key("remote-a", "path/to/file.rpm") - k2 = storage.get_object_key("remote-b", "path/to/file.rpm") - assert k1 != k2 - - def test_different_directories_give_different_keys(self, storage): - k1 = storage.get_object_key("myremote", "path/version-1/file.rpm") - k2 = storage.get_object_key("myremote", "path/version-2/file.rpm") - assert k1 != k2 - assert k1.split("/")[-1] == k2.split("/")[-1] == "file.rpm" - - def test_leading_slash_stripped(self, storage): - k1 = storage.get_object_key("myremote", "/path/to/file.rpm") - k2 = storage.get_object_key("myremote", "path/to/file.rpm") - assert k1 == k2 - - def test_file_with_no_directory(self, storage): - key = storage.get_object_key("myremote", "file.rpm") - assert key == "myremote/file.rpm" - - def test_docker_blob_uses_digest_path(self, storage): - digest = "a" * 64 # realistic 64-char SHA-256 hex string - path = f"library/nginx/blobs/sha256:{digest}" - key = storage.get_object_key("dockerhub", path) - assert key == f"dockerhub/blobs/sha256/{digest}" - - def test_docker_blob_deduplication_across_images(self, storage): - """Same blob digest pulled from different images maps to the same S3 key.""" - digest = "deadbeef" * 8 # 64-char hex - k1 = storage.get_object_key("dockerhub", f"library/nginx/blobs/sha256:{digest}") - k2 = storage.get_object_key("dockerhub", f"library/ubuntu/blobs/sha256:{digest}") - assert k1 == k2 - - def test_docker_blob_different_digests_different_keys(self, storage): - k1 = storage.get_object_key("dockerhub", "library/nginx/blobs/sha256:" + "a" * 64) - k2 = storage.get_object_key("dockerhub", 
"library/nginx/blobs/sha256:" + "b" * 64) - assert k1 != k2 - - def test_docker_blob_different_remotes_different_keys(self, storage): - digest = "abc" * 21 + "d" # 64-char hex - k1 = storage.get_object_key("remote-a", f"library/nginx/blobs/sha256:{digest}") - k2 = storage.get_object_key("remote-b", f"library/nginx/blobs/sha256:{digest}") - assert k1 != k2 - - -# --------------------------------------------------------------------------- -# get_url -# --------------------------------------------------------------------------- - - -class TestGetUrl: - def test_returns_http_url_for_insecure_endpoint(self, storage): - url = storage.get_url("myremote/abc123/file.rpm") - assert url == "http://localhost:9000/testbucket/myremote/abc123/file.rpm" - - def test_returns_http_url_for_secure_storage(self): - with patch("boto3.client", return_value=MagicMock()): - s = S3Storage(endpoint="s3.example.com", access_key="k", secret_key="s", bucket="b", secure=True) - s.client = MagicMock() - # get_url uses http:// always (direct internal access address, not the S3 protocol) - assert s.get_url("path/to/file.rpm") == "http://s3.example.com/b/path/to/file.rpm" - - -# --------------------------------------------------------------------------- -# upload / download_object -# --------------------------------------------------------------------------- - - -class TestUpload: - def test_upload_returns_s3_uri(self, storage): - storage.client.put_object.return_value = {} - result = storage.upload("myremote/abc123/file.rpm", b"content") - assert result == "s3://testbucket/myremote/abc123/file.rpm" - - -class TestDownloadObject: - def test_download_object_raises_404_on_client_error(self, storage): - storage.client.get_object.side_effect = ClientError( - {"Error": {"Code": "NoSuchKey", "Message": "The specified key does not exist"}}, - "GetObject", - ) - with pytest.raises(HTTPException) as exc_info: - storage.download_object("nonexistent/key") - assert exc_info.value.status_code == 404 diff --git 
a/tests/test_virtual.py b/tests/test_virtual.py deleted file mode 100644 index 9ad5be9..0000000 --- a/tests/test_virtual.py +++ /dev/null @@ -1,830 +0,0 @@ -"""Unit tests for the virtual repository handler (artifact/virtual.py).""" - -from datetime import UTC, date, datetime -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -import yaml - -from artifactapi.artifact.virtual import ( - _HANDLERS, - _entries_to_msgpack_safe, - _get_member_index, - _HelmDumper, - _HelmHandler, - _merge_helm_indexes, - _rewrite_urls, - _VirtualHandler, - _YamlDumperBase, - _YamlLoader, -) - -# --------------------------------------------------------------------------- -# Shared sample data -# --------------------------------------------------------------------------- - -_INDEX_A = b"""\ -apiVersion: v1 -entries: - vault: - - name: vault - version: "0.27.0" - urls: - - https://helm.releases.hashicorp.com/vault-0.27.0.tgz - consul: - - name: consul - version: "1.2.0" - urls: - - https://helm.releases.hashicorp.com/consul-1.2.0.tgz -generated: "2023-01-01T00:00:00.000Z" -""" - -_INDEX_B = b"""\ -apiVersion: v1 -entries: - nginx: - - name: nginx - version: "15.0.0" - urls: - - https://charts.example.com/nginx-15.0.0.tgz - vault: - - name: vault - version: "0.27.0" - urls: - - https://charts.example.com/vault-0.27.0.tgz - - name: vault - version: "0.26.0" - urls: - - https://charts.example.com/vault-0.26.0.tgz -generated: "2023-01-01T00:00:00.000Z" -""" - -_INDEX_SIMPLE = b"""\ -apiVersion: v1 -entries: - mychart: - - name: mychart - version: "1.0.0" - urls: - - https://helm.releases.hashicorp.com/mychart-1.0.0.tgz -generated: "2023-01-01T00:00:00.000Z" -""" - -_INDEX_RELATIVE = b"""\ -apiVersion: v1 -entries: - rancher: - - name: rancher - version: "2.13.1" - urls: - - rancher-2.13.1.tgz -generated: "2023-01-01T00:00:00.000Z" -""" - -_CFG_A = {"base_url": "https://helm.releases.hashicorp.com", "cache": {"mutable_ttl": 3600}} -_CFG_B = {"base_url": 
"https://charts.example.com", "cache": {"mutable_ttl": 1800}} - - -# --------------------------------------------------------------------------- -# _YamlLoader / _YamlDumperBase — C extension selection -# --------------------------------------------------------------------------- - - -class TestYamlExtensionSelection: - def test_loader_is_a_class(self): - assert isinstance(_YamlLoader, type) - - def test_dumper_base_is_a_class(self): - assert isinstance(_YamlDumperBase, type) - - def test_helm_dumper_uses_selected_base(self): - assert issubclass(_HelmDumper, _YamlDumperBase) - - def test_c_extensions_used_when_available(self): - try: - assert _YamlLoader is yaml.CSafeLoader - assert _YamlDumperBase is yaml.CDumper - except AttributeError: - assert _YamlLoader is yaml.SafeLoader - assert _YamlDumperBase is yaml.Dumper - - def test_loader_can_parse_yaml(self): - result = yaml.load(b"key: value", Loader=_YamlLoader) - assert result == {"key": "value"} - - -# --------------------------------------------------------------------------- -# _HelmDumper — datetime/date YAML serialization -# --------------------------------------------------------------------------- - - -class TestHelmDumper: - def _dump(self, value): - return yaml.dump({"v": value}, Dumper=_HelmDumper) - - def test_datetime_with_tz_includes_Z_suffix(self): - dt = datetime(2023, 6, 15, 12, 0, 0, tzinfo=UTC) - assert "Z" in self._dump(dt) - - def test_datetime_without_tz_has_no_Z_suffix(self): - dt = datetime(2023, 6, 15, 12, 0, 0) - assert "Z" not in self._dump(dt) - - def test_datetime_uses_T_separator_not_space(self): - dt = datetime(2023, 6, 15, 12, 30, 0, tzinfo=UTC) - assert "T12:30:00" in self._dump(dt) - - def test_date_serialized_as_iso_string(self): - assert "2023-01-15" in self._dump(date(2023, 1, 15)) - - def test_datetime_round_trips_as_string_not_python_datetime(self): - dt = datetime(2023, 6, 15, 12, 0, 0, tzinfo=UTC) - parsed = yaml.safe_load(self._dump(dt)) - # yaml.safe_load must not 
re-parse this as a datetime object - assert isinstance(parsed["v"], str) - - def test_date_round_trips_as_string_not_python_date(self): - parsed = yaml.safe_load(self._dump(date(2023, 1, 15))) - assert isinstance(parsed["v"], str) - - -# --------------------------------------------------------------------------- -# _HelmHandler -# --------------------------------------------------------------------------- - - -class TestHelmHandler: - def setup_method(self): - self.handler = _HelmHandler() - - def test_accepts_index_yaml(self): - assert self.handler.accepts_path("index.yaml") is True - - def test_rejects_tgz_path(self): - assert self.handler.accepts_path("vault-0.27.0.tgz") is False - - def test_rejects_subdirectory_index(self): - assert self.handler.accepts_path("charts/index.yaml") is False - - def test_rejects_empty_path(self): - assert self.handler.accepts_path("") is False - - def test_path_error_is_non_empty_string(self): - msg = self.handler.path_error() - assert isinstance(msg, str) and len(msg) > 0 - - def test_merge_returns_bytes(self): - result = self.handler.merge([_INDEX_A], [None], ["member-a"], [_CFG_A], "http://proxy.example.com") - assert isinstance(result, bytes) - - def test_merge_delegates_to_merge_helm_indexes(self): - with patch("artifactapi.artifact.virtual._merge_helm_indexes", return_value=b"merged") as mock_fn: - result = self.handler.merge([b"data"], [None], ["m"], [{}], "http://proxy") - mock_fn.assert_called_once_with([b"data"], [None], ["m"], [{}], "http://proxy") - assert result == b"merged" - - -# --------------------------------------------------------------------------- -# _HANDLERS registry -# --------------------------------------------------------------------------- - - -class TestHandlersRegistry: - def test_helm_handler_is_registered(self): - assert "helm" in _HANDLERS - assert isinstance(_HANDLERS["helm"], _HelmHandler) - - def test_helm_handler_satisfies_protocol(self): - assert isinstance(_HANDLERS["helm"], _VirtualHandler) 
- - -# --------------------------------------------------------------------------- -# _rewrite_urls -# --------------------------------------------------------------------------- - - -class TestRewriteUrls: - def _rewrite(self, urls, base_url="https://upstream.example.com", proxy_base="http://proxy.example.com", member_name="my-remote"): - return _rewrite_urls(urls, base_url, proxy_base, member_name) - - def test_absolute_url_matching_base_is_rewritten(self): - result = self._rewrite(["https://upstream.example.com/chart-1.0.0.tgz"]) - assert result == ["http://proxy.example.com/api/v1/remote/my-remote/chart-1.0.0.tgz"] - - def test_relative_url_is_prepended_with_proxy_remote(self): - result = self._rewrite(["chart-1.0.0.tgz"]) - assert result == ["http://proxy.example.com/api/v1/remote/my-remote/chart-1.0.0.tgz"] - - def test_relative_url_with_leading_slash(self): - result = self._rewrite(["/chart-1.0.0.tgz"]) - assert result == ["http://proxy.example.com/api/v1/remote/my-remote/chart-1.0.0.tgz"] - - def test_absolute_url_not_matching_base_is_unchanged(self): - result = self._rewrite(["https://other.example.com/chart-1.0.0.tgz"]) - assert result == ["https://other.example.com/chart-1.0.0.tgz"] - - def test_empty_url_list_returns_empty(self): - assert self._rewrite([]) == [] - - def test_multiple_urls_all_rewritten(self): - urls = ["https://upstream.example.com/a-1.0.0.tgz", "b-2.0.0.tgz"] - result = self._rewrite(urls) - assert result[0] == "http://proxy.example.com/api/v1/remote/my-remote/a-1.0.0.tgz" - assert result[1] == "http://proxy.example.com/api/v1/remote/my-remote/b-2.0.0.tgz" - - -# --------------------------------------------------------------------------- -# _merge_helm_indexes -# --------------------------------------------------------------------------- - - -class TestMergeHelmIndexes: - def _merge(self, raw_indexes, member_names, member_configs, proxy_base="http://proxy.example.com"): - return _merge_helm_indexes(raw_indexes, [None] * 
len(raw_indexes), member_names, member_configs, proxy_base) - - def _parse(self, raw): - return yaml.safe_load(raw) - - def test_single_member_all_charts_present(self): - index = self._parse(self._merge([_INDEX_A], ["member-a"], [_CFG_A])) - assert "vault" in index["entries"] - assert "consul" in index["entries"] - - def test_two_members_non_overlapping_charts_all_present(self): - index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B])) - assert "vault" in index["entries"] - assert "consul" in index["entries"] - assert "nginx" in index["entries"] - - def test_first_member_wins_on_duplicate_name_and_version(self): - index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B])) - v027 = next(e for e in index["entries"]["vault"] if e["version"] == "0.27.0") - assert "member-a" in v027["urls"][0] - - def test_absolute_urls_rewritten_to_proxy(self): - index = self._parse(self._merge([_INDEX_A], ["member-a"], [_CFG_A])) - url = index["entries"]["vault"][0]["urls"][0] - assert url == "http://proxy.example.com/api/v1/remote/member-a/vault-0.27.0.tgz" - - def test_relative_urls_rewritten_to_proxy(self): - cfg = {"base_url": "https://releases.rancher.com/server-charts/stable", "cache": {"mutable_ttl": 3600}} - index = self._parse(self._merge([_INDEX_RELATIVE], ["rancher-stable"], [cfg])) - url = index["entries"]["rancher"][0]["urls"][0] - assert url == "http://proxy.example.com/api/v1/remote/rancher-stable/rancher-2.13.1.tgz" - - def test_different_versions_of_same_chart_both_included(self): - index = self._parse(self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B])) - versions = {e["version"] for e in index["entries"]["vault"]} - assert "0.27.0" in versions - assert "0.26.0" in versions - - def test_malformed_yaml_from_member_is_skipped(self): - index = self._parse(self._merge([_INDEX_A, b"{bad yaml"], ["member-a", "bad"], [_CFG_A, _CFG_B])) - assert "vault" in 
index["entries"] - assert "consul" in index["entries"] - - def test_output_has_apiVersion_v1(self): - index = self._parse(self._merge([_INDEX_A], ["member-a"], [_CFG_A])) - assert index["apiVersion"] == "v1" - - def test_output_has_generated_field(self): - index = self._parse(self._merge([_INDEX_A], ["member-a"], [_CFG_A])) - assert "generated" in index - - def test_output_is_valid_yaml(self): - raw = self._merge([_INDEX_A, _INDEX_B], ["member-a", "member-b"], [_CFG_A, _CFG_B]) - assert isinstance(yaml.safe_load(raw), dict) - - def test_empty_index_from_member_produces_no_entries(self): - empty = b"apiVersion: v1\nentries: {}\ngenerated: '2023-01-01T00:00:00.000Z'\n" - index = self._parse(self._merge([empty], ["member-a"], [_CFG_A])) - assert index["entries"] == {} - - -# --------------------------------------------------------------------------- -# _get_member_index (async) -# --------------------------------------------------------------------------- - - -class TestGetMemberIndex: - @pytest.fixture - def storage(self): - m = MagicMock() - m.get_object_key.return_value = "member/key/index.yaml" - m.exists.return_value = False - m.download_object.return_value = b"cached bytes" - return m - - @pytest.fixture - def cache(self): - m = MagicMock() - m.is_index_valid.return_value = False - return m - - @pytest.fixture - def member_cfg(self): - return {"base_url": "https://helm.releases.hashicorp.com", "cache": {"mutable_ttl": 3600}} - - def _fake_response(self, content=b"upstream bytes"): - r = MagicMock() - r.content = content - r.raise_for_status = MagicMock() - return r - - def _patch_httpx(self, response): - mock_client_cls = patch("artifactapi.artifact.virtual.httpx.AsyncClient") - p = mock_client_cls.start() - mock_client = AsyncMock() - p.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = response - return mock_client_cls, mock_client - - async def test_cache_hit_returns_stored_bytes(self, storage, cache, member_cfg): - 
storage.exists.return_value = True - cache.is_index_valid.return_value = True - - _, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - assert raw_data == b"cached bytes" - - async def test_cache_hit_does_not_fetch_upstream(self, storage, cache, member_cfg): - storage.exists.return_value = True - cache.is_index_valid.return_value = True - - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - mock_cls.assert_not_called() - - async def test_cache_hit_storage_error_falls_through_to_upstream(self, storage, cache, member_cfg): - storage.exists.return_value = True - cache.is_index_valid.return_value = True - storage.download_object.side_effect = Exception("S3 read error") - - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response(b"fresh bytes") - - _, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - assert raw_data == b"fresh bytes" - - async def test_cache_miss_fetches_from_upstream(self, storage, cache, member_cfg): - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response() - - _, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - assert raw_data == b"upstream bytes" - - async def test_cache_miss_stores_result_in_s3(self, storage, cache, member_cfg): - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response() - - await _get_member_index("m", member_cfg, 
"index.yaml", storage, cache) - - storage.upload.assert_called_once() - - async def test_cache_miss_marks_cache_with_configured_ttl(self, storage, cache, member_cfg): - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response() - - await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - cache.mark_index_cached.assert_called_once_with("m", "index.yaml", 3600) - - async def test_cache_miss_with_auth_sends_basic_auth_header(self, storage, cache): - cfg = { - "base_url": "https://private.example.com", - "username": "user", - "password": "pass", - "cache": {"mutable_ttl": 3600}, - } - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response() - - await _get_member_index("m", cfg, "index.yaml", storage, cache) - - headers = mock_client.get.call_args.kwargs["headers"] - assert "Authorization" in headers - assert headers["Authorization"].startswith("Basic ") - - async def test_no_credentials_sends_no_auth_header(self, storage, cache, member_cfg): - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response() - - await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - headers = mock_client.get.call_args.kwargs["headers"] - assert "Authorization" not in headers - - async def test_upstream_fetch_failure_returns_none(self, storage, cache, member_cfg): - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.side_effect = Exception("connection 
refused") - - _, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - assert raw_data is None - - async def test_s3_upload_failure_still_returns_data(self, storage, cache, member_cfg): - storage.upload.side_effect = Exception("S3 write error") - - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response() - - _, _, _, raw_data, _ = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - assert raw_data == b"upstream bytes" - - async def test_returns_ttl_from_config(self, storage, cache): - cfg = {"base_url": "https://example.com", "cache": {"mutable_ttl": 900}} - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response() - - _, _, ttl, _, _ = await _get_member_index("m", cfg, "index.yaml", storage, cache) - - assert ttl == 900 - - async def test_defaults_ttl_to_3600_when_not_configured(self, storage, cache): - cfg = {"base_url": "https://example.com"} - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response() - - _, _, ttl, _, _ = await _get_member_index("m", cfg, "index.yaml", storage, cache) - - assert ttl == 3600 - - -# --------------------------------------------------------------------------- -# Virtual route GET /api/v1/virtual/{name}/{path} -# --------------------------------------------------------------------------- - - -@pytest.fixture -def mock_storage_v(): - m = MagicMock() - m.get_object_key.return_value = "virtual/helm-virtual-test/index.yaml" - m.exists.return_value = False - m.download_object.return_value = 
b"apiVersion: v1\nentries: {}\n" - return m - - -@pytest.fixture -def mock_cache_v(): - m = MagicMock() - m.is_index_valid.return_value = False - m.available = False - m.client = None - return m - - -@pytest.fixture -def patched_virtual_deps(mock_storage_v, mock_cache_v): - import artifactapi.main as main_mod - - with ( - patch.object(main_mod, "storage", mock_storage_v), - patch.object(main_mod, "cache", mock_cache_v), - ): - yield {"storage": mock_storage_v, "cache": mock_cache_v} - - -class TestVirtualRoute: - def test_unknown_virtual_name_returns_404(self, client, patched_virtual_deps): - response = client.get("/api/v1/virtual/no-such-virtual/index.yaml") - assert response.status_code == 404 - - def test_non_virtual_name_returns_404(self, client, patched_virtual_deps): - # helm-test is in remotes, not virtuals - response = client.get("/api/v1/virtual/helm-test/index.yaml") - assert response.status_code == 404 - - def test_unsupported_package_returns_400(self, client, patched_virtual_deps): - # unsupported-virtual-test has package "rpm" - response = client.get("/api/v1/virtual/unsupported-virtual-test/index.yaml") - assert response.status_code == 400 - - def test_non_index_path_returns_404(self, client, patched_virtual_deps): - response = client.get("/api/v1/virtual/helm-virtual-test/vault-0.27.0.tgz") - assert response.status_code == 404 - - def test_no_members_returns_500(self, client, patched_virtual_deps): - response = client.get("/api/v1/virtual/empty-virtual-test/index.yaml") - assert response.status_code == 500 - - def test_virtual_cache_hit_returns_200(self, client, patched_virtual_deps): - deps = patched_virtual_deps - deps["storage"].exists.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - assert response.status_code == 200 - - def test_virtual_cache_hit_content_type_is_yaml(self, client, patched_virtual_deps): - deps = patched_virtual_deps - 
deps["storage"].exists.return_value = True - deps["cache"].is_index_valid.return_value = True - - response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - assert "text/yaml" in response.headers["content-type"] - - def test_virtual_cache_hit_returns_stored_content(self, client, patched_virtual_deps): - deps = patched_virtual_deps - deps["storage"].exists.return_value = True - deps["cache"].is_index_valid.return_value = True - deps["storage"].download_object.return_value = b"apiVersion: v1\nentries: {}\n" - - response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - assert response.content == b"apiVersion: v1\nentries: {}\n" - - def test_virtual_cache_hit_skips_member_fetch(self, client, patched_virtual_deps): - deps = patched_virtual_deps - deps["storage"].exists.return_value = True - deps["cache"].is_index_valid.return_value = True - - with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: - client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - - mock_get.assert_not_called() - - def test_cache_miss_returns_200_with_yaml_content_type(self, client, patched_virtual_deps): - with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: - mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None) - response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - - assert response.status_code == 200 - assert "text/yaml" in response.headers["content-type"] - - def test_cache_miss_response_contains_merged_entries(self, client, patched_virtual_deps): - with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: - mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None) - response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - - index = yaml.safe_load(response.content) - assert "mychart" in index["entries"] - - def test_cache_miss_stores_result_in_s3(self, client, 
patched_virtual_deps): - deps = patched_virtual_deps - with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: - mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None) - client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - - deps["storage"].upload.assert_called_once() - - def test_cache_miss_marks_index_cached(self, client, patched_virtual_deps): - deps = patched_virtual_deps - with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: - mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None) - client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - - deps["cache"].mark_index_cached.assert_called_once() - - def test_cache_miss_uses_min_ttl_across_members(self, client, patched_virtual_deps): - deps = patched_virtual_deps - with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: - mock_get.side_effect = [ - ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None), - ("helm-member-2", _CFG_B, 1800, _INDEX_SIMPLE, None), - ] - client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - - _, _, ttl = deps["cache"].mark_index_cached.call_args[0] - assert ttl == 1800 - - def test_all_members_unreachable_returns_502(self, client, patched_virtual_deps): - with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: - mock_get.return_value = ("helm-test", _CFG_A, 3600, None, None) - response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - - assert response.status_code == 502 - - def test_one_member_unreachable_still_returns_200(self, client, patched_virtual_deps): - with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: - mock_get.side_effect = [ - ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None), - ("helm-member-2", _CFG_B, 1800, None, None), - ] - response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - 
- assert response.status_code == 200 - - def test_member_not_in_config_is_skipped(self, client, patched_virtual_deps): - import artifactapi.main as main_mod - - real_get = main_mod.config.get_remote_config - - def patched_get(name): - return None if name == "helm-member-2" else real_get(name) - - with ( - patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get, - patch.object(main_mod.config, "get_remote_config", side_effect=patched_get), - ): - mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None) - response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - - # only helm-test was available — should succeed - assert response.status_code == 200 - mock_get.assert_called_once() - - def test_s3_store_failure_still_returns_200(self, client, patched_virtual_deps): - deps = patched_virtual_deps - deps["storage"].upload.side_effect = Exception("S3 write error") - - with patch("artifactapi.artifact.virtual._get_member_index", new_callable=AsyncMock) as mock_get: - mock_get.return_value = ("helm-test", _CFG_A, 3600, _INDEX_SIMPLE, None) - response = client.get("/api/v1/virtual/helm-virtual-test/index.yaml") - - assert response.status_code == 200 - - -# --------------------------------------------------------------------------- -# _entries_to_msgpack_safe -# --------------------------------------------------------------------------- - - -class TestEntriesToMsgpackSafe: - def test_plain_string_values_pass_through(self): - entries = {"chart": [{"name": "chart", "version": "1.0.0", "urls": ["http://x/c.tgz"]}]} - result = _entries_to_msgpack_safe(entries) - assert result["chart"][0]["version"] == "1.0.0" - - def test_datetime_converted_to_iso_string(self): - dt = datetime(2023, 6, 15, 12, 0, 0, tzinfo=UTC) - entries = {"chart": [{"name": "chart", "version": "1.0.0", "created": dt}]} - result = _entries_to_msgpack_safe(entries) - assert isinstance(result["chart"][0]["created"], str) - assert "2023-06-15" in 
result["chart"][0]["created"] - - def test_date_converted_to_iso_string(self): - entries = {"chart": [{"name": "chart", "version": "1.0.0", "created": date(2023, 6, 15)}]} - result = _entries_to_msgpack_safe(entries) - assert result["chart"][0]["created"] == "2023-06-15" - - def test_empty_entries_returns_empty_dict(self): - assert _entries_to_msgpack_safe({}) == {} - - def test_multiple_versions_all_converted(self): - dt = datetime(2023, 1, 1, tzinfo=UTC) - entries = { - "chart": [ - {"name": "chart", "version": "1.0.0", "created": dt}, - {"name": "chart", "version": "2.0.0", "created": dt}, - ] - } - result = _entries_to_msgpack_safe(entries) - for v in result["chart"]: - assert isinstance(v["created"], str) - - def test_result_is_msgpack_serializable(self): - import msgpack - - dt = datetime(2023, 6, 15, 12, 0, 0, tzinfo=UTC) - entries = {"chart": [{"name": "chart", "version": "1.0.0", "created": dt, "urls": ["http://x/c.tgz"]}]} - safe = _entries_to_msgpack_safe(entries) - packed = msgpack.packb(safe, use_bin_type=True) - unpacked = msgpack.unpackb(packed, raw=False) - assert unpacked["chart"][0]["created"] == safe["chart"][0]["created"] - - -# --------------------------------------------------------------------------- -# _merge_helm_indexes — pre-parsed entries path -# --------------------------------------------------------------------------- - - -class TestMergeHelmIndexesWithParsed: - """Verify that pre-parsed entries (from msgpack) produce the same output as raw YAML.""" - - def _parse_entries(self, raw: bytes) -> dict: - index = yaml.safe_load(raw) - return index.get("entries") or {} - - def test_parsed_entries_produce_same_charts_as_raw(self): - parsed = self._parse_entries(_INDEX_A) - raw_result = yaml.safe_load(_merge_helm_indexes([_INDEX_A], [None], ["member-a"], [_CFG_A], "http://proxy.example.com")) - parsed_result = yaml.safe_load(_merge_helm_indexes([_INDEX_A], [parsed], ["member-a"], [_CFG_A], "http://proxy.example.com")) - assert 
set(raw_result["entries"].keys()) == set(parsed_result["entries"].keys()) - - def test_parsed_entries_urls_are_rewritten(self): - parsed = self._parse_entries(_INDEX_A) - result = yaml.safe_load(_merge_helm_indexes([_INDEX_A], [parsed], ["member-a"], [_CFG_A], "http://proxy.example.com")) - url = result["entries"]["vault"][0]["urls"][0] - assert "member-a" in url - assert "proxy.example.com" in url - - def test_none_parsed_falls_back_to_raw_bytes(self): - result = yaml.safe_load(_merge_helm_indexes([_INDEX_A], [None], ["member-a"], [_CFG_A], "http://proxy.example.com")) - assert "vault" in result["entries"] - - def test_mixed_parsed_and_raw_merge_correctly(self): - parsed_a = self._parse_entries(_INDEX_A) - result = yaml.safe_load( - _merge_helm_indexes( - [_INDEX_A, _INDEX_B], - [parsed_a, None], - ["member-a", "member-b"], - [_CFG_A, _CFG_B], - "http://proxy.example.com", - ) - ) - assert "vault" in result["entries"] - assert "nginx" in result["entries"] - - -# --------------------------------------------------------------------------- -# _get_member_index — msgpack cache behaviour -# --------------------------------------------------------------------------- - - -class TestGetMemberIndexMsgpack: - @pytest.fixture - def storage(self): - m = MagicMock() - m.get_object_key.side_effect = lambda name, path: f"{name}/{path}" - m.exists.return_value = False - m.download_object.return_value = _INDEX_SIMPLE - return m - - @pytest.fixture - def cache(self): - m = MagicMock() - m.is_index_valid.return_value = False - return m - - @pytest.fixture - def member_cfg(self): - return {"base_url": "https://helm.releases.hashicorp.com", "cache": {"mutable_ttl": 3600}} - - def _fake_response(self, content=_INDEX_SIMPLE): - r = MagicMock() - r.content = content - r.raise_for_status = MagicMock() - return r - - async def test_cache_hit_with_msgpack_returns_parsed_entries(self, storage, cache, member_cfg): - import msgpack - - entries = {"mychart": [{"name": "mychart", "version": 
"1.0.0", "urls": ["http://x/c.tgz"]}]} - packed = msgpack.packb(entries, use_bin_type=True) - - storage.exists.side_effect = lambda key: True - cache.is_index_valid.return_value = True - storage.download_object.side_effect = lambda key: packed if key.endswith("index.msgpack") else _INDEX_SIMPLE - - _, _, _, raw_data, parsed = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - assert parsed == entries - - async def test_cache_miss_builds_msgpack_and_returns_parsed(self, storage, cache, member_cfg): - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.return_value = self._fake_response() - - _, _, _, raw_data, parsed = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - assert raw_data == _INDEX_SIMPLE - assert isinstance(parsed, dict) - assert "mychart" in parsed - - async def test_broken_msgpack_rebuilds_from_raw_yaml(self, storage, cache, member_cfg): - storage.exists.side_effect = lambda key: True - cache.is_index_valid.return_value = True - storage.download_object.side_effect = lambda key: b"not-valid-msgpack" if key.endswith("index.msgpack") else _INDEX_SIMPLE - - _, _, _, raw_data, parsed = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - assert raw_data == _INDEX_SIMPLE - # Falls back to YAML parse and rebuilds msgpack — entries are returned - assert isinstance(parsed, dict) - assert "mychart" in parsed - - async def test_upstream_failure_returns_none_for_both(self, storage, cache, member_cfg): - with patch("artifactapi.artifact.virtual.httpx.AsyncClient") as mock_cls: - mock_client = AsyncMock() - mock_cls.return_value.__aenter__.return_value = mock_client - mock_client.get.side_effect = Exception("timeout") - - _, _, _, raw_data, parsed = await _get_member_index("m", member_cfg, "index.yaml", storage, cache) - - assert raw_data is None - assert parsed is 
None diff --git a/tox.ini b/tox.ini deleted file mode 100644 index bf00acc..0000000 --- a/tox.ini +++ /dev/null @@ -1,8 +0,0 @@ -[tox] -envlist = py311 -isolated_build = true - -[testenv] -extras = dev -commands = - pytest {posargs:tests}