From 42dbecc2b7d99ae6e077decf6ee4185edd589131 Mon Sep 17 00:00:00 2001 From: Ben Vincent Date: Sun, 28 Jun 2026 16:53:11 +1000 Subject: [PATCH] Add JSON schema generation for kubeconform CRD validation - ci/generate-schemas.sh fetches CRDs from the cluster via kubectl, supplements with offline CRD manifests (ArgoCD, Gateway API) and Kubernetes swagger spec - Schemas use Datree catalog convention: {group}/{kind}_{version}.json - validate-apps.sh and validate-clusters.sh check local schemas first - Makefile schemas target runs before kubeconform --- .gitignore | 1 + Makefile | 6 +- ci/generate-schemas.sh | 134 ++++++++++++++++++++++++++++++++++++++++ ci/validate-apps.sh | 3 + ci/validate-clusters.sh | 3 + 5 files changed, 146 insertions(+), 1 deletion(-) create mode 100755 ci/generate-schemas.sh diff --git a/.gitignore b/.gitignore index 13e88ab..5cd9c59 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ manifests/ +schemas/ apps/**/charts/ diff --git a/Makefile b/Makefile index 7c3849e..3a9aed1 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,12 @@ build: @mkdir -p manifests/$(filter-out $@,$(MAKECMDGOALS)) @kustomize build --enable-helm $(filter-out $@,$(MAKECMDGOALS)) --output manifests/$(filter-out $@,$(MAKECMDGOALS)) +# Generate JSON schemas from CRDs and Kubernetes swagger spec +schemas: + @ci/generate-schemas.sh schemas + # kubeconform -kubeconform: +kubeconform: schemas @ci/validate-apps.sh && \ ci/validate-clusters.sh diff --git a/ci/generate-schemas.sh b/ci/generate-schemas.sh new file mode 100755 index 0000000..f5060f5 --- /dev/null +++ b/ci/generate-schemas.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCHEMA_DIR="${1:-schemas}" +mkdir -p "$SCHEMA_DIR" + +CRD_URLS=( + "https://artifactapi.k8s.syd1.au.unkin.net/api/v1/remote/github_user/argoproj/argo-cd/refs/tags/v3.3.2/manifests/ha/install.yaml" + "https://artifactapi.k8s.syd1.au.unkin.net/api/v1/remote/github/kubernetes-sigs/gateway-api/releases/download/v1.5.1/standard-install.yaml" +) + 
+SWAGGER_URL="https://artifactapi.k8s.syd1.au.unkin.net/api/v1/remote/github_user/kubernetes/kubernetes/refs/tags/v1.33.7/api/openapi-spec/swagger.json"
+
+echo "==> Fetching CRDs from cluster..." >&2
+kubectl get crds -o json | python3 -c "
+import sys, json, os
+
+data = json.load(sys.stdin)
+schema_dir = sys.argv[1]  # passed as an argument so the path is never parsed as Python source
+
+for crd in data.get('items', []):
+    spec = crd.get('spec', {})
+    group = spec.get('group', '')
+    kind = spec.get('names', {}).get('kind', '')
+
+    for ver in spec.get('versions', []):
+        version = ver.get('name', '')
+        openapi = ver.get('schema', {}).get('openAPIV3Schema', {})
+        if not openapi:
+            continue
+
+        schema = dict(openapi)
+        schema['\$schema'] = 'http://json-schema.org/draft-07/schema#'
+        schema['type'] = 'object'
+        schema.setdefault('properties', {})
+        schema['properties'].setdefault('apiVersion', {'type': 'string'})
+        schema['properties'].setdefault('kind', {'type': 'string'})
+        schema['properties'].setdefault('metadata', {'type': 'object'})
+
+        group_dir = os.path.join(schema_dir, group)
+        os.makedirs(group_dir, exist_ok=True)
+        fname = f'{kind}_{version}.json'.lower()  # lower-cased file name: kind_version.json
+        with open(os.path.join(group_dir, fname), 'w') as f:
+            json.dump(schema, f, indent=2)
+        print(f' Generated: {group}/{fname}', file=sys.stderr)
+" "$SCHEMA_DIR"
+
+echo "==> Downloading CRD manifests for offline schemas..." >&2
+
+for url in "${CRD_URLS[@]}"; do
+  echo " Fetching: $url" >&2
+  curl -sSfL "$url"
+done | python3 -c "
+import sys, json, yaml, os
+
+schema_dir = sys.argv[1]  # passed as an argument so the path is never parsed as Python source
+
+for doc in yaml.safe_load_all(sys.stdin):
+    if doc is None:
+        continue
+    if doc.get('kind') != 'CustomResourceDefinition':
+        continue
+
+    spec = doc.get('spec', {})
+    group = spec.get('group', '')
+    kind = spec.get('names', {}).get('kind', '')
+
+    for ver in spec.get('versions', []):
+        version = ver.get('name', '')
+        openapi = ver.get('schema', {}).get('openAPIV3Schema', {})
+        if not openapi:
+            continue
+
+        group_dir = os.path.join(schema_dir, group)
+        fname = f'{kind}_{version}.json'.lower()
+        out_path = os.path.join(group_dir, fname)
+        if os.path.exists(out_path):  # keep any schema already on disk, e.g. one written from the cluster
+            continue
+
+        schema = dict(openapi)
+        schema['\$schema'] = 'http://json-schema.org/draft-07/schema#'
+        schema['type'] = 'object'
+        schema.setdefault('properties', {})
+        schema['properties'].setdefault('apiVersion', {'type': 'string'})
+        schema['properties'].setdefault('kind', {'type': 'string'})
+        schema['properties'].setdefault('metadata', {'type': 'object'})
+
+        os.makedirs(group_dir, exist_ok=True)
+        with open(out_path, 'w') as f:
+            json.dump(schema, f, indent=2)
+        print(f' Generated: {group}/{fname}', file=sys.stderr)
+" "$SCHEMA_DIR"
+
+echo "==> Downloading Kubernetes swagger spec..." >&2
+
+curl -sSfL "$SWAGGER_URL" | python3 -c "
+import sys, json, os
+
+swagger = json.load(sys.stdin)
+definitions = swagger.get('definitions', {})
+schema_dir = sys.argv[1]  # passed as an argument so the path is never parsed as Python source
+
+for defn in definitions.values():
+    gvk_list = defn.get('x-kubernetes-group-version-kind', [])
+    for gvk in gvk_list:
+        group = gvk.get('group', '')
+        version = gvk.get('version', '')
+        kind = gvk.get('kind', '')
+
+        schema = {
+            '\$schema': 'http://json-schema.org/draft-07/schema#',
+            'type': 'object',
+            'properties': {},
+            'additionalProperties': True,  # NOTE(review): loose schema, listed ahead of the upstream ones by the validate scripts - confirm intended
+        }
+
+        for prop_name, prop_val in defn.get('properties', {}).items():
+            prop_copy = {k: v for k, v in prop_val.items() if k != '\$ref'}  # references are dropped, not resolved
+            if not prop_copy.get('type') and 'description' in prop_copy:
+                prop_copy['type'] = 'object'
+                prop_copy['additionalProperties'] = True
+            schema['properties'][prop_name] = prop_copy
+
+        group_dir = os.path.join(schema_dir, group) if group else schema_dir  # core group has an empty name
+        os.makedirs(group_dir, exist_ok=True)
+        fname = f'{kind}_{version}.json'.lower()
+        with open(os.path.join(group_dir, fname), 'w') as f:
+            json.dump(schema, f, indent=2)
+
+print(' Generated Kubernetes native schemas', file=sys.stderr)
+" "$SCHEMA_DIR"
+
+total=$(find "$SCHEMA_DIR" -name '*.json' | wc -l)
+echo "==> Schema generation complete: $total schemas in $SCHEMA_DIR" >&2
diff --git a/ci/validate-apps.sh b/ci/validate-apps.sh index fc51271..e403367 100755 --- a/ci/validate-apps.sh +++ b/ci/validate-apps.sh @@ -3,7 +3,10 @@ set -euo pipefail KUBE_VERSION="1.33.7" +SCHEMA_DIR="${SCHEMA_DIR:-schemas}" + schema_args=( + -schema-location "$SCHEMA_DIR/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json" -schema-location "https://artifactapi.k8s.syd1.au.unkin.net/api/v1/remote/github_user/yannh/kubernetes-json-schema/master/{{.NormalizedKubernetesVersion}}-standalone{{.StrictSuffix}}/{{.ResourceKind}}{{.KindSuffix}}.json" -schema-location 
"https://artifactapi.k8s.syd1.au.unkin.net/api/v1/remote/github_user/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json" ) diff --git a/ci/validate-clusters.sh b/ci/validate-clusters.sh index 8ec7fb1..eb1dee0 100755 --- a/ci/validate-clusters.sh +++ b/ci/validate-clusters.sh @@ -3,7 +3,10 @@ set -euo pipefail KUBE_VERSION="1.33.7" +SCHEMA_DIR="${SCHEMA_DIR:-schemas}" + schema_args=( + -schema-location "$SCHEMA_DIR/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json" -schema-location "https://artifactapi.k8s.syd1.au.unkin.net/api/v1/remote/github_user/yannh/kubernetes-json-schema/master/{{.NormalizedKubernetesVersion}}-standalone{{.StrictSuffix}}/{{.ResourceKind}}{{.KindSuffix}}.json" -schema-location "https://artifactapi.k8s.syd1.au.unkin.net/api/v1/remote/github_user/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json" )