Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bd723dbd05 | |||
| 55d7345854 | |||
| 87ce0ec6ee | |||
| 53da6fe547 | |||
| 6adc52830f | |||
| e6176999c1 | |||
| ec03bcf778 | |||
| e535a75649 | |||
| a44aef5381 | |||
| 4a278b1419 | |||
| 698c977511 | |||
| 86108f5b75 | |||
| fb92b4c000 | |||
| ce0851dc3c | |||
| edc552413e | |||
| 25d4610903 |
Vendored
+49
-16
@@ -1,5 +1,5 @@
|
|||||||
# FleetDM Stack - Gitea Actions
|
# FleetDM Stack - Gitea Actions
|
||||||
# CI: lint on every push
|
# CI: lint on every push (skips docs-only changes)
|
||||||
# Semantic Release: auto-bump version on push to main/master
|
# Semantic Release: auto-bump version on push to main/master
|
||||||
# - merge from feature/* branch → major bump
|
# - merge from feature/* branch → major bump
|
||||||
# - any other commit (fix, chore, etc.) → patch bump
|
# - any other commit (fix, chore, etc.) → patch bump
|
||||||
@@ -12,6 +12,14 @@ on:
|
|||||||
branches:
|
branches:
|
||||||
- main
|
- main
|
||||||
- master
|
- master
|
||||||
|
paths-ignore:
|
||||||
|
- 'docs/**'
|
||||||
|
- 'README.md'
|
||||||
|
- 'STATUS.md'
|
||||||
|
- 'AGENTS.md'
|
||||||
|
- 'TASKS.md'
|
||||||
|
- '.gitignore'
|
||||||
|
- 'djinni-*/**'
|
||||||
pull_request:
|
pull_request:
|
||||||
branches:
|
branches:
|
||||||
- main
|
- main
|
||||||
@@ -24,7 +32,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
run: |
|
run: |
|
||||||
git clone --depth=1 https://git.produktor.io/${{ gitea.repository }}.git .
|
git clone --depth=1 https://${{ gitea.actor }}:${{ gitea.token }}@git.produktor.io/${{ gitea.repository }}.git .
|
||||||
git checkout ${{ gitea.sha }}
|
git checkout ${{ gitea.sha }}
|
||||||
|
|
||||||
- name: Install Helm
|
- name: Install Helm
|
||||||
@@ -48,7 +56,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- name: Checkout (full history for tags)
|
- name: Checkout (full history for tags)
|
||||||
run: |
|
run: |
|
||||||
git clone https://git.produktor.io/${{ gitea.repository }}.git .
|
git clone https://${{ gitea.actor }}:${{ gitea.token }}@git.produktor.io/${{ gitea.repository }}.git .
|
||||||
git fetch --tags
|
git fetch --tags
|
||||||
|
|
||||||
- name: Determine version bump
|
- name: Determine version bump
|
||||||
@@ -60,13 +68,11 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
echo "Latest tag: $LATEST_TAG"
|
echo "Latest tag: $LATEST_TAG"
|
||||||
|
|
||||||
# Strip 'v' prefix and split
|
|
||||||
VER="${LATEST_TAG#v}"
|
VER="${LATEST_TAG#v}"
|
||||||
MAJOR=$(echo "$VER" | cut -d. -f1)
|
MAJOR=$(echo "$VER" | cut -d. -f1)
|
||||||
MINOR=$(echo "$VER" | cut -d. -f2)
|
MINOR=$(echo "$VER" | cut -d. -f2)
|
||||||
PATCH=$(echo "$VER" | cut -d. -f3)
|
PATCH=$(echo "$VER" | cut -d. -f3)
|
||||||
|
|
||||||
# Check if this commit is a merge from a feature/* branch
|
|
||||||
COMMIT_MSG=$(git log -1 --format='%s' ${{ gitea.sha }})
|
COMMIT_MSG=$(git log -1 --format='%s' ${{ gitea.sha }})
|
||||||
echo "Commit message: $COMMIT_MSG"
|
echo "Commit message: $COMMIT_MSG"
|
||||||
|
|
||||||
@@ -74,7 +80,6 @@ jobs:
|
|||||||
if echo "$COMMIT_MSG" | grep -qiE "^Merge.*feature/"; then
|
if echo "$COMMIT_MSG" | grep -qiE "^Merge.*feature/"; then
|
||||||
IS_FEATURE="true"
|
IS_FEATURE="true"
|
||||||
fi
|
fi
|
||||||
# Also check parent branches for merge commits
|
|
||||||
if git log -1 --format='%P' ${{ gitea.sha }} | grep -q ' '; then
|
if git log -1 --format='%P' ${{ gitea.sha }} | grep -q ' '; then
|
||||||
MERGE_BRANCH=$(git log -1 --format='%s' ${{ gitea.sha }} | grep -oE "feature/[^ '\"]*" || true)
|
MERGE_BRANCH=$(git log -1 --format='%s' ${{ gitea.sha }} | grep -oE "feature/[^ '\"]*" || true)
|
||||||
if [ -n "$MERGE_BRANCH" ]; then
|
if [ -n "$MERGE_BRANCH" ]; then
|
||||||
@@ -116,6 +121,24 @@ jobs:
|
|||||||
mv fleetdm-stack-*.tgz .tmp/
|
mv fleetdm-stack-*.tgz .tmp/
|
||||||
ls -la .tmp/
|
ls -la .tmp/
|
||||||
|
|
||||||
|
- name: Mirror FleetDM image to Gitea registry
|
||||||
|
run: |
|
||||||
|
CRANE_VER="v0.20.3"
|
||||||
|
curl -fsSL "https://github.com/google/go-containerregistry/releases/download/${CRANE_VER}/go-containerregistry_Linux_x86_64.tar.gz" \
|
||||||
|
| tar -xz -C /usr/local/bin crane
|
||||||
|
|
||||||
|
APP_VER=$(grep '^appVersion:' fleetdm-stack/Chart.yaml | awk '{print $2}' | tr -d '"')
|
||||||
|
CHART_TAG="${{ steps.version.outputs.new_tag }}"
|
||||||
|
SRC="docker.io/fleetdm/fleet:v${APP_VER}"
|
||||||
|
OWNER=$(echo "${{ gitea.repository_owner }}" | tr '[:upper:]' '[:lower:]')
|
||||||
|
DST="git.produktor.io/${OWNER}/flamingo-tech-test"
|
||||||
|
|
||||||
|
crane auth login git.produktor.io -u "${{ gitea.actor }}" -p "${{ secrets.REPO_TOKEN }}"
|
||||||
|
crane copy "${SRC}" "${DST}:${APP_VER}"
|
||||||
|
crane tag "${DST}:${APP_VER}" "${CHART_TAG}"
|
||||||
|
crane tag "${DST}:${APP_VER}" "latest"
|
||||||
|
echo "Mirrored ${SRC} → ${DST}:{${APP_VER},${CHART_TAG},latest}"
|
||||||
|
|
||||||
- name: Create tag
|
- name: Create tag
|
||||||
run: |
|
run: |
|
||||||
git config user.name "Gitea Actions"
|
git config user.name "Gitea Actions"
|
||||||
@@ -124,14 +147,24 @@ jobs:
|
|||||||
git push https://${{ gitea.actor }}:${{ gitea.token }}@git.produktor.io/${{ gitea.repository }}.git "${{ steps.version.outputs.new_tag }}"
|
git push https://${{ gitea.actor }}:${{ gitea.token }}@git.produktor.io/${{ gitea.repository }}.git "${{ steps.version.outputs.new_tag }}"
|
||||||
|
|
||||||
- name: Create Gitea Release
|
- name: Create Gitea Release
|
||||||
uses: https://gitea.com/actions/gitea-release-action@v1
|
run: |
|
||||||
with:
|
TAG="${{ steps.version.outputs.new_tag }}"
|
||||||
server_url: ${{ gitea.server_url }}
|
BUMP="${{ steps.version.outputs.bump_type }}"
|
||||||
token: ${{ gitea.token }}
|
API="https://git.produktor.io/api/v1/repos/${{ gitea.repository }}/releases"
|
||||||
tag_name: ${{ steps.version.outputs.new_tag }}
|
TOKEN="${{ gitea.token }}"
|
||||||
name: FleetDM Stack ${{ steps.version.outputs.new_tag }}
|
|
||||||
body: |
|
|
||||||
**${{ steps.version.outputs.bump_type }}** release — `${{ steps.version.outputs.new_tag }}`
|
|
||||||
|
|
||||||
Helm chart for FleetDM Server with MySQL and Redis.
|
RELEASE=$(curl -sf -X POST "$API" \
|
||||||
files: .tmp/*.tgz
|
-H "Authorization: token $TOKEN" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "{\"tag_name\":\"$TAG\",\"name\":\"FleetDM Stack $TAG\",\"body\":\"**${BUMP}** release — \`${TAG}\`\n\nHelm chart for FleetDM Server with MySQL and Redis.\"}")
|
||||||
|
RELEASE_ID=$(echo "$RELEASE" | grep -o '"id":[0-9]*' | head -1 | cut -d: -f2)
|
||||||
|
echo "Created release ID: $RELEASE_ID"
|
||||||
|
|
||||||
|
for f in .tmp/*.tgz; do
|
||||||
|
FNAME=$(basename "$f")
|
||||||
|
curl -sf -X POST "$API/$RELEASE_ID/assets?name=$FNAME" \
|
||||||
|
-H "Authorization: token $TOKEN" \
|
||||||
|
-H "Content-Type: application/octet-stream" \
|
||||||
|
--data-binary "@$f"
|
||||||
|
echo "Uploaded: $FNAME"
|
||||||
|
done
|
||||||
|
|||||||
Vendored
+1
@@ -5,3 +5,4 @@ fleetdm-stack/charts/*.tgz
|
|||||||
.helm/
|
.helm/
|
||||||
*.log
|
*.log
|
||||||
/*-flamingo
|
/*-flamingo
|
||||||
|
.idea
|
||||||
|
|||||||
+8
@@ -0,0 +1,8 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
# FleetDM Stack — Flamingo DevOps Assignment
|
# 🦩 FleetDM Stack
|
||||||
|
|
||||||
Helm chart deploying **FleetDM Server** with **MySQL** and **Redis** to Kubernetes. Suitable for local development (Kind/Minikube) and adaptable for production.
|
Helm chart deploying **FleetDM Server** with **MySQL** and **Redis** to Kubernetes. Suitable for local development (Kind/Minikube) and adaptable for production.
|
||||||
|
|
||||||
@@ -23,6 +23,8 @@ make verify
|
|||||||
make port-forward
|
make port-forward
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
### 1. Create local cluster
|
### 1. Create local cluster
|
||||||
@@ -63,6 +65,10 @@ make port-forward FLEET_PORT=9090
|
|||||||
Open **https://localhost:8585** in your browser (accept the self-signed certificate).
|
Open **https://localhost:8585** in your browser (accept the self-signed certificate).
|
||||||
The Fleet setup wizard will guide you through the initial configuration.
|
The Fleet setup wizard will guide you through the initial configuration.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
## Teardown
|
## Teardown
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -83,7 +89,7 @@ Verification checklist:
|
|||||||
|
|
||||||
| Component | Check |
|
| Component | Check |
|
||||||
| ----------- | ----------------------------------------------------------------- |
|
| ----------- | ----------------------------------------------------------------- |
|
||||||
| **FleetDM** | Pods running; `make port-forward` → https://localhost:8585 |
|
| **FleetDM** | Pods running; `make port-forward` → https://localhost:8585 |
|
||||||
| **MySQL** | `fleetdm-stack-mysql` service; Fleet connects and runs migrations |
|
| **MySQL** | `fleetdm-stack-mysql` service; Fleet connects and runs migrations |
|
||||||
| **Redis** | `fleetdm-stack-redis-master` service; Fleet uses it for cache |
|
| **Redis** | `fleetdm-stack-redis-master` service; Fleet uses it for cache |
|
||||||
|
|
||||||
@@ -162,5 +168,5 @@ tech-task/
|
|||||||
|
|
||||||
The architectural design document for "Company Inc." is in `docs/`:
|
The architectural design document for "Company Inc." is in `docs/`:
|
||||||
|
|
||||||
- [Architecture Design Document](docs/architecture-design-company-inc.md) — 1–2 page design (convert to PDF for submission)
|
- [Architecture Design Document](docs/architecture-design-company-inc.md) — 1-2 page design (convert to PDF for submission)
|
||||||
- [High-Level Diagram](docs/architecture-hld.md) — Mermaid diagrams (infra, CI/CD, network security)
|
- [High-Level Diagram](docs/architecture-hld.md) — Mermaid diagrams (infra, CI/CD, network security)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
This document outlines a robust, scalable, secure, and cost-effective infrastructure design for Company Inc., a startup deploying a web application with a Python/Flask REST API backend, React SPA frontend, and MongoDB database. The design leverages **Google Cloud Platform (GCP)** with **GKE (Google Kubernetes Engine)** as the primary compute platform.
|
This document outlines a robust, scalable, secure, and cost-effective infrastructure design for Company Inc., a startup deploying a web application with a Python/Flask REST API backend, React SPA frontend, and MongoDB database. The design leverages **Google Cloud Platform (GCP)** with **GKE (Google Kubernetes Engine)** as the primary compute platform.
|
||||||
|
|
||||||
**Key Design Principles:** Security-by-default, scalability from day one, cost optimization for early stage, and GitOps-based operations.
|
**Key Design Principles:** Cost awareness from day one, security-by-default, scalability when needed, and GitOps-based operations.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -20,20 +20,26 @@ This document outlines a robust, scalable, secure, and cost-effective infrastruc
|
|||||||
|
|
||||||
**Rationale:** GCP offers strong managed Kubernetes (GKE) with Autopilot options, excellent MongoDB Atlas integration (or GCP-native document-database alternatives such as Firestore), competitive pricing for startups, and simplified networking. GKE Autopilot reduces operational overhead for a small team with limited Kubernetes expertise.
|
**Rationale:** GCP offers strong managed Kubernetes (GKE) with Autopilot options, excellent MongoDB Atlas integration (or GCP-native document-database alternatives such as Firestore), competitive pricing for startups, and simplified networking. GKE Autopilot reduces operational overhead for a small team with limited Kubernetes expertise.
|
||||||
|
|
||||||
### 2.2 Multi-Project Structure
|
### 2.2 Project Structure (Cost-Optimised)
|
||||||
|
|
||||||
|
For a startup, fewer projects mean lower overhead and simpler billing. Start with **3 projects** and add more only when traffic or compliance demands it.
|
||||||
|
|
||||||
| Project | Purpose | Isolation |
|
| Project | Purpose | Isolation |
|
||||||
|---------|---------|-----------|
|
|---------|---------|-----------|
|
||||||
| **company-inc-prod** | Production workloads | High; sensitive data |
|
| **company-inc-prod** | Production workloads | High; sensitive data |
|
||||||
| **company-inc-staging** | Staging / pre-production | Medium |
|
| **company-inc-staging** | Staging, QA, and dev experimentation | Medium |
|
||||||
| **company-inc-shared** | CI/CD, shared tooling, DNS | Low; no PII |
|
| **company-inc-shared** | CI/CD, Artifact Registry, DNS | Low; no PII |
|
||||||
| **company-inc-sandbox** | Dev experimentation | Lowest |
|
|
||||||
|
**Why not 4+ projects?**
|
||||||
|
- A dedicated sandbox project adds billing, IAM, and networking overhead with little benefit at startup scale.
|
||||||
|
- Developers can use Kubernetes namespaces within the staging cluster for experimentation.
|
||||||
|
- A fourth project can be introduced later when team size or compliance (SOC2, HIPAA) requires it.
|
||||||
|
|
||||||
**Benefits:**
|
**Benefits:**
|
||||||
- Billing separation per environment
|
- Billing separation (prod costs are clearly visible)
|
||||||
- Blast-radius containment (prod issues do not affect staging)
|
- Blast-radius containment (prod issues do not affect staging)
|
||||||
- IAM and network isolation
|
- IAM isolation between environments
|
||||||
- Aligns with GCP best practices for multi-tenant or multi-env setups
|
- Minimal fixed cost — only 3 projects to manage
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -96,14 +102,79 @@ flowchart TD
|
|||||||
- **Frontend (React):** Static assets served via CDN or container; 1–2 replicas
|
- **Frontend (React):** Static assets served via CDN or container; 1–2 replicas
|
||||||
- **Ingress:** GKE Ingress for HTTP(S) routing; consider GKE Gateway API for advanced use
|
- **Ingress:** GKE Ingress for HTTP(S) routing; consider GKE Gateway API for advanced use
|
||||||
|
|
||||||
### 4.4 Containerisation and CI/CD
|
### 4.4 Blue-Green Deployment
|
||||||
|
|
||||||
|
Zero-downtime releases without duplicating infrastructure. Both versions run inside the **same GKE cluster**; the load balancer switches traffic atomically.
|
||||||
|
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart LR
|
||||||
|
LB[Load Balancer]
|
||||||
|
LB -->|100% traffic| Green[Green — v1.2.0<br/>current stable]
|
||||||
|
LB -.->|0% traffic| Blue[Blue — v1.3.0<br/>new release]
|
||||||
|
Blue -.->|smoke tests pass| LB
|
||||||
|
```
|
||||||
|
---
|
||||||
|
| Phase | Action |
|
||||||
|
|-------|--------|
|
||||||
|
| **Deploy** | New version deployed to the idle slot (blue) |
|
||||||
|
| **Test** | Run smoke tests / synthetic checks against blue |
|
||||||
|
| **Switch** | Update Service selector or Ingress to point to blue |
|
||||||
|
| **Rollback** | Instant — revert selector back to green (old version still running) |
|
||||||
|
| **Cleanup** | Scale down old slot after confirmation period |
|
||||||
|
|
||||||
|
**Cost impact:** Near-zero — both slots share the same node pool; the idle slot consumes minimal resources until traffic is switched. Argo Rollouts (a separate controller that runs alongside ArgoCD) automates the full lifecycle.
|
||||||
|
|
||||||
|
### 4.5 Containerisation Strategy
|
||||||
|
|
||||||
|
#### Image Building Process
|
||||||
|
|
||||||
|
Each service (Flask backend, React frontend) has its own **multi-stage Dockerfile**:
|
||||||
|
|
||||||
|
1. **Build stage** — installs dependencies and compiles artefacts in a full SDK image (e.g. `python:3.12`, `node:20`).
|
||||||
|
2. **Runtime stage** — copies only the built artefacts into a minimal base image (e.g. `python:3.12-slim`, `nginx:alpine`). This cuts image size by 60–80% and removes build tools from the attack surface.
|
||||||
|
3. **Non-root user** — the runtime stage runs as a dedicated unprivileged user (`appuser`), never as root.
|
||||||
|
4. **Reproducible builds** — pinned dependency files (a fully pinned `requirements.txt` / `package-lock.json`) are copied and installed before application code, which keeps builds repeatable and maximises Docker layer caching.
|
||||||
|
|
||||||
|
**Tagging convention:** images are tagged with the **git SHA** for traceability and a `latest` alias for convenience. Semantic version tags (e.g. `v1.3.0`) are added on release.
|
||||||
|
|
||||||
|
#### Container Registry Management
|
||||||
|
|
||||||
|
All container images are stored in **GCP Artifact Registry** in the `company-inc-shared` project:
|
||||||
|
|
||||||
|
- **Single source of truth** — one registry serves both staging and production via cross-project IAM pull permissions.
|
||||||
|
- **Vulnerability scanning** — Artifact Registry's built-in scanning is enabled; CI fails if critical CVEs are detected.
|
||||||
|
- **Image retention policy** — keep the latest 10 tagged images per service; automatically garbage-collect untagged manifests older than 30 days.
|
||||||
|
- **Access control** — CI service account has `roles/artifactregistry.writer`; GKE node service accounts have `roles/artifactregistry.reader`. No human push access.
|
||||||
|
|
||||||
|
*For self-hosted Git platforms (e.g. Gitea), the built-in OCI container registry can serve the same role at zero additional cost, with Trivy added as a CI step for vulnerability scanning.*
|
||||||
|
|
||||||
|
#### Deployment Pipelines (CI/CD Integration)
|
||||||
|
|
||||||
|
The pipeline follows a **GitOps** model with clear separation between CI and CD:
|
||||||
|
|
||||||
|
| Phase | Tool | What happens |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| **Lint & Test** | Gitea / GitHub Actions | Unit tests, linting, Helm lint on every push |
|
||||||
|
| **Build & Push** | Gitea / GitHub Actions | `docker build` → tag with git SHA → push to registry |
|
||||||
|
| **Security Scan** | Trivy (in CI) | Scan image for OS and library CVEs; block on critical findings |
|
||||||
|
| **Manifest Update** | CI job | Update image tag in the GitOps manifests repo (or Helm values) |
|
||||||
|
| **Sync & Deploy** | ArgoCD | Detects manifest drift → triggers blue-green rollout via Argo Rollouts |
|
||||||
|
| **Promotion** | Argo Rollouts | Automated analysis (metrics, health checks) → promote or rollback |
|
||||||
|
|
||||||
|
**Key properties:**
|
||||||
|
- **CI never touches the cluster directly** — it only builds images and updates manifests. ArgoCD is the sole deployer.
|
||||||
|
- **Rollback is instant** — revert the manifest repo to the previous commit; ArgoCD syncs automatically.
|
||||||
|
- **Audit trail** — every deployment maps to a git commit in the manifests repo.
|
||||||
|
|
||||||
|
### 4.6 CI/CD Summary
|
||||||
|
|
||||||
| Aspect | Approach |
|
| Aspect | Approach |
|
||||||
|-------|----------|
|
|-------|----------|
|
||||||
| **Image build** | Dockerfile per service; multi-stage builds; non-root user |
|
| **Image build** | Multi-stage Dockerfile; layer caching; non-root; git-SHA tags |
|
||||||
| **Registry** | Artifact Registry (GCR) in `company-inc-shared` |
|
| **Registry** | Artifact Registry in `company-inc-shared` (or Gitea built-in OCI registry) |
|
||||||
| **CI** | GitHub Actions (or GitLab CI) — build, test, security scan |
|
| **CI** | Gitea / GitHub Actions — lint, test, build, scan, push |
|
||||||
| **CD** | ArgoCD or Flux — GitOps; app of apps pattern |
|
| **CD** | ArgoCD + Argo Rollouts — GitOps with blue-green strategy |
|
||||||
| **Secrets** | External Secrets Operator + GCP Secret Manager |
|
| **Secrets** | External Secrets Operator + GCP Secret Manager |
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -138,10 +209,48 @@ flowchart TD
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 6. High-Level Architecture Diagram
|
## 6. Cost Optimisation Strategy
|
||||||
|
|
||||||
|
| Lever | Approach | Estimated Savings |
|
||||||
|
|-------|----------|-------------------|
|
||||||
|
| **3 projects, not 4** | Drop sandbox; use staging namespaces | ~25% fewer fixed project costs |
|
||||||
|
| **GKE Autopilot** | Pay per pod, not per node; no idle nodes | 30–60% vs standard GKE |
|
||||||
|
| **Blue-green in-cluster** | No duplicate environments for releases | Near-zero deployment cost |
|
||||||
|
| **Spot/preemptible pods** | Use for staging and non-critical workloads | 60–80% off compute |
|
||||||
|
| **Committed use discounts** | 1-year CUDs once baseline is established | 20–30% off sustained use |
|
||||||
|
| **CDN for frontend** | Offload SPA traffic from GKE | Fewer pod replicas needed |
|
||||||
|
| **MongoDB Atlas auto-scale** | Start M10; scale up only when needed | Avoid over-provisioning |
|
||||||
|
| **Cloud NAT shared** | Single NAT in shared project | Avoid per-project NAT cost |
|
||||||
|
|
||||||
|
**Monthly cost estimate (early stage):**
|
||||||
|
- GKE Autopilot (2–3 API pods + 1 SPA): ~$80–150
|
||||||
|
- MongoDB Atlas M10: ~$60
|
||||||
|
- Load Balancer + Cloud NAT: ~$30
|
||||||
|
- Artifact Registry + Secret Manager: ~$5
|
||||||
|
- **Total: ~$175–245/month**
|
||||||
|
|
||||||
|
### 6.1 What Would Be Overkill at This Stage
|
||||||
|
|
||||||
|
Not everything in a "best practices" architecture is worth implementing on day one. The following are valuable at scale but add cost and complexity that a startup with a few hundred users/day does not need yet.
|
||||||
|
|
||||||
|
| Component | Why it's overkill now | When to introduce |
|
||||||
|
|-----------|----------------------|-------------------|
|
||||||
|
| **Multi-region GKE** | Single region handles millions of req/day; multi-region doubles cost | When SLA requires 99.99% or users span continents |
|
||||||
|
| **Service mesh (Istio/Linkerd)** | Adds sidecar overhead, complexity, and debugging difficulty | When you have 10+ microservices with mTLS requirements |
|
||||||
|
| **Cross-region MongoDB replica** | Atlas M10 with multi-AZ is sufficient; cross-region adds ~2x DB cost | When RPO < 1 hour is a compliance requirement |
|
||||||
|
| **Dedicated observability stack** | GKE built-in monitoring and Cloud Logging are free within their free-tier allotments; a self-managed Prometheus/Grafana stack adds ops burden | When the team has > 2 SREs and needs custom dashboards |
|
||||||
|
| **4+ GCP projects** | 3 projects cover prod/staging/shared; more adds IAM and billing complexity | When compliance (SOC2, HIPAA) requires strict separation |
|
||||||
|
| **API Gateway (Apigee, Kong)** | GKE Ingress handles routing; a gateway adds cost and latency | When you need rate limiting, API keys, or monetisation |
|
||||||
|
| **Vault for secrets** | GCP Secret Manager is cheaper, simpler, and natively integrated | When you need dynamic secrets or multi-cloud secret federation |
|
||||||
|
|
||||||
|
**Rule of thumb:** if a component doesn't solve a problem you have *today*, defer it. Every added piece increases the monthly bill and the on-call surface area.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. High-Level Architecture Diagram
|
||||||
|
|
||||||
```mermaid
|
```mermaid
|
||||||
flowchart TB
|
flowchart TD
|
||||||
Users((Users))
|
Users((Users))
|
||||||
|
|
||||||
Users --> CDN[Cloud CDN<br/>Static Assets]
|
Users --> CDN[Cloud CDN<br/>Static Assets]
|
||||||
@@ -164,21 +273,22 @@ flowchart TB
|
|||||||
|
|
||||||
API --> Mongo
|
API --> Mongo
|
||||||
API --> Secrets
|
API --> Secrets
|
||||||
GKE --> Registry
|
GKE ----> Registry
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 7. Summary of Recommendations
|
## 8. Summary of Recommendations
|
||||||
|
|
||||||
| Area | Recommendation |
|
| Area | Recommendation |
|
||||||
|------|----------------|
|
|------|----------------|
|
||||||
| **Cloud** | GCP with 4 projects (prod, staging, shared, sandbox) |
|
| **Cloud** | GCP with 3 projects (prod, staging, shared) |
|
||||||
| **Compute** | GKE Autopilot, private nodes, HPA |
|
| **Compute** | GKE Autopilot, private nodes, HPA |
|
||||||
|
| **Deployments** | Blue-green via Argo Rollouts — zero downtime, instant rollback |
|
||||||
| **Database** | MongoDB Atlas on GCP with multi-AZ, automated backups |
|
| **Database** | MongoDB Atlas on GCP with multi-AZ, automated backups |
|
||||||
| **CI/CD** | GitHub Actions + ArgoCD/Flux |
|
| **CI/CD** | GitHub/Gitea Actions + ArgoCD |
|
||||||
| **Security** | Private VPC, TLS everywhere, Secret Manager, least privilege |
|
| **Security** | Private VPC, TLS everywhere, Secret Manager, least privilege |
|
||||||
| **Cost** | Start small; use committed use discounts as usage grows |
|
| **Cost** | ~$175–245/month early stage; spot pods, CUDs as traffic grows |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
+47
-11
@@ -9,22 +9,25 @@ flowchart TB
|
|||||||
end
|
end
|
||||||
|
|
||||||
subgraph GCP["Google Cloud Platform"]
|
subgraph GCP["Google Cloud Platform"]
|
||||||
subgraph Projects["Project Structure"]
|
subgraph Projects["Project Structure (3 projects)"]
|
||||||
Prod[company-inc-prod]
|
Prod[company-inc-prod]
|
||||||
Staging[company-inc-staging]
|
Staging[company-inc-staging<br/>QA + dev namespaces]
|
||||||
Shared[company-inc-shared]
|
Shared[company-inc-shared]
|
||||||
Sandbox[company-inc-sandbox]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
subgraph Edge["Edge / Networking"]
|
subgraph Edge["Edge / Networking"]
|
||||||
LB[Cloud Load Balancer<br/>HTTPS · TLS termination]
|
LB[Cloud Load Balancer<br/>HTTPS · TLS termination]
|
||||||
CDN[Cloud CDN<br/>Static Assets]
|
CDN[Cloud CDN<br/>Static Assets]
|
||||||
NAT[Cloud NAT<br/>Egress]
|
NAT[Cloud NAT<br/>Egress · shared]
|
||||||
end
|
end
|
||||||
|
|
||||||
subgraph VPC["VPC — Private Subnets"]
|
subgraph VPC["VPC — Private Subnets"]
|
||||||
subgraph GKE["GKE Autopilot Cluster"]
|
subgraph GKE["GKE Autopilot Cluster"]
|
||||||
Ingress[Ingress Controller]
|
Ingress[Ingress Controller]
|
||||||
|
subgraph BlueGreen["Blue-Green Deployment"]
|
||||||
|
Green[Green — stable<br/>receives traffic]
|
||||||
|
Blue[Blue — new release<br/>smoke tests]
|
||||||
|
end
|
||||||
subgraph Workloads
|
subgraph Workloads
|
||||||
API[Backend — Python / Flask<br/>HPA · 2–3 replicas]
|
API[Backend — Python / Flask<br/>HPA · 2–3 replicas]
|
||||||
SPA[Frontend — React SPA<br/>Nginx]
|
SPA[Frontend — React SPA<br/>Nginx]
|
||||||
@@ -44,14 +47,17 @@ flowchart TB
|
|||||||
subgraph CICD["CI / CD"]
|
subgraph CICD["CI / CD"]
|
||||||
Git[Git Repository]
|
Git[Git Repository]
|
||||||
Actions[Gitea / GitHub Actions<br/>Build · Test · Scan]
|
Actions[Gitea / GitHub Actions<br/>Build · Test · Scan]
|
||||||
Argo[ArgoCD / Flux<br/>GitOps Deploy]
|
Argo[ArgoCD + Argo Rollouts<br/>GitOps · Blue-Green]
|
||||||
end
|
end
|
||||||
|
|
||||||
Users --> LB
|
Users --> LB
|
||||||
Users --> CDN
|
Users --> CDN
|
||||||
LB --> Ingress
|
LB --> Ingress
|
||||||
CDN --> SPA
|
CDN --> SPA
|
||||||
Ingress --> API
|
Ingress -->|traffic| Green
|
||||||
|
Ingress -.->|after switch| Blue
|
||||||
|
Green --> API
|
||||||
|
Blue --> API
|
||||||
Ingress --> SPA
|
Ingress --> SPA
|
||||||
API --> Redis
|
API --> Redis
|
||||||
API --> Mongo
|
API --> Mongo
|
||||||
@@ -61,7 +67,25 @@ flowchart TB
|
|||||||
|
|
||||||
Git --> Actions
|
Git --> Actions
|
||||||
Actions --> Registry
|
Actions --> Registry
|
||||||
Argo --> GKE
|
Argo ----> GKE
|
||||||
|
```
|
||||||
|
|
||||||
|
## Blue-Green Deployment Flow
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart LR
|
||||||
|
subgraph Cluster["GKE Cluster"]
|
||||||
|
LB[Load Balancer<br/>Service Selector]
|
||||||
|
Green[Green — v1.2.0<br/>current stable]
|
||||||
|
Blue[Blue — v1.3.0<br/>new release]
|
||||||
|
end
|
||||||
|
|
||||||
|
Deploy[ArgoCD<br/>Argo Rollouts] -->|deploy new version| Blue
|
||||||
|
Blue -->|smoke tests| Check{Tests pass?}
|
||||||
|
Check -->|yes| LB
|
||||||
|
LB -->|switch 100%| Blue
|
||||||
|
Check -->|no| Rollback[Rollback<br/>keep Green]
|
||||||
|
LB -.->|instant rollback| Green
|
||||||
```
|
```
|
||||||
|
|
||||||
## CI / CD Pipeline
|
## CI / CD Pipeline
|
||||||
@@ -70,19 +94,31 @@ flowchart TB
|
|||||||
flowchart LR
|
flowchart LR
|
||||||
Dev[Developer] -->|push| Repo[Git Repo]
|
Dev[Developer] -->|push| Repo[Git Repo]
|
||||||
Repo -->|webhook| CI[CI Pipeline<br/>lint · test · build]
|
Repo -->|webhook| CI[CI Pipeline<br/>lint · test · build]
|
||||||
CI -->|push image| Registry[Artifact Registry]
|
CI -->|docker build + push| Registry[Container Registry<br/>Artifact Registry / Gitea OCI]
|
||||||
|
CI -->|scan image| Trivy[Trivy<br/>CVE scan]
|
||||||
CI -->|update manifests| GitOps[GitOps Repo]
|
CI -->|update manifests| GitOps[GitOps Repo]
|
||||||
GitOps -->|sync| Argo[ArgoCD / Flux]
|
GitOps -->|sync| Argo[ArgoCD]
|
||||||
Argo -->|deploy| GKE[GKE Cluster]
|
Argo -->|blue-green deploy| GKE[GKE Cluster]
|
||||||
|
GKE -->|pull image| Registry
|
||||||
```
|
```
|
||||||
|
|
||||||
## Network Security Layers
|
## Network Security Layers
|
||||||
|
|
||||||
```mermaid
|
```mermaid
|
||||||
flowchart TD
|
flowchart LR
|
||||||
Internet((Internet)) --> FW[VPC Firewall<br/>Default deny]
|
Internet((Internet)) --> FW[VPC Firewall<br/>Default deny]
|
||||||
FW --> LB[Load Balancer<br/>HTTPS only]
|
FW --> LB[Load Balancer<br/>HTTPS only]
|
||||||
LB --> NP[K8s Network Policies]
|
LB --> NP[K8s Network Policies]
|
||||||
NP --> Pods[Application Pods<br/>Private IPs only]
|
NP --> Pods[Application Pods<br/>Private IPs only]
|
||||||
Pods --> PE[Private Endpoint<br/>MongoDB Atlas]
|
Pods --> PE[Private Endpoint<br/>MongoDB Atlas]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Cost Profile (Early Stage)
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
pie title Monthly Cost Breakdown (~$200)
|
||||||
|
"GKE Autopilot" : 120
|
||||||
|
"MongoDB Atlas M10" : 60
|
||||||
|
"LB + NAT" : 30
|
||||||
|
"Registry + Secrets" : 5
|
||||||
|
```
|
||||||
|
|||||||
Binary file not shown.
|
After Width: | Height: | Size: 161 KiB |
Reference in New Issue
Block a user