From cd358ce28a2bd6aaa65d30d83d6e09bb65a79f52 Mon Sep 17 00:00:00 2001
From: Andriy Oblivantsev
Date: Thu, 19 Feb 2026 16:25:44 +0000
Subject: [PATCH] Add initial Helm chart for FleetDM Stack with MySQL and
 Redis, including README, CI pipeline, and architecture documentation. Update
 .gitignore for local development files.

---
 .github/workflows/release.yaml          |  43 +++++
 .gitignore                              |   6 +
 README.md                               | 167 +++++++++++++++++++
 docs/architecture-design-company-inc.md | 203 ++++++++++++++++++++++++
 docs/architecture-hld.md                |  85 ++++++++++
 fleetdm-stack/values.yaml               |  15 +-
 6 files changed, 512 insertions(+), 7 deletions(-)
 create mode 100644 .github/workflows/release.yaml
 create mode 100644 docs/architecture-design-company-inc.md
 create mode 100644 docs/architecture-hld.md

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 0000000..2d0caec
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,43 @@
+# FleetDM Stack Helm Chart Release Pipeline
+# Publishes chart to GitHub Pages when version is bumped
+# Requires: GITHUB_TOKEN, chart-releaser-action
+
+name: Release Helm Chart
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'fleetdm-stack/**'
+      - '.github/workflows/release.yaml'
+
+permissions:
+  contents: write
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Configure Git
+        run: |
+          git config user.name "$GITHUB_ACTOR"
+          git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
+
+      - name: Install Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: 'v3.14.0'
+
+      - name: Run chart-releaser
+        uses: helm/chart-releaser-action@v1.6.0
+        with:
+          charts_dir: .  # parent of the chart folder (repo root), not the chart itself
+          skip_existing: true  # a push without a version bump is a no-op, not a failure
+        env:
+          CR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
index b959227..34fa391 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,7 @@
+# Helm
+fleetdm-stack/charts/*.tgz
+
+# Local
+.helm/
+*.log
djinni-007-devops-engineer-flamingo diff --git a/README.md b/README.md index e69de29..8b570ff 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,167 @@ +# FleetDM Stack — Flamingo DevOps Assignment + +Helm chart deploying **FleetDM Server** with **MySQL** and **Redis** to Kubernetes. Suitable for local development (Kind/Minikube) and adaptable for production. + +## Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) +- [kubectl](https://kubernetes.io/docs/tasks/tools/) +- [Helm 3](https://helm.sh/docs/intro/install/) +- **Kind** or **Minikube** for local cluster + +## Quick Start + +```bash +# Create local cluster and deploy +make cluster +make install + +# Verify deployment +make verify +``` + +## Installation + +### 1. Create local cluster + +Creates a Kind or Minikube cluster and installs the nginx ingress controller (Kind) or enables ingress addon (Minikube). + +```bash +# Default: Kind +make cluster + +# Or use Minikube +make cluster CLUSTER_TYPE=minikube +``` + +### 2. Install the Helm chart + +```bash +make install +``` + +This will: + +- Update Helm dependencies +- Create the `fleetdm` namespace +- Deploy MySQL, Redis, and FleetDM Server +- Run `fleet prepare db` automatically on fresh install (via `autoApplySQLMigrations`) + +### 3. 
Access Fleet UI + +**Kind:** + +```bash +# Add to /etc/hosts (or equivalent) +echo "127.0.0.1 fleet.localhost" | sudo tee -a /etc/hosts + +# Access via ingress (ensure ingress-nginx is ready) +curl -H "Host: fleet.localhost" http://localhost +# Or, with the /etc/hosts entry above, open http://fleet.localhost in a browser +``` + +**Minikube:** + +```bash +minikube tunnel +# Then add fleet.localhost to /etc/hosts pointing to minikube IP +``` + +## Teardown + +```bash +# Remove Helm release and namespace +make uninstall + +# Remove cluster (Kind or Minikube) +make clean +``` + +## Verification + +```bash +make verify +``` + +Verification checklist: + +| Component | Check | +|-----------|-------| +| **FleetDM** | Pods running; ingress `fleet.localhost` serves Fleet UI | +| **MySQL** | `fleetdm-stack-mysql` service; Fleet connects and runs migrations | +| **Redis** | `fleetdm-stack-redis-master` service; Fleet uses it for cache | + +### Manual verification + +```bash +# Check pods +kubectl get pods -n fleetdm + +# Check Fleet migration job (fleet prepare db) +kubectl get jobs -n fleetdm + +# Check services +kubectl get svc -n fleetdm + +# Fleet logs +kubectl logs -n fleetdm -l app=fleet -f +``` + +## Configuration + +| Value | Description | Default | +|-------|-------------|---------| +| `mysql.auth.password` | MySQL password | `fleetdm-local-dev` | +| `fleet.replicas` | Fleet server replicas | `1` | +| `fleet.hostName` | Ingress host | `fleet.localhost` | + +Override via `--set` or custom values file: + +```bash +helm upgrade --install fleetdm-stack fleetdm-stack/ \ + -n fleetdm \ + --set mysql.auth.password=SECURE_PASSWORD +``` + +## FleetDM agent reachability + +The chart exposes Fleet via ingress so: + +- **Fleet UI** is available at `http://fleet.localhost` +- **Agent endpoints** (`/api/v1/osquery/*`, `/api/fleet/orbit/*`, etc.) are reachable under the same host + +For production, configure TLS and ensure agents can reach the Fleet server hostname. 
+ +## Enhancements implemented + +1. **Basic CI pipeline** — GitHub Actions releases new Helm chart versions (see [.github/workflows/release.yaml](.github/workflows/release.yaml)) +2. **Exposed Fleet UI** — Ingress with `fleet.localhost` for UI and agent enrollment +3. **`fleet prepare db`** — Handled by `autoApplySQLMigrations: true` in the Fleet Helm chart + +## Project Structure + +``` +tech-task/ +├── fleetdm-stack/ # Helm chart (FleetDM + MySQL + Redis) +│ ├── Chart.yaml +│ ├── Chart.lock +│ ├── values.yaml +│ └── charts/ # Dependencies (run make deps) +├── Makefile +├── README.md +├── .github/workflows/ # CI for Helm chart releases +└── docs/ # Theoretical part + ├── architecture-design-company-inc.md + └── architecture-hld.md +``` + +## Theoretical Part + +The architectural design document for "Company Inc." is in `docs/`: + +- [Architecture Design Document](docs/architecture-design-company-inc.md) — 1–2 page design (convert to PDF for submission) +- [High-Level Diagram Reference](docs/architecture-hld.md) — Mermaid source and draw.io guide for HLD + +## License + +MIT diff --git a/docs/architecture-design-company-inc.md b/docs/architecture-design-company-inc.md new file mode 100644 index 0000000..47f3984 --- /dev/null +++ b/docs/architecture-design-company-inc.md @@ -0,0 +1,203 @@ +# Architectural Design Document: Company Inc. + +**Cloud Infrastructure for Web Application Deployment** +**Version:** 1.0 +**Date:** February 2026 + +--- + +## 1. Executive Summary + +This document outlines a robust, scalable, secure, and cost-effective infrastructure design for Company Inc., a startup deploying a web application with a Python/Flask REST API backend, React SPA frontend, and MongoDB database. The design leverages **Google Cloud Platform (GCP)** with **GKE (Google Kubernetes Engine)** as the primary compute platform. + +**Key Design Principles:** Security-by-default, scalability from day one, cost optimization for early stage, and GitOps-based operations. 
+ +--- + +## 2. Cloud Provider and Environment Structure + +### 2.1 Provider Choice: GCP + +**Rationale:** GCP offers strong managed Kubernetes (GKE) with Autopilot options, excellent MongoDB Atlas integration (or GCP-native alternatives such as Firestore with MongoDB compatibility), competitive pricing for startups, and simplified networking. GKE Autopilot reduces operational overhead for a small team with limited Kubernetes expertise. + +### 2.2 Multi-Project Structure + +| Project | Purpose | Isolation | +|---------|---------|-----------| +| **company-inc-prod** | Production workloads | High; sensitive data | +| **company-inc-staging** | Staging / pre-production | Medium | +| **company-inc-shared** | CI/CD, shared tooling, DNS | Low; no PII | +| **company-inc-sandbox** | Dev experimentation | Lowest | + +**Benefits:** +- Billing separation per environment +- Blast-radius containment (prod issues do not affect staging) +- IAM and network isolation +- Aligns with GCP best practices for multi-tenant or multi-env setups + +--- + +## 3. 
Network Design + +### 3.1 VPC Architecture + +- **One VPC per project** (or Shared VPC from `company-inc-shared` for centralised control) +- **Regional subnets**, with workloads spread across at least 2 zones for HA +- **Private subnets** for workloads (no public IPs on nodes) +- **Public subnets** only for load balancers and NAT gateways + +### 3.2 Security Layers + +| Layer | Controls | +|-------|----------| +| **VPC Firewall** | Default deny; allow only required CIDRs and ports | +| **GKE node pools** | Private nodes; no public IPs | +| **Pod-level segmentation** | Kubernetes Network Policies + GKE-native security | +| **Ingress** | HTTPS only; TLS termination at load balancer | +| **Egress** | Cloud NAT for outbound; restrict to necessary destinations | + +### 3.3 Network Topology (High-Level) + +``` +Internet + | + v +[Cloud Load Balancer] (HTTPS) + | + v +[GKE Ingress Controller] + | + v +[VPC Private Subnets] + | + +-- [GKE Cluster - API Pods] + +-- [GKE Cluster - Frontend Pods] + | + v +[Private connectivity to MongoDB] +``` + +--- + +## 4. 
Compute Platform: GKE + +### 4.1 Cluster Strategy + +- **GKE Autopilot** for production and staging to minimise node management +- **Single regional cluster** per environment initially; consider multi-region as scale demands +- **Private cluster** with no public endpoint; access via IAP or Bastion if needed + +### 4.2 Node Configuration + +| Setting | Initial | Growth Phase | +|---------|---------|--------------| +| **Node type** | Autopilot (no manual sizing) | Same | +| **Min nodes** | 0 (scale to zero when idle) | 2 | +| **Max nodes** | 5 | 50+ | +| **Scaling** | Pod-based (HPA, cluster autoscaler) | Same | + +### 4.3 Workload Layout + +- **Backend (Python/Flask):** Deployment with HPA (CPU/memory); target 2–3 replicas initially +- **Frontend (React):** Static assets served via CDN or container; 1–2 replicas +- **Ingress:** GKE Ingress for HTTP(S) routing; consider GKE Gateway API for advanced use + +### 4.4 Containerisation and CI/CD + +| Aspect | Approach | +|-------|----------| +| **Image build** | Dockerfile per service; multi-stage builds; non-root user | +| **Registry** | Artifact Registry in `company-inc-shared` | +| **CI** | GitHub Actions (or GitLab CI) — build, test, security scan | +| **CD** | ArgoCD or Flux — GitOps; app of apps pattern | +| **Secrets** | External Secrets Operator + GCP Secret Manager | + +--- + +## 5. Database: MongoDB + +### 5.1 Service Choice + +**MongoDB Atlas** (or **Firestore with MongoDB compatibility** if strict GCP-only) is recommended for: +- Fully managed, automated backups +- Multi-region replication +- Strong security (encryption at rest, VPC peering) +- Easy scaling + +**Atlas on GCP** provides native VPC peering and private connectivity. + +### 5.2 High Availability and DR + +| Topic | Strategy | +|-------|----------| +| **Replicas** | 3-node replica set; multi-zone | +| **Backups** | Continuous backup; point-in-time recovery | +| **Disaster recovery** | Cross-region replica (e.g. 
`us-central1` + `europe-west1`) | +| **Restore testing** | Quarterly DR drills | + +### 5.3 Security + +- Private endpoint (no public IP) +- TLS for all connections +- IAM-based access; principle of least privilege +- Encryption at rest (default in Atlas) + +--- + +## 6. High-Level Architecture Diagram + +The following diagram illustrates the main components (implement in draw.io or Lucidchart): + +``` ++------------------------------------------------------------------+ +| COMPANY INC. INFRASTRUCTURE | ++------------------------------------------------------------------+ + + [Users] + | + v ++-------------------+ +-------------------+ +| Cloud CDN | | Cloud LB (HTTPS) | +| (Static Assets) | | (API + SPA) | ++-------------------+ +-------------------+ + | | + v v ++------------------------------------------------------------------+ +| GKE CLUSTER (Private) | +| +------------------+ +------------------+ +-----------------+ | +| | Ingress | | Backend (Flask) | | Frontend (SPA) | | +| | Controller | | - HPA | | - Nginx/React | | +| +------------------+ +------------------+ +-----------------+ | +| | | | | +| +-----------------------+-----------------------+ | +| | | +| +------------------+ +------------------+ | +| | Redis (cache) | | Observability | | +| | (Memorystore) | | (Prometheus/Grafana) | +| +------------------+ +------------------+ | ++------------------------------------------------------------------+ + | + v ++------------------------------------------------------------------+ +| MongoDB Atlas (GCP) | Secret Manager | Artifact Registry | +| - Replica Set | - App secrets | - Container images | +| - Private endpoint | - DB credentials| | ++------------------------------------------------------------------+ +``` + +--- + +## 7. 
Summary of Recommendations + +| Area | Recommendation | +|------|----------------| +| **Cloud** | GCP with 4 projects (prod, staging, shared, sandbox) | +| **Compute** | GKE Autopilot, private nodes, HPA | +| **Database** | MongoDB Atlas on GCP with multi-zone replication, automated backups | +| **CI/CD** | GitHub Actions + ArgoCD/Flux | +| **Security** | Private VPC, TLS everywhere, Secret Manager, least privilege | +| **Cost** | Start small; use committed use discounts as usage grows | + +--- + +*This document should be accompanied by an HLD diagram (draw.io or Lucidchart) reflecting the architecture above.* diff --git a/docs/architecture-hld.md b/docs/architecture-hld.md new file mode 100644 index 0000000..c6e33ba --- /dev/null +++ b/docs/architecture-hld.md @@ -0,0 +1,85 @@ +# High-Level Architecture Diagram: Company Inc. + +Use this as reference when creating your diagram in [draw.io](https://draw.io) or [Lucidchart](https://lucidchart.com). + +## Mermaid Source (for Git-rendered diagrams) + +```mermaid +flowchart TB + subgraph Internet["Internet"] + Users[Users] + end + + subgraph GCP["GCP"] + subgraph Projects["Projects"] + Prod[company-inc-prod] + Staging[company-inc-staging] + Shared[company-inc-shared] + end + + subgraph Networking["Network"] + LB[Cloud Load Balancer<br/>HTTPS] + CDN[Cloud CDN<br/>Static Assets] + VPC[VPC - Private Subnets] + end + + subgraph GKE["GKE Cluster (Private)"] + Ingress[Ingress Controller] + subgraph Workloads["Workloads"] + API[Backend - Python/Flask<br/>HPA 2-3 replicas] + SPA[Frontend - React SPA<br/>Nginx] + end + Redis[Redis - Memorystore] + end + + subgraph Data["Data & Services"] + Mongo[(MongoDB Atlas<br/>Replica Set)] + Secrets[Secret Manager] + Registry[Artifact Registry] + end + end + + Users --> LB + Users --> CDN + LB --> Ingress + Ingress --> API + Ingress --> SPA + CDN --> SPA + API --> Mongo + API --> Redis + API --> Secrets + GKE --> Registry +``` + +## Draw.io / Lucidchart Layout Guide + +### Top Row (External) +- **Users** → Internet + +### Second Row (Edge) +- **Cloud Load Balancer** (HTTPS) +- **Cloud CDN** (for SPA static assets) + +### Third Row (GKE) +- **GKE Cluster** (Private) + - Ingress Controller + - Backend (Flask) — 2–3 replicas, HPA + - Frontend (React SPA) + - Redis (Memorystore) + +### Fourth Row (Data) +- **MongoDB Atlas** (Replica Set, private endpoint) +- **Secret Manager** +- **Artifact Registry** + +### Connections +1. Users → LB, Users → CDN +2. LB → Ingress → Backend + Frontend +3. Backend → MongoDB, Redis, Secret Manager +4. GKE → Artifact Registry (image pulls) + +### Colors (suggested) +- External: Light blue +- GCP managed: Light green +- Workloads: Light yellow +- Data: Light orange diff --git a/fleetdm-stack/values.yaml b/fleetdm-stack/values.yaml index 2671005..c88b183 100644 --- a/fleetdm-stack/values.yaml +++ b/fleetdm-stack/values.yaml @@ -12,10 +12,10 @@ mysql: database: fleet # Override for production: --set mysql.auth.password=YOUR_SECURE_PASSWORD password: "fleetdm-local-dev" - image: - # Use bitnamilegacy for compatibility (Bitnami free tier moved) - repository: bitnamilegacy/mysql - tag: "8.0.35-debian-12-r2" + # Optional: use bitnamilegacy/mysql if the default bitnami/mysql image cannot be pulled (pull limits, or tags moved to bitnamilegacy) + # image: + # repository: bitnamilegacy/mysql + # tag: "8.0.35-debian-12-r2" primary: persistence: enabled: true @@ -31,9 +31,10 @@ redis: persistence: enabled: true size: 1Gi - image: - repository: bitnamilegacy/redis - tag: "7.2.4-debian-12-r12" + # Optional: use bitnamilegacy/redis if the default bitnami/redis image cannot be pulled (pull limits, or tags moved to bitnamilegacy) + # image: + # repository: bitnamilegacy/redis + # tag: "7.2.4-debian-12-r12" commonConfiguration: | maxmemory 256mb maxmemory-policy 
allkeys-lru