diff --git a/kubernetes/apps/auth/authelia/app/helmrelease.yaml b/kubernetes/apps/auth/authelia/app/helmrelease.yaml new file mode 100644 index 00000000..1e41a632 --- /dev/null +++ b/kubernetes/apps/auth/authelia/app/helmrelease.yaml @@ -0,0 +1,180 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: &app authelia +spec: + interval: 30m + chart: + spec: + chart: app-template + version: 3.1.0 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + dependsOn: + - name: lldap + namespace: auth + values: + controllers: + authelia: + replicas: 3 + strategy: RollingUpdate + annotations: + reloader.stakater.com/auto: "true" + initContainers: + init-db: + image: + repository: ghcr.io/onedr0p/postgres-init + tag: "16" + env: + INIT_POSTGRES_HOST: &dbHost postgres16-rw.database.svc.cluster.local + INIT_POSTGRES_DBNAME: &dbName authelia + INIT_POSTGRES_USER: + valueFrom: + secretKeyRef: + name: authelia-secret + key: AUTHELIA_STORAGE_POSTGRES_USERNAME + INIT_POSTGRES_PASS: + valueFrom: + secretKeyRef: + name: authelia-secret + key: AUTHELIA_STORAGE_POSTGRES_PASSWORD + INIT_POSTGRES_SUPER_PASS: "${POSTGRES_SUPER_PASSWORD}" + containers: + app: + image: + repository: ghcr.io/authelia/authelia + tag: 4.38.8@sha256:19375b10024caeef4e0b119a6247beae84cbaa02c846cfd750e92dea910d4b6a + env: + AUTHELIA_THEME: dark + AUTHELIA_SERVER_ADDRESS: tcp://0.0.0.0:80 + AUTHELIA_SERVER_DISABLE_HEALTHCHECK: "true" + AUTHELIA_TELEMETRY_METRICS_ADDRESS: tcp://0.0.0.0:8080 + AUTHELIA_TELEMETRY_METRICS_ENABLED: "true" + AUTHELIA_SESSION_REDIS_HOST: dragonfly.database.svc.cluster.local + AUTHELIA_SESSION_REDIS_PORT: "6379" + AUTHELIA_SESSION_REDIS_DATABASE_INDEX: "2" + 
AUTHELIA_STORAGE_POSTGRES_DATABASE: *dbName + AUTHELIA_STORAGE_POSTGRES_ADDRESS: *dbHost + AUTHELIA_NOTIFIER_DISABLE_STARTUP_CHECK: "true" + AUTHELIA_NOTIFIER_SMTP_HOST: maddy.comms.svc.cluster.local + AUTHELIA_NOTIFIER_SMTP_PORT: "25" + AUTHELIA_NOTIFIER_SMTP_SENDER: "Authelia <${SMTP_USER}>" + AUTHELIA_NOTIFIER_SMTP_DISABLE_REQUIRE_TLS: "true" + AUTHELIA_AUTHENTICATION_BACKEND_PASSWORD_RESET_DISABLE: "true" + AUTHELIA_AUTHENTICATION_BACKEND_REFRESH_INTERVAL: 1m + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_IMPLEMENTATION: custom + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_URL: ldap://lldap.auth.svc.cluster.local:3890 + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_TIMEOUT: 5s + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_START_TLS: "false" + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_BASE_DN: dc=home,dc=arpa + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_USERNAME_ATTRIBUTE: uid + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_ADDITIONAL_USERS_DN: ou=people + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_USERS_FILTER: "(&({username_attribute}={input})(objectClass=person))" + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_ADDITIONAL_GROUPS_DN: ou=groups + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_GROUPS_FILTER: (member={dn}) + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_GROUP_NAME_ATTRIBUTE: cn + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_DISPLAY_NAME_ATTRIBUTE: displayName + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_MAIL_ATTRIBUTE: mail + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_USER: cn=admin,ou=people,dc=home,dc=arpa + AUTHELIA_ACCESS_CONTROL_DEFAULT_POLICY: one_factor  # NOTE(review): configuration.yaml sets default_policy: two_factor; confirm which one is intended + AUTHELIA_TOTP_DISABLE: "false" + AUTHELIA_TOTP_ISSUER: authelia.com + X_AUTHELIA_CONFIG: /config/configuration.yaml  # point Authelia at the file mounted from authelia-configmap + X_AUTHELIA_CONFIG_FILTERS: expand-env  # expand the ${VAR} placeholders in configuration.yaml from the variables below + DOMAIN: "${PUBLIC_DOMAIN}" + GRAFANA_OAUTH_CLIENT_SECRET: "${GRAFANA_OAUTH_CLIENT_SECRET}" + CLUSTER_CIDR: "${CLUSTER_CIDR}" + NODE_CIDR: "${NODE_CIDR}" + HOME_CIDR: "${HOME_CIDR}" + envFrom: + - secretRef: + name: authelia-secret + probes: + liveness: &probes + enabled: true + custom: true + spec: + httpGet: + path: /api/health + port: &port 80 + initialDelaySeconds: 0 + periodSeconds: 10 + timeoutSeconds: 1 + 
failureThreshold: 3 + readiness: *probes + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: { drop: ["ALL"] } + resources: + requests: + cpu: 10m + limits: + memory: 128Mi + defaultPodOptions: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + seccompProfile: { type: RuntimeDefault } + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + app.kubernetes.io/name: *app + service: + app: + controller: authelia + ports: + http: + port: *port + metrics: + port: 8080 + serviceMonitor: + app: + serviceName: authelia + endpoints: + - port: metrics + scheme: http + path: /metrics + interval: 1m + scrapeTimeout: 10s + ingress: + app: + className: external + annotations: + external-dns.alpha.kubernetes.io/target: "external.${PUBLIC_DOMAIN}" + nginx.ingress.kubernetes.io/configuration-snippet: | + add_header Cache-Control "no-store"; + add_header Pragma "no-cache"; + add_header X-Frame-Options "SAMEORIGIN"; + add_header X-XSS-Protection "1; mode=block"; + hosts: + - host: "auth.${PUBLIC_DOMAIN}" + paths: + - path: / + service: + identifier: app + port: http + persistence: + config: + type: configMap + name: authelia-configmap + globalMounts: + - path: /config/configuration.yaml + subPath: configuration.yaml + readOnly: true diff --git a/kubernetes/apps/auth/authelia/app/kustomization.yaml b/kubernetes/apps/auth/authelia/app/kustomization.yaml new file mode 100644 index 00000000..c49aa5a7 --- /dev/null +++ b/kubernetes/apps/auth/authelia/app/kustomization.yaml @@ -0,0 +1,14 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./secret.sops.yaml + - ./helmrelease.yaml + - ../../../../templates/gatus/external +configMapGenerator: + - name: authelia-configmap + files: + - 
configuration.yaml=./resources/configuration.yaml +generatorOptions: + disableNameSuffixHash: true diff --git a/kubernetes/apps/auth/authelia/app/resources/configuration.yaml b/kubernetes/apps/auth/authelia/app/resources/configuration.yaml new file mode 100644 index 00000000..1106d848 --- /dev/null +++ b/kubernetes/apps/auth/authelia/app/resources/configuration.yaml @@ -0,0 +1,35 @@ +--- +# Note: Authelia vars should be escaped with $${VAR_NAME} to avoid interpolation by Flux +session: + same_site: lax + inactivity: 5m + expiration: 1h + remember_me: 1M + cookies: + - name: $${DOMAIN}_session + domain: $${DOMAIN} + authelia_url: https://auth.$${DOMAIN} + default_redirection_url: https://$${DOMAIN} + +access_control: + default_policy: &policy two_factor + networks: + - name: internal + networks: ["$${CLUSTER_CIDR}", "$${NODE_CIDR}", "$${HOME_CIDR}"] + rules: [] + +identity_providers: + oidc: + cors: + endpoints: ["authorization", "token", "revocation", "introspection"] + allowed_origins_from_client_redirect_uris: true + clients: + - client_name: Grafana + client_id: grafana + client_secret: "$${GRAFANA_OAUTH_CLIENT_SECRET}" + public: false + authorization_policy: *policy + pre_configured_consent_duration: 1y + scopes: ["openid", "profile", "groups", "email"] + redirect_uris: ["https://grafana.$${DOMAIN}/login/generic_oauth"] + userinfo_signed_response_alg: none diff --git a/kubernetes/apps/auth/authelia/app/secret.sops.yaml b/kubernetes/apps/auth/authelia/app/secret.sops.yaml new file mode 100644 index 00000000..d28851dc --- /dev/null +++ b/kubernetes/apps/auth/authelia/app/secret.sops.yaml @@ -0,0 +1,32 @@ +# yamllint disable +apiVersion: v1 +kind: Secret +metadata: + name: authelia-secret +type: Opaque +stringData: + AUTHELIA_JWT_SECRET: 
ENC[AES256_GCM,data:mfEvmQht1KPeTg1Jf0724SKiwPRAMn6EAajgICKMZQT22iKELI2NW3iDXkaLpKccjgW2aNvCdL8V0YUdZGJiTxYUqNqeR0+TjmTbrKCJvOUYXJvmBPeim9KueiGRXlgijh99x0nukPT31K0hoWTn9VmS9Fn9sFxRhbh6votAd88=,iv:oQQnrrMDMm2jsd2gapIoYSRtqqCyy1CVP5FxreyPWhE=,tag:71ueJgUCWef9iMJtJbitRQ==,type:str] + AUTHELIA_SESSION_SECRET: ENC[AES256_GCM,data:6DHH1g3wDGmWucN6dqMhhptBA6LvwAfyMShQ6VPE81cx6LTSAM1wcIDlGsZySU3zbD4myyHTZ+cGMJ3dNk1PhYPIiABTFnVMAdgh/TDyfBTlYbjttwS6VV0VwzjP6ghPQE/qnUco22EAv2hwrvd+nY6mFtkonrjabUHfQjK1RXc=,iv:teI7BvRbHVuqiwFVfPBCqooFezHybYsjEpLi5CacMPI=,tag:/TGjfIbUt+ZFFqpQ4jQQgw==,type:str] + AUTHELIA_STORAGE_ENCRYPTION_KEY: ENC[AES256_GCM,data:K1AQUYYY3RwLC4SE+Ej6fzanrCyL46tyXqPsfJO/S8B8AEM7pr3rt4S/9acXG6deE8vCy/K/rIxa+yRmrM3WzLaeY0U9taUQhXwt2FWmw8FlFjQ18NUMzPcHcJr0CPB1G5gONjo3EON4BTfq6Wngah2/ZeetK/Wy8EHR3zUFnHc=,iv:cphoP0v+uFKg/txW0TK5H8s/+djhvcSsTbh2BQulEr4=,tag:DC1RUdu0qvCt8VrETm3hMg==,type:str] + AUTHELIA_STORAGE_POSTGRES_USERNAME: ENC[AES256_GCM,data:HhiaqD/1ulM=,iv:MHcAroZJhJRkotBAIIPHhRZ+oZ9uRpTENmERhPMPjiA=,tag:YKztQGqukVQ71TDY/nBlVg==,type:str] + AUTHELIA_STORAGE_POSTGRES_PASSWORD: ENC[AES256_GCM,data:s2n6nMmQrK/pl4Wjvdu4PYh97piEkUnue/VXx2iMRrI=,iv:N5b4u0I2u/cjmz3z3uf6JaVzLfDN94gJhAffiTauVXU=,tag:SCEh9V+ZuPExxHJTtFmMDg==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBSTmYvQlRVMFh4WGtCRmVh + TERzWElNcHdkSWppY09zK2JYbkt6clE2MDB3CnI2NzRBT1NCU0xlVUtnRUwxRGxs + b0s4cmlOb3MwdW1aU0wzK003c0piczQKLS0tIFlvaUtsZXFBM0xxWEw4d0t6NTk2 + OW9QYktDNkJuaDdVY2ZZcWFTZ3VPNTgKpTbeUIMemVJs6hcGMZMA+UVHS1+pYa28 + lLBER3QQ86PIPit8iwmppSQjwjOBP1KjMXj34ejtUyUOH5Ksudjzpw== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2024-05-02T05:00:37Z" + mac: 
ENC[AES256_GCM,data:ELct7B7/lYZV8srh81GRK2lnk+mvcWSU2x+0YasYOuFCNTElC9j0nTPL4wYgiPxbOEsTvJvIWb4FoOhvM69k6LKuZvBJz3y+Ej6txYhjk2V+//XP5sLwGs5BtaR6goOK/AfLoqjCO5T5lcZ7MtEni3LjkABFBBfJDsK7KFN8EQI=,iv:4dyqCuMx58Jr3U+S+Z9XAp2Y1LP/XA5aT6vqWI3zRhY=,tag:52y5ynWF/BDh2h/JPmfuWA==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.8.1 diff --git a/kubernetes/apps/auth/authelia/ks.yaml b/kubernetes/apps/auth/authelia/ks.yaml new file mode 100644 index 00000000..8f2fc42f --- /dev/null +++ b/kubernetes/apps/auth/authelia/ks.yaml @@ -0,0 +1,28 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app authelia + namespace: flux-system +spec: + targetNamespace: auth + commonMetadata: + labels: + app.kubernetes.io/name: *app + dependsOn: + - name: cloudnative-pg-cluster + - name: dragonfly-cluster + path: ./kubernetes/apps/auth/authelia/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 5m + postBuild: + substitute: + APP: *app + GATUS_SUBDOMAIN: auth diff --git a/kubernetes/apps/auth/kustomization.yaml b/kubernetes/apps/auth/kustomization.yaml new file mode 100644 index 00000000..2a325b3d --- /dev/null +++ b/kubernetes/apps/auth/kustomization.yaml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./namespace.yaml + - ./authelia/ks.yaml + - ./lldap/ks.yaml diff --git a/kubernetes/apps/auth/lldap/app/helmrelease.yaml b/kubernetes/apps/auth/lldap/app/helmrelease.yaml new file mode 100644 index 00000000..09a00779 --- /dev/null +++ b/kubernetes/apps/auth/lldap/app/helmrelease.yaml @@ -0,0 +1,90 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: lldap +spec: + 
interval: 30m + chart: + spec: + chart: app-template + version: 3.1.0 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + values: + controllers: + lldap: + replicas: 3 + annotations: + reloader.stakater.com/auto: "true" + initContainers: + init-db: + image: + repository: ghcr.io/onedr0p/postgres-init + tag: "16" + env: + INIT_POSTGRES_HOST: postgres16-rw.database.svc.cluster.local + INIT_POSTGRES_DBNAME: lldap + INIT_POSTGRES_USER: + valueFrom: + secretKeyRef: + name: lldap-secret + key: POSTGRES_USER + INIT_POSTGRES_PASS: + valueFrom: + secretKeyRef: + name: lldap-secret + key: POSTGRES_PASWORD  # NOTE(review): sic - lldap-secret defines POSTGRES_PASWORD, not POSTGRES_PASSWORD; rename both together when the secret is re-encrypted with sops + INIT_POSTGRES_SUPER_PASS: "${POSTGRES_SUPER_PASSWORD}" + containers: + app: + image: + repository: ghcr.io/nitnelave/lldap + tag: 2024-04-24-alpine + env: + TZ: "${TIMEZONE}" + UID: "${SECURITY_CONTEXT_RUN_AS_USER}" + GID: "${SECURITY_CONTEXT_RUN_AS_GROUP}" + LLDAP_LDAP_BASE_DN: "dc=home,dc=arpa" + envFrom: + - secretRef: + name: lldap-secret + service: + app: + controller: lldap + ports: + http: + port: 17170 + ldap: + enabled: true + port: 3890 + ingress: + app: + enabled: true + className: internal + hosts: + - host: &host "ldap.${PUBLIC_DOMAIN}" + paths: + - path: / + service: + identifier: app + port: http + tls: + - hosts: + - *host + resources: + requests: + cpu: 10m + memory: 100Mi + limits: + memory: 500Mi diff --git a/kubernetes/apps/auth/lldap/app/kustomization.yaml b/kubernetes/apps/auth/lldap/app/kustomization.yaml new file mode 100644 index 00000000..95bf4747 --- /dev/null +++ b/kubernetes/apps/auth/lldap/app/kustomization.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./secret.sops.yaml + - ./helmrelease.yaml diff --git a/kubernetes/apps/auth/lldap/app/secret.sops.yaml b/kubernetes/apps/auth/lldap/app/secret.sops.yaml new file mode 100644 index 
00000000..bdde7280 --- /dev/null +++ b/kubernetes/apps/auth/lldap/app/secret.sops.yaml @@ -0,0 +1,32 @@ +# yamllint disable +apiVersion: v1 +kind: Secret +metadata: + name: lldap-secret +type: Opaque +stringData: + POSTGRES_USER: ENC[AES256_GCM,data:Z05rU70=,iv:BvRRNDEdEWLDXZXP1bibIbJh4J5wPUkMnmoQYIMpe5s=,tag:JmpN7UoG84lceZLe3vRbVA==,type:str] + POSTGRES_PASWORD: ENC[AES256_GCM,data:ZI/BuKx72V1K5kgOXTgxmVLvhLuaRiamyS9VBknZJqQ=,iv:sMMgqDoPaNu7jEwgkza4KOdIyUVNNH6YkWA/zZQuogM=,tag:9rYomTf7bc3zobpOFg5N4w==,type:str] + LLDAP_LDAP_USER_PASS: ENC[AES256_GCM,data:FJCBy4/s7FKOp5dN8qNrV1ahgQ11gD1wBLb9eQ7ijSs=,iv:6qvAIKxlezfn34iZOQy2GFnLTmsmPjmJ77qfjROAShA=,tag:do45SdaUq7ycPUor+f+giA==,type:str] + LLDAP_JWT_SECRET: ENC[AES256_GCM,data:XPOo7pfzxO9JzKkHNUAqSgWkskrFxZT8IlzJ1W0P6i0=,iv:fc43uYAUWk705J8uFHxY8ejM63SVkGOgE7I+V7dGJPY=,tag:lxbefhwYPsWxqP1E5sBFNg==,type:str] + LLDAP_DATABASE_URL: ENC[AES256_GCM,data:tqLHdEa5E4e/kqHb5vY9ehXEGo1lgagX5m8IGosg8xMS3I6u7INQ4sdCSetOIhJjcGQH46/sFDr7ElrvDCH+CyiMpMnmMJg09nIzIeZekCt+LMGeV9sbwFfr8Pc=,iv:jHkaHASca+LWCMz9Srcc6+vbZomA7hH5MaL4plAnddE=,tag:dd/e39y6yG3CkRql+fx93g==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBPcnBzajlSRytaZHFseDRx + d2ZiRHJzWW1SUWNpT3p2VWhMQlJ3dlNnOVJBCjVVUTBjc3pCTWNsMjR5NTdRTDBD + aUpJc1lMOHcvK2ZRL2FkSDFwOERyUjgKLS0tIFIxdkQwTktjemtVbjRBakJFNEVM + ejV2NmNaUmFQYUgrRkY3L2lMZTVSRjgK7a5ejlVRba0fB674z5NGqRl6UPAppjnv + 3GtQNhuPhfl1pfOgyru+xUDrCjo0QybMk+aAAaVtjzuJIApNAh0Pcw== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2024-05-02T04:03:52Z" + mac: ENC[AES256_GCM,data:qthQB6wmBJ0n9J0i6ThPQU6yFBfbKwzZU2wPZBwmDnA54Mmx9pmVcPi9v/6gHUHSfsZ8eSxazwOxAf2+WKnrJNzPwwrBVgr62/N+Suw2PqDMHTwhEyKDFlNSXTLyV2lXBCdoumbtLE34ZstTnFebe8K2Ec/DRmdl1E6yO7RYzyo=,iv:zCskGvS4xY9JbwxzTr19d1u1jNjf9Yq1evD/qJQUzH0=,tag:5pBdQlpDYUFiXGClzHZvlQ==,type:str] 
+ pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.8.1 diff --git a/kubernetes/apps/auth/lldap/ks.yaml b/kubernetes/apps/auth/lldap/ks.yaml new file mode 100644 index 00000000..58fe5a6a --- /dev/null +++ b/kubernetes/apps/auth/lldap/ks.yaml @@ -0,0 +1,22 @@ +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app lldap + namespace: flux-system +spec: + targetNamespace: auth + commonMetadata: + labels: + app.kubernetes.io/name: *app + dependsOn: + - name: cloudnative-pg-cluster + path: ./kubernetes/apps/auth/lldap/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/kubernetes/apps/auth/namespace.yaml b/kubernetes/apps/auth/namespace.yaml new file mode 100644 index 00000000..14c7f8bd --- /dev/null +++ b/kubernetes/apps/auth/namespace.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: auth + labels: + kustomize.toolkit.fluxcd.io/prune: disabled diff --git a/kubernetes/apps/comms/maddy/app/helmrelease.yaml b/kubernetes/apps/comms/maddy/app/helmrelease.yaml index 1feb6702..8cf185db 100644 --- a/kubernetes/apps/comms/maddy/app/helmrelease.yaml +++ b/kubernetes/apps/comms/maddy/app/helmrelease.yaml @@ -7,12 +7,12 @@ # maddy.comms.svc.cluster.local:2525 --- # yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json -apiVersion: helm.toolkit.fluxcd.io/v2beta1 +apiVersion: helm.toolkit.fluxcd.io/v2beta2 kind: HelmRelease metadata: name: &app maddy spec: - interval: 15m + interval: 30m chart: spec: chart: app-template diff --git a/kubernetes/apps/comms/maddy/app/secret.sops.yaml b/kubernetes/apps/comms/maddy/app/secret.sops.yaml index 9f64ae3c..becb773d 100644 --- a/kubernetes/apps/comms/maddy/app/secret.sops.yaml +++ b/kubernetes/apps/comms/maddy/app/secret.sops.yaml @@ -4,12 +4,11 @@ apiVersion: v1 
type: Opaque metadata: name: maddy - namespace: comms stringData: - SMTP_USER: ENC[AES256_GCM,data:mKhr+aXO2Pz4u/WGvBnuFMcqRlU1RU4=,iv:XihqG/QZf1PJA96weieMvLO1+TnSava09TGLZ5SGZhI=,tag:pkguyu9vsQdP4v6JYKIILg==,type:str] - SMTP_PASSWORD: ENC[AES256_GCM,data:+2yhdJqeozv9Y+NiRrS/ow==,iv:Yvq0fbmWrQy7E9H0OsWAOh9nAhrtyZsvJZlU8+OCZgQ=,tag:Hp2zUgR2MMsKd+7YOH72tw==,type:str] - SMTP_HOST: ENC[AES256_GCM,data:VsqIcdNEtmMZLkzkzTk=,iv:n7wzd41Lo8tPyLahU4hQEYFFagnzJGH9aSpHd3kCp8g=,tag:AkJV8zR2neJhnFytrBwjJw==,type:str] - SMTP_DOMAIN: ENC[AES256_GCM,data:VKXPtS0neYZD,iv:bZG7rDbgUsvqbdYBQm+babiq8wRdHYl1fNXShqmMUws=,tag:1POpQxLou934tF+89/G0Sg==,type:str] + SMTP_USER: ENC[AES256_GCM,data:RLn0zuHL2qsefcgEkqdQtrQ9u8SJ2lg=,iv:DtsVr6EuEIntPQaqzFylWWFle0iyUVCtaKgvLBr+sm4=,tag:JWHZU2LZYqaVgzsYtZmWgw==,type:str] + SMTP_PASSWORD: ENC[AES256_GCM,data:lNmXzdvvIlr5hdAc2vSM2w==,iv:o29RLZuWbBpKhq4gSbPQMLAIyiGouurWSOx62n7UGQo=,tag:OlOLYRcmZTS7+kI5KK17Ug==,type:str] + SMTP_HOST: ENC[AES256_GCM,data:dQl2rDi9A3T9Io4LDvs=,iv:B4uaohvDES+nfsIdFcQ5yC1m5roL+7fMsrxgvIRMX2Q=,tag:NSxAC0HB4fPD3qlJrlqVnw==,type:str] + SMTP_DOMAIN: ENC[AES256_GCM,data:T1/5bVqk8/JJ,iv:manI07B6aTuPtguWLqf7fgfmJAvABMcLf5ryN/tCV+w=,tag:6QQciJUdVQCRUQMHtJtzqw==,type:str] sops: kms: [] gcp_kms: [] @@ -19,14 +18,14 @@ sops: - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBhZStMQUxmTXdhWFVJZE1I - SGUvMU9ocjlHbm9nRFMxOWJoNVBCbTNpUGpBCkVRbkRIcjNacDRwMlF4RnExRGhz - ZGRmbVdaRlpsMkdNL1RlVVVXTUZ0VGcKLS0tIEswc0NFT1NaQWFlRERiaVFlSmhE - R2hYNmxtaWhVKyt2SnNIczRVeGZWWG8KzK19LnCQXAZkoe+ziWsg9Cq6UlDON38I - HJ+YWA3A9YJ1M7R+WHBheVxVol50qxZz2tFhLH6cIRNBaFmRo7t21g== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBDQ3NtU01hVG5rOTNrTW1C + T1VybFR3SFV5ajNCNGo1TGY2VUhnMitrK1VRClZBRGpLSzJOd2k4OUVZcktKK2xM + a0xmMGhPd2tiNzZzT3Bpa0dYcVFoNWMKLS0tIE9ady9sR0w4azlNRFV0dThhVndW + QTNDeVhqZW8xY0c3QmZHSVBPdDdRRXcKV0FVcEeLhPq+s5iJXT4SM4M36THwrPGT + 
POgufdqSLb2QnbY4NqV6oMN0ZbmBntr7WTv28jwa+C2jY2CodXnMhg== -----END AGE ENCRYPTED FILE----- - lastmodified: "2022-11-14T05:21:32Z" - mac: ENC[AES256_GCM,data:A5djXW0qHN9V4i5e0X/e1sSAQ8j9EOU8AXpJ1Lj8/Y+bApEhAE7lOS/Hypl5ZiMjltfBDn2CKw2Am2BDXEhuAz4ifpUhKZ9kaYtCB7gjntdlBUKyfJKSO6WY2SLOfga5FC/k+nQ6uOKWfr0YtuGH15v6tNHYYYeWoonQKTdtOdc=,iv:DdaHFnyGGTW2Wh3J4f3AFtSPfA2gBzZjmukhkvzCs3U=,tag:vo8LojTLvqsJQd8kaSbW7w==,type:str] + lastmodified: "2024-05-02T05:28:31Z" + mac: ENC[AES256_GCM,data:eKnnw7eJTIAzMQqOk7PMSREPDA2EukJLDazy9zQgtgNr3O4F9Qcqk6PYpxu4oTziHJlrpCHcczvh5F89lOIl0Y9/d2lsBwFhed2XJAgMBduXKT797AzPDLT8ZAh7iSNaDQHLaZSNScNizQ09NR/oI7oEqD7O0m3wUopz9HcKbio=,iv:bO0swb8TulQLkTNHR2KmdRQdRrBTZLhTYUfHt++khB8=,tag:2xjWS7kOqe2vhXQUle0BDA==,type:str] pgp: [] encrypted_regex: ^(data|stringData)$ - version: 3.7.3 + version: 3.8.1 diff --git a/kubernetes/apps/comms/ntfy/app/helmrelease.yaml b/kubernetes/apps/comms/ntfy/app/helmrelease.yaml index 4535c47a..81d80422 100644 --- a/kubernetes/apps/comms/ntfy/app/helmrelease.yaml +++ b/kubernetes/apps/comms/ntfy/app/helmrelease.yaml @@ -1,95 +1,79 @@ -# curl -u user:password -d "Hello, from the CLI" "https://ntfy.example.com/test" --- -apiVersion: helm.toolkit.fluxcd.io/v2beta1 +apiVersion: helm.toolkit.fluxcd.io/v2beta2 kind: HelmRelease metadata: name: ntfy - namespace: comms spec: - interval: 15m + interval: 30m chart: spec: chart: app-template - version: 1.5.1 + version: 3.1.0 sourceRef: kind: HelmRepository name: bjw-s namespace: flux-system - interval: 15m - maxHistory: 2 install: - createNamespace: true remediation: retries: 3 upgrade: cleanupOnFail: true remediation: retries: 3 - uninstall: - keepHistory: false - dependsOn: - - name: kyverno - namespace: kyverno - - name: ingress-nginx - namespace: network values: - image: - repository: docker.io/binwiederhier/ntfy - tag: latest - env: - TZ: "${TIMEZONE}" - # https://ntfy.sh/docs/config/#ios-instant-notifications - NTFY_UPSTREAM_BASE_URL: https://ntfy.sh - NTFY_BEHIND_PROXY: true - 
NTFY_BASE_URL: "https://ntfy.${PUBLIC_DOMAIN}" - # https://ntfy.sh/docs/config/#e-mail-notifications - # Requires user and pass right now... - # NTFY_SMTP_SENDER_ADDR: maddy.comms.svc.cluster.local:2525 - # NTFY_SMTP_SENDER_FROM: "${SMTP_USER}" - # https://ntfy.sh/docs/config/#e-mail-publishing - # Doesn't support auth: https://github.com/binwiederhier/ntfy/issues/420 - # DNS: MX ntfy CNAME - # NTFY_SMTP_SERVER_LISTEN: ":25" - # NTFY_SMTP_SERVER_DOMAIN: "ntfy.${PUBLIC_DOMAIN}" - # NTFY_SMTP_SERVER_ADDR_PREFIX: "ntfy+" - # https://ntfy.sh/docs/config/#example-private-instance - # If not ingress basic auth, then use: - # NTFY_AUTH_DEFAULT_ACCESS: deny-all - # NTFY_AUTH_FILE: /var/lib/ntfy/user.db - args: [serve] - persistence: - data: - enabled: true - existingClaim: appdata - subPath: - # - path: ntfy/config - # mountPath: /var/lib/ntfy - - path: ntfy/cache - mountPath: /var/cache/ntfy + controllers: + ntfy: + strategy: RollingUpdate + containers: + app: + image: + repository: docker.io/binwiederhier/ntfy + tag: latest + env: + TZ: "${TIMEZONE}" + # https://docs.ntfy.sh/config/#config-options + NTFY_UPSTREAM_BASE_URL: https://ntfy.sh + NTFY_BEHIND_PROXY: "true" + NTFY_BASE_URL: "https://ntfy.${PUBLIC_DOMAIN}" + NTFY_SMTP_SENDER_ADDR: maddy.comms.svc.cluster.local:25 + NTFY_SMTP_SENDER_FROM: "${SMTP_USER}" + NTFY_AUTH_DEFAULT_ACCESS: read-only + # NTFY_AUTH_FILE: /var/lib/ntfy/auth.db + NTFY_CACHE_FILE: /var/cache/ntfy/cache.db + NTFY_ATTACHMENT_CACHE_DIR: /var/cache/ntfy/attachments + NTFY_ENABLE_LOGIN: "true" + NTFY_ENABLE_SIGNUP: "true" + args: [serve] service: - main: + app: + controller: ntfy ports: http: port: 80 - # smtp: - # enabled: true - # ports: - # smtp: - # enabled: true - # port: 25 ingress: - main: + app: enabled: true - ingressClassName: nginx + className: external annotations: - external-dns.home.arpa/enabled: "true" - nginx.ingress.kubernetes.io/auth-type: basic - nginx.ingress.kubernetes.io/auth-secret: ntfy + external-dns.alpha.kubernetes.io/target: 
"external.${PUBLIC_DOMAIN}" hosts: - host: &host "ntfy.${PUBLIC_DOMAIN}" paths: - path: / - pathType: Prefix + service: + identifier: app + port: http tls: - hosts: - *host + persistence: + cache: + type: hostPath + hostPath: "${CLUSTER_HOST_STORAGE_DIR}/ntfy" + hostPathType: DirectoryOrCreate  # NOTE(review): assumes the ntfy directory may not exist on the node yet - confirm + globalMounts: + - path: /var/cache/ntfy  # parent of NTFY_CACHE_FILE and NTFY_ATTACHMENT_CACHE_DIR + config: + type: emptyDir  # NOTE(review): scratch only; switch to a persistent volume before enabling NTFY_AUTH_FILE + globalMounts: + - path: /var/lib/ntfy diff --git a/kubernetes/apps/comms/ntfy/app/kustomization.yaml b/kubernetes/apps/comms/ntfy/app/kustomization.yaml index 6e4891d2..95bf4747 100644 --- a/kubernetes/apps/comms/ntfy/app/kustomization.yaml +++ b/kubernetes/apps/comms/ntfy/app/kustomization.yaml @@ -2,5 +2,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - secret.sops.yaml - - helmrelease.yaml + - ./secret.sops.yaml + - ./helmrelease.yaml diff --git a/kubernetes/apps/comms/ntfy/app/secret.sops.yaml b/kubernetes/apps/comms/ntfy/app/secret.sops.yaml index de5d71c0..317e529c 100644 --- a/kubernetes/apps/comms/ntfy/app/secret.sops.yaml +++ b/kubernetes/apps/comms/ntfy/app/secret.sops.yaml @@ -6,7 +6,7 @@ metadata: name: ntfy namespace: comms stringData: - auth: ENC[AES256_GCM,data:z7br0Ta6kPEhNYYwq10Sq3gDvHv98PYZEID7pcpWGe1rCu56vwdavaX6wX6QNqSMhLCG6jUBY1AlRi+dCPhc5/tJoSgGFA==,iv:BabtNW2wljZQqvEaoxN+ZwsMXMjow/z8LEMRljC16wQ=,tag:hXnVsjoMN3/XVLVXrLxzPA==,type:str] + auth: ENC[AES256_GCM,data:ZP2fhxUwh7Fvi3Dmhtkop8TbIp1ryNueW73OJqpaSlgEI8MdpS/vLf8YS/dBr7SeDTjWpFvYxgYVAD1Tt+aYArpWJNEakQ==,iv:4Xjc3f0naxZPNpXakxXpcf6Tt6V6dBIJB8NJuS+AAZ0=,tag:4ZXflsmoGEKCQ71JMt3cyQ==,type:str] sops: kms: [] gcp_kms: [] @@ -16,14 +16,14 @@ sops: - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBvKzNlK1JFTHFsWVJWL1ZL - Z3N6Z2N0Q3BiK043dlJMSzR6NnUrcXZtekhzCk9NVGJsaXAybkw5a2huMTZKVDlq - U3YxYkZKZVFzK0ttNWVWbFVRYkh3cHcKLS0tIEcwcVIvK2dSYlQwRm8xQU9Jekp6 - 
WlovODEzQUNBbGhUSWF3VTNveXNTZnMKcqpSKdZAAELozoRyuKt2rboF2R0tg2fR - 3j73A2X1bjTCQlG0ZTa25rWq+CsDGYYUOOvVVUWh6QwojVqS9yt2fA== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBPd01xYUE1Q3B1N29MNFBC + MmVzTmYrL1Jmc1lBeDQ3QkEzcFA4QUlJOWlVCnpnSFdaTG1vOUNidW1XWmRhaWlS + SXo1cU5za3gzeThUbm5DbXNTeklIN2cKLS0tIG9yeFRMc05IZUFiU0tQaG83c1JQ + c0paMkpLenYrNlViMGptVTNZdVg4R0kK8jMUb0oLWxhZ5/Cd8bHjFJk/OnpYzyGO + x2QnTjB6wxA09U+G9iIrrkDlkKZ+a4RM5qO5f57IfWRToJXJJ4i6pg== -----END AGE ENCRYPTED FILE----- - lastmodified: "2022-11-14T05:21:50Z" - mac: ENC[AES256_GCM,data:ViFOZOYCM7/YAwIVChP6K0Nz/R55sHQF5m6nSg51+mnKayTBdeLcCDXfMJqwvtNB0t7zbepY1w5v/FiZDn3fU6RdcssmX3l1LN+K+Pcfsd4XVNZaRmP3W8+r0aAoRjOrw7DULYvAqhi86LtRNDxbevvNQAboB3SqraVm99myWVA=,iv:laakPSqHrfrVghzmuCdUtIqlrqqPxuxxxXHyTjR+urs=,tag:MkncoDyQTsFLB6/nrDrc6Q==,type:str] + lastmodified: "2024-05-01T16:53:44Z" + mac: ENC[AES256_GCM,data:1OPdL2YrQSBdaFZISVQ6E1h1COQh5qdSsoE8TymFFH/dNHZI5Hv9Ieh9HwnPaE/eJAOXsRDo3LOloh78WpEN/fFJ+XJm1jOqGD9Epgj+ZrDcTISrqt/WQvCVkmp3v6mx+qS+KNnZwxdOZjmXUi1M2FLIodWQSCNtfJdaQyJsb8c=,iv:pn4FwVkecsGxtvHsNrP8vMfwtWm5xXHY1ovNyVbwXOA=,tag:RW9bxk9a/I9v6dqRkyHC0g==,type:str] pgp: [] encrypted_regex: ^(data|stringData)$ - version: 3.7.3 + version: 3.8.1 diff --git a/kubernetes/apps/comms/ntfy/ks.yaml b/kubernetes/apps/comms/ntfy/ks.yaml index 17aec64f..36eca934 100644 --- a/kubernetes/apps/comms/ntfy/ks.yaml +++ b/kubernetes/apps/comms/ntfy/ks.yaml @@ -2,15 +2,19 @@ apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: - name: apps-ntfy + name: &app ntfy namespace: flux-system spec: + targetNamespace: comms + commonMetadata: + labels: + app.kubernetes.io/name: *app path: ./kubernetes/apps/comms/ntfy/app prune: true sourceRef: kind: GitRepository - name: homelab - wait: false # no flux ks dependents + name: home-kubernetes + wait: false interval: 30m retryInterval: 1m timeout: 5m diff --git a/kubernetes/apps/database/cloudnative-pg/app/helmrelease.yaml 
b/kubernetes/apps/database/cloudnative-pg/app/helmrelease.yaml new file mode 100644 index 00000000..f49273d1 --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/app/helmrelease.yaml @@ -0,0 +1,34 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: cloudnative-pg +spec: + interval: 30m + chart: + spec: + chart: cloudnative-pg + version: 0.21.2 + sourceRef: + kind: HelmRepository + name: cloudnative-pg + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + dependsOn: + - name: openebs + namespace: openebs-system + values: + crds: + create: true + monitoring: + podMonitorEnabled: false + grafanaDashboard: + create: true diff --git a/kubernetes/apps/database/cloudnative-pg/app/kustomization.yaml b/kubernetes/apps/database/cloudnative-pg/app/kustomization.yaml new file mode 100644 index 00000000..16a6ce30 --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/app/kustomization.yaml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./secret.sops.yaml + - ./helmrelease.yaml diff --git a/kubernetes/apps/database/cloudnative-pg/app/secret.sops.yaml b/kubernetes/apps/database/cloudnative-pg/app/secret.sops.yaml new file mode 100644 index 00000000..3f3a846e --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/app/secret.sops.yaml @@ -0,0 +1,29 @@ +# yamllint disable +kind: Secret +apiVersion: v1 +type: Opaque +metadata: + name: cloudnative-pg-secret +stringData: + username: ENC[AES256_GCM,data:EQKIUlpVmqA=,iv:lhev8dXH65zFY8h6PovJtzR2kzsKfJRIW/FV88XA/es=,tag:AY2fw5AifsrjjCfjM9RFgg==,type:str] + password: 
ENC[AES256_GCM,data:t+1x5LDvZ3U2QmPY64DGOsrXYqs1579ZFQmgm/vsWEM=,iv:eyspWDwtPOVGhc6HzcqVA0v9DZVSqQbec7870Tm4ExE=,tag:jHeGr9/opgY6oSM83vL1yQ==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBFRUxZWU5saEk4UnNaNmpZ + ajdUbEdGelo1MzJ6c2g5bXBNRjJ1Mk05OEZZCmZaUzl0aDAySlBySi9RMWVIL3Vn + anZXNFJvL1YvdWtlSkYrR21hWHhUTzgKLS0tIHl1VUIxS2gwUHh4dkhiUDU3ZDh2 + Zjk4WjNWM1o5YjBQQ1UwNmM0QjF5ek0KsYj1shqnoZabOWUYBYPCrtybC+0QxOUq + wdFEg1Un0RonCgTgOZ0IsF7fqEzoBNO5Ba7/2cX3p1M5NCsnbW61LQ== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2024-05-02T03:18:02Z" + mac: ENC[AES256_GCM,data:WfZPj1GDVqJ64klenNKbjRZW30e2xXtVFbm5M63lZeXJjCEoJ4KMDlHoHktwemZ0VbD6zd6a1tM+jrQXfNMhvcohAtCMldTDI8XC/F0SFF8udjAB0eN9sGnoANejqnmW/0VPOAuDzYJN2BjopUzL5tTSvTYyz3nxJckq23KwcG4=,iv:x/2a6k0VYsug7u4mv/q/VQYOsGHd1gqzSP366IUYcJA=,tag:P9YBGOdaJ3pOkuks3eJHdQ==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.8.1 diff --git a/kubernetes/apps/database/cloudnative-pg/cluster/cluster16.yaml b/kubernetes/apps/database/cloudnative-pg/cluster/cluster16.yaml new file mode 100644 index 00000000..f186d75d --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/cluster/cluster16.yaml @@ -0,0 +1,65 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/postgresql.cnpg.io/cluster_v1.json +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: postgres16 +spec: + instances: 3 + imageName: ghcr.io/cloudnative-pg/postgresql:16.2-16 + primaryUpdateStrategy: unsupervised + storage: + size: 20Gi + storageClass: openebs-hostpath + superuserSecret: + name: cloudnative-pg-secret + enableSuperuserAccess: true + postgresql: + parameters: + max_connections: "400" + shared_buffers: 256MB + nodeMaintenanceWindow: + inProgress: false + reusePVC: true + resources: + requests: + cpu: 
500m + limits: + memory: 4Gi + monitoring: + enablePodMonitor: true + # Ref: https://github.com/cloudnative-pg/cloudnative-pg/issues/2501 + podMonitorMetricRelabelings: + - { sourceLabels: ["cluster"], targetLabel: cnpg_cluster, action: replace } + - { regex: cluster, action: labeldrop } + backup: + retentionPolicy: 30d + barmanObjectStore: &barmanObjectStore + data: + compression: bzip2 + wal: + compression: bzip2 + maxParallel: 8 + destinationPath: s3://cloudnative-pg/ + endpointURL: http://minio.database.svc.cluster.local:9000 + # Note: serverName version needs to be incremented + # when recovering from an existing cnpg cluster + serverName: &currentCluster postgres16-v6 + s3Credentials: + accessKeyId: + name: minio-secret + key: MINIO_ROOT_USER + secretAccessKey: + name: minio-secret + key: MINIO_ROOT_PASSWORD + # Note: previousCluster needs to be set to the name of the previous + # cluster when recovering from an existing cnpg cluster + bootstrap: + recovery: + source: &previousCluster postgres16-v5 + # Note: externalClusters is needed when recovering from an existing cnpg cluster + externalClusters: + - name: *previousCluster + barmanObjectStore: + <<: *barmanObjectStore + serverName: *previousCluster diff --git a/kubernetes/apps/database/cloudnative-pg/cluster/gatus.yaml b/kubernetes/apps/database/cloudnative-pg/cluster/gatus.yaml new file mode 100644 index 00000000..6aba0f04 --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/cluster/gatus.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: postgres-gatus + labels: + gatus.io/enabled: "true" +data: + config.yaml: | + endpoints: + - name: postgres + group: infrastructure + url: tcp://postgres16-rw.database.svc.cluster.local:5432 + interval: 1m + ui: + hide-url: true + hide-hostname: true + conditions: + - "[CONNECTED] == true" + alerts: + - type: ntfy diff --git a/kubernetes/apps/database/cloudnative-pg/cluster/kustomization.yaml 
b/kubernetes/apps/database/cloudnative-pg/cluster/kustomization.yaml new file mode 100644 index 00000000..831641ea --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/cluster/kustomization.yaml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./cluster16.yaml + - ./gatus.yaml + - ./prometheusrule.yaml + - ./service.yaml diff --git a/kubernetes/apps/database/cloudnative-pg/cluster/prometheusrule.yaml b/kubernetes/apps/database/cloudnative-pg/cluster/prometheusrule.yaml new file mode 100644 index 00000000..9c1d6a8d --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/cluster/prometheusrule.yaml @@ -0,0 +1,67 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/prometheusrule_v1.json +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: cloudnative-pg-rules + labels: + prometheus: k8s + role: alert-rules +spec: + groups: + - name: cloudnative-pg.rules + rules: + - alert: LongRunningTransaction + annotations: + description: Pod {{ $labels.pod }} is taking more than 5 minutes (300 seconds) for a query. + summary: A query is taking longer than 5 minutes. 
+ expr: |- + cnpg_backends_max_tx_duration_seconds > 300 + for: 1m + labels: + severity: warning + - alert: BackendsWaiting + annotations: + description: Pod {{ $labels.pod }} has been waiting for longer than 5 minutes + summary: If a backend is waiting for longer than 5 minutes + expr: |- + cnpg_backends_waiting_total > 300 + for: 1m + labels: + severity: warning + - alert: PGDatabase + annotations: + description: Over 150,000,000 transactions from frozen xid on pod {{ $labels.pod }} + summary: Number of transactions from the frozen XID to the current one + expr: |- + cnpg_pg_database_xid_age > 150000000 + for: 1m + labels: + severity: warning + - alert: PGReplication + annotations: + description: Standby is lagging behind by over 300 seconds (5 minutes) + summary: The standby is lagging behind the primary + expr: |- + cnpg_pg_replication_lag > 300 + for: 1m + labels: + severity: warning + - alert: LastFailedArchiveTime + annotations: + description: Archiving failed for {{ $labels.pod }} + summary: Checks the last time archiving failed. Will be < 0 when it has not failed. 
+ expr: |- + (cnpg_pg_stat_archiver_last_failed_time - cnpg_pg_stat_archiver_last_archived_time) > 1 + for: 1m + labels: + severity: warning + - alert: DatabaseDeadlockConflicts + annotations: + description: There are over 10 deadlock conflicts in {{ $labels.pod }} + summary: Checks the number of database conflicts + expr: |- + cnpg_pg_stat_database_deadlocks > 10 + for: 1m + labels: + severity: warning diff --git a/kubernetes/apps/database/cloudnative-pg/cluster/service.yaml b/kubernetes/apps/database/cloudnative-pg/cluster/service.yaml new file mode 100644 index 00000000..e3d7240f --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/cluster/service.yaml @@ -0,0 +1,18 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: postgres-lb + annotations: + external-dns.alpha.kubernetes.io/hostname: "postgres.${PUBLIC_DOMAIN}" + io.cilium/lb-ipam-ips: "${LB_POSTGRES_ADDR}" +spec: + type: LoadBalancer + ports: + - name: postgres + port: 5432 + protocol: TCP + targetPort: 5432 + selector: + cnpg.io/cluster: postgres16 + role: primary diff --git a/kubernetes/apps/database/cloudnative-pg/ks.yaml b/kubernetes/apps/database/cloudnative-pg/ks.yaml new file mode 100644 index 00000000..6f1c7031 --- /dev/null +++ b/kubernetes/apps/database/cloudnative-pg/ks.yaml @@ -0,0 +1,44 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app cloudnative-pg + namespace: flux-system +spec: + targetNamespace: database + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: ./kubernetes/apps/database/cloudnative-pg/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: true + interval: 30m + retryInterval: 1m + timeout: 5m +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: 
kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app cloudnative-pg-cluster + namespace: flux-system +spec: + targetNamespace: database + commonMetadata: + labels: + app.kubernetes.io/name: *app + dependsOn: + - name: cloudnative-pg + path: ./kubernetes/apps/database/cloudnative-pg/cluster + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: true + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/kubernetes/apps/database/dragonfly/app/helmrelease.yaml b/kubernetes/apps/database/dragonfly/app/helmrelease.yaml new file mode 100644 index 00000000..8bbe1dd4 --- /dev/null +++ b/kubernetes/apps/database/dragonfly/app/helmrelease.yaml @@ -0,0 +1,102 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: &app dragonfly-operator +spec: + interval: 30m + chart: + spec: + chart: app-template + version: 3.1.0 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + values: + controllers: + dragonfly-operator: + strategy: RollingUpdate + containers: + app: + image: + repository: ghcr.io/dragonflydb/operator + tag: v1.1.2@sha256:f0d76725950095ac65b36252e0042d339d1db9b181b1d068f4b6686ea93055e4 + command: ["/manager"] + args: + - --health-probe-bind-address=:8081 + - --metrics-bind-address=:8080 + probes: + liveness: + enabled: true + custom: true + spec: + httpGet: + path: /healthz + port: &port 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + timeoutSeconds: 1 + failureThreshold: 3 + readiness: + enabled: true + custom: true + spec: + httpGet: + path: /readyz + port: *port + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 1 + failureThreshold: 3 + resources: + requests: 
+ cpu: 10m + limits: + memory: 128Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: { drop: ["ALL"] } + defaultPodOptions: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + seccompProfile: { type: RuntimeDefault } + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + app.kubernetes.io/name: *app + service: + app: + controller: *app + ports: + http: + port: *port + metrics: + port: 8080 + serviceMonitor: + app: + serviceName: *app + endpoints: + - port: metrics + scheme: http + path: /metrics + interval: 1m + scrapeTimeout: 10s + serviceAccount: + create: true + name: *app diff --git a/kubernetes/apps/database/dragonfly/app/kustomization.yaml b/kubernetes/apps/database/dragonfly/app/kustomization.yaml new file mode 100644 index 00000000..639c55db --- /dev/null +++ b/kubernetes/apps/database/dragonfly/app/kustomization.yaml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + # renovate: datasource=github-releases depName=dragonflydb/dragonfly-operator + - https://raw.githubusercontent.com/dragonflydb/dragonfly-operator/v1.1.2/manifests/crd.yaml + - ./helmrelease.yaml + - ./rbac.yaml diff --git a/kubernetes/apps/database/dragonfly/app/rbac.yaml b/kubernetes/apps/database/dragonfly/app/rbac.yaml new file mode 100644 index 00000000..6e1e0920 --- /dev/null +++ b/kubernetes/apps/database/dragonfly/app/rbac.yaml @@ -0,0 +1,40 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: dragonfly-operator +rules: + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] + - apiGroups: [""] + 
resources: ["pods", "services"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] + - apiGroups: ["apps"] + resources: ["statefulsets"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] + - apiGroups: ["dragonflydb.io"] + resources: ["dragonflies"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] + - apiGroups: ["dragonflydb.io"] + resources: ["dragonflies/finalizers"] + verbs: ["update"] + - apiGroups: ["dragonflydb.io"] + resources: ["dragonflies/status"] + verbs: ["get", "patch", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: dragonfly-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: dragonfly-operator +subjects: + - kind: ServiceAccount + name: dragonfly-operator + namespace: database diff --git a/kubernetes/apps/database/dragonfly/cluster/cluster.yaml b/kubernetes/apps/database/dragonfly/cluster/cluster.yaml new file mode 100644 index 00000000..e06899a8 --- /dev/null +++ b/kubernetes/apps/database/dragonfly/cluster/cluster.yaml @@ -0,0 +1,25 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/dragonflydb.io/dragonfly_v1alpha1.json +apiVersion: dragonflydb.io/v1alpha1 +kind: Dragonfly +metadata: + name: dragonfly +spec: + image: ghcr.io/dragonflydb/dragonfly:v1.17.1 + replicas: 3 + env: + - name: MAX_MEMORY + valueFrom: + resourceFieldRef: + resource: limits.memory + divisor: 1Mi + args: + - --maxmemory=$(MAX_MEMORY)Mi + - --proactor_threads=2 + - --cluster_mode=emulated + - --lock_on_hashtags + resources: + requests: + cpu: 100m + limits: + memory: 512Mi diff --git a/kubernetes/apps/database/dragonfly/cluster/kustomization.yaml b/kubernetes/apps/database/dragonfly/cluster/kustomization.yaml new file mode 100644 index 00000000..6f0f305d --- /dev/null +++ b/kubernetes/apps/database/dragonfly/cluster/kustomization.yaml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: 
$schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./cluster.yaml + - ./podmonitor.yaml diff --git a/kubernetes/apps/database/dragonfly/cluster/podmonitor.yaml b/kubernetes/apps/database/dragonfly/cluster/podmonitor.yaml new file mode 100644 index 00000000..b26a770d --- /dev/null +++ b/kubernetes/apps/database/dragonfly/cluster/podmonitor.yaml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/monitoring.coreos.com/podmonitor_v1.json +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: dragonfly +spec: + selector: + matchLabels: + app: dragonfly + podTargetLabels: ["app"] + podMetricsEndpoints: + - port: admin diff --git a/kubernetes/apps/database/dragonfly/ks.yaml b/kubernetes/apps/database/dragonfly/ks.yaml new file mode 100644 index 00000000..90e97232 --- /dev/null +++ b/kubernetes/apps/database/dragonfly/ks.yaml @@ -0,0 +1,44 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app dragonfly + namespace: flux-system +spec: + targetNamespace: database + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: ./kubernetes/apps/database/dragonfly/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: true + interval: 30m + retryInterval: 1m + timeout: 5m +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app dragonfly-cluster + namespace: flux-system +spec: + targetNamespace: database + commonMetadata: + labels: + app.kubernetes.io/name: *app + dependsOn: + - name: dragonfly + path: ./kubernetes/apps/database/dragonfly/cluster + prune: true + sourceRef: + kind: 
GitRepository + name: home-kubernetes + wait: true + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/kubernetes/apps/database/kustomization.yaml b/kubernetes/apps/database/kustomization.yaml new file mode 100644 index 00000000..1a5d02c4 --- /dev/null +++ b/kubernetes/apps/database/kustomization.yaml @@ -0,0 +1,11 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + # Pre Flux-Kustomizations + - ./namespace.yaml + # Flux-Kustomizations + - ./cloudnative-pg/ks.yaml + - ./dragonfly/ks.yaml + - ./minio/ks.yaml diff --git a/kubernetes/apps/database/minio/app/helmrelease.yaml b/kubernetes/apps/database/minio/app/helmrelease.yaml new file mode 100644 index 00000000..c362c92f --- /dev/null +++ b/kubernetes/apps/database/minio/app/helmrelease.yaml @@ -0,0 +1,117 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: minio +spec: + interval: 30m + chart: + spec: + chart: app-template + version: 3.1.0 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + values: + controllers: + minio: + annotations: + reloader.stakater.com/auto: "true" + containers: + app: + image: + repository: quay.io/minio/minio + tag: RELEASE.2024-05-01T01-11-10Z + env: + MINIO_API_CORS_ALLOW_ORIGIN: "https://minio.${PUBLIC_DOMAIN},https://s3.${PUBLIC_DOMAIN}" + MINIO_BROWSER_REDIRECT_URL: "https://minio.${PUBLIC_DOMAIN}" + MINIO_PROMETHEUS_JOB_ID: minio + MINIO_PROMETHEUS_URL: "https://prometheus.${PUBLIC_DOMAIN}" + MINIO_PROMETHEUS_AUTH_TYPE: public + MINIO_SERVER_URL: "https://s3.${PUBLIC_DOMAIN}" + MINIO_UPDATE: "off" + envFrom: + - secretRef: + 
name: minio-secret + args: ["server", "/data", "--console-address", ":9001"] + probes: + liveness: &probes + enabled: true + custom: true + spec: + httpGet: + path: /minio/health/live + port: 9000 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 6 + readiness: *probes + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: { drop: ["ALL"] } + resources: + requests: + cpu: 100m + limits: + memory: 2Gi + defaultPodOptions: + securityContext: + runAsNonRoot: true + runAsUser: 568 + runAsGroup: 568 + fsGroup: 568 + fsGroupChangePolicy: OnRootMismatch + supplementalGroups: [10000] + seccompProfile: { type: RuntimeDefault } + service: + app: + controller: minio + ports: + http: + port: 9001 + s3: + port: 9000 + serviceMonitor: + app: + serviceName: minio + endpoints: + - port: s3 + scheme: http + path: /minio/v2/metrics/cluster + interval: 1m + scrapeTimeout: 10s + ingress: + app: + className: internal + hosts: + - host: "minio.${PUBLIC_DOMAIN}" + paths: + - path: / + service: + identifier: app + port: http + - host: s3.${PUBLIC_DOMAIN} + paths: + - path: / + service: + identifier: app + port: s3 + persistence: + config: + type: hostPath + hostPath: "${CLUSTER_HOST_STORAGE_DIR}/minio" + hostPathType: Directory + globalMounts: + - path: /data diff --git a/kubernetes/apps/database/minio/app/kustomization.yaml b/kubernetes/apps/database/minio/app/kustomization.yaml new file mode 100644 index 00000000..16a6ce30 --- /dev/null +++ b/kubernetes/apps/database/minio/app/kustomization.yaml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./secret.sops.yaml + - ./helmrelease.yaml diff --git a/kubernetes/apps/database/minio/app/secret.sops.yaml b/kubernetes/apps/database/minio/app/secret.sops.yaml new file mode 100644 index 00000000..c1f038a3 --- /dev/null +++ 
b/kubernetes/apps/database/minio/app/secret.sops.yaml @@ -0,0 +1,29 @@ +# yamllint disable +kind: Secret +apiVersion: v1 +type: Opaque +metadata: + name: minio-secret +stringData: + MINIO_ROOT_USER: ENC[AES256_GCM,data:fQx0EMI=,iv:vpY4IXobFCbgeoszLc6vjmlF6ocXr4SZqys92+Irbqo=,tag:mY7LcFDu7Lw8xPZj1AAtrg==,type:str] + MINIO_ROOT_PASSWORD: ENC[AES256_GCM,data:OQsHdyM4Lxpjjn5M9Ckry1kJ8BdxtLsD0tn9gpvlVIU=,iv:0qLMz7OFrMLZhPuC5DAojN4KCrAyBWpCBVbFiUcThKo=,tag:B8VfX/VIonyYl6uzBrfTrA==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBUQVZQbG5BNnhaL3pGNnN0 + TlFzTkM1ZFdQMVZZcU1xdzVmc3kzMzlyTFh3CkZlcFZvUWtDc2VNOGNUcGw0ajRO + T3pMQ2hiYWRKQ0F6SXFaNXdtd2Jma0kKLS0tIHhEOHRGTFhPN2Z2WkZBZE9yODVx + cDB5UXluaGM5N21QK3lxSjBxM2w4UzQKHHBJiu9lyjquAlTqxUdJKpx/I1mfCSdJ + SasUl+jfy66EOKP1um4xVj6EzHhRDRXT2+Zgb160w/ECbIr8f78Oag== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2024-05-01T21:31:58Z" + mac: ENC[AES256_GCM,data:wzGYYq0a0WSaiu933bumtniHn5iWCSkJW5IJ4Qd47cY5PHeGgKGEi7KbcPsHgBlFhZltOBXP8WBzmuj+APUdnWaaNSHnP0E3YR5FURSxKDBxpTECrWlMq83PMiyonfhYPtCqcY+Kly8aH7OCtUeO/ehkzKWB2cuuIIgiL2nKR4g=,iv:HvK2/a83mwmzIPtPp/jrGnfvNqmlTBMpzCG8rtcDez4=,tag:l5MeyegXvZats3btE1GyKg==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.8.1 diff --git a/kubernetes/apps/database/minio/ks.yaml b/kubernetes/apps/database/minio/ks.yaml new file mode 100644 index 00000000..35165b52 --- /dev/null +++ b/kubernetes/apps/database/minio/ks.yaml @@ -0,0 +1,21 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app minio + namespace: flux-system +spec: + targetNamespace: database + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: 
./kubernetes/apps/database/minio/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/kubernetes/apps/database/namespace.yaml b/kubernetes/apps/database/namespace.yaml new file mode 100644 index 00000000..5cad2860 --- /dev/null +++ b/kubernetes/apps/database/namespace.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: database + labels: + kustomize.toolkit.fluxcd.io/prune: disabled diff --git a/kubernetes/apps/default/busybox/app/helmrelease.yaml b/kubernetes/apps/default/busybox/app/helmrelease.yaml index d2d1fd5d..886110fb 100644 --- a/kubernetes/apps/default/busybox/app/helmrelease.yaml +++ b/kubernetes/apps/default/busybox/app/helmrelease.yaml @@ -1,5 +1,5 @@ --- -apiVersion: helm.toolkit.fluxcd.io/v2beta1 +apiVersion: helm.toolkit.fluxcd.io/v2beta2 kind: HelmRelease metadata: name: busybox diff --git a/kubernetes/apps/observability/gatus/app/helmrelease.yaml b/kubernetes/apps/observability/gatus/app/helmrelease.yaml new file mode 100644 index 00000000..b403227e --- /dev/null +++ b/kubernetes/apps/observability/gatus/app/helmrelease.yaml @@ -0,0 +1,151 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: gatus +spec: + interval: 30m + chart: + spec: + chart: app-template + version: 3.1.0 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + values: + controllers: + gatus: + annotations: + reloader.stakater.com/auto: "true" + initContainers: + init-db: + image: + repository: ghcr.io/onedr0p/postgres-init + tag: 16 + # https://github.com/onedr0p/containers/blob/main/apps/postgres-init/entrypoint.sh + 
env: + INIT_POSTGRES_HOST: postgres16-rw.database.svc.cluster.local + INIT_POSTGRES_DBNAME: gatus + INIT_POSTGRES_USER: + valueFrom: + secretKeyRef: + name: gatus-secret + key: POSTGRES_USER + INIT_POSTGRES_PASS: + valueFrom: + secretKeyRef: + name: gatus-secret + key: POSTGRES_PASSWORD + INIT_POSTGRES_SUPER_PASS: "${POSTGRES_SUPER_PASSWORD}" + init-config: + dependsOn: init-db + image: + repository: ghcr.io/kiwigrid/k8s-sidecar + tag: 1.26.1 + env: + FOLDER: /config + LABEL: gatus.io/enabled + NAMESPACE: ALL + RESOURCE: both + UNIQUE_FILENAMES: true + METHOD: WATCH + restartPolicy: Always + resources: &resources + requests: + cpu: 10m + limits: + memory: 256Mi + containers: + app: + image: + repository: ghcr.io/twin/gatus + tag: v5.10.0 + env: + TZ: "${TIMEZONE}" + GATUS_CONFIG_PATH: /config + GATUS_DELAY_START_SECONDS: 5 + CUSTOM_WEB_PORT: &port 80 + POSTGRES_HOST: postgres16-rw.database.svc.cluster.local + POSTGRES_DB: gatus + envFrom: + - secretRef: + name: gatus-secret + probes: + liveness: &probes + enabled: true + custom: true + spec: + httpGet: + path: /health + port: *port + initialDelaySeconds: 0 + periodSeconds: 10 + timeoutSeconds: 1 + failureThreshold: 3 + readiness: *probes + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: { drop: ["ALL"] } + resources: *resources + defaultPodOptions: + dnsConfig: + options: + - { name: ndots, value: "1" } + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + fsGroup: 65534 + fsGroupChangePolicy: OnRootMismatch + seccompProfile: { type: RuntimeDefault } + service: + app: + controller: gatus + ports: + http: + port: *port + serviceMonitor: + app: + serviceName: gatus + endpoints: + - port: http + scheme: http + path: /metrics + interval: 1m + scrapeTimeout: 10s + ingress: + app: + className: external + annotations: + external-dns.alpha.kubernetes.io/target: "external.${PUBLIC_DOMAIN}" + hosts: + - host: "status.${PUBLIC_DOMAIN}" + paths: + - 
path: / + service: + identifier: app + port: http + serviceAccount: + create: true + name: gatus + persistence: + config: + type: emptyDir + config-file: + type: configMap + name: gatus-configmap + globalMounts: + - path: /config/config.yaml + subPath: config.yaml + readOnly: true diff --git a/kubernetes/apps/observability/gatus/app/kustomization.yaml b/kubernetes/apps/observability/gatus/app/kustomization.yaml new file mode 100644 index 00000000..30bf43b9 --- /dev/null +++ b/kubernetes/apps/observability/gatus/app/kustomization.yaml @@ -0,0 +1,14 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./secret.sops.yaml + - ./rbac.yaml + - ./helmrelease.yaml +configMapGenerator: + - name: gatus-configmap + files: + - config.yaml=./resources/config.yaml +generatorOptions: + disableNameSuffixHash: true diff --git a/kubernetes/apps/observability/gatus/app/rbac.yaml b/kubernetes/apps/observability/gatus/app/rbac.yaml new file mode 100644 index 00000000..0f12c439 --- /dev/null +++ b/kubernetes/apps/observability/gatus/app/rbac.yaml @@ -0,0 +1,22 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: gatus +rules: + - apiGroups: [""] + resources: ["configmaps", "secrets"] + verbs: ["get", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: gatus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: gatus +subjects: + - kind: ServiceAccount + name: gatus + namespace: observability diff --git a/kubernetes/apps/observability/gatus/app/resources/config.yaml b/kubernetes/apps/observability/gatus/app/resources/config.yaml new file mode 100644 index 00000000..22ef9a44 --- /dev/null +++ b/kubernetes/apps/observability/gatus/app/resources/config.yaml @@ -0,0 +1,43 @@ +--- +# Note: Gatus vars should be escaped with $${VAR_NAME} to avoid interpolation by Flux
+web: + port: $${CUSTOM_WEB_PORT} +storage: + type: postgres + path: postgres://$${POSTGRES_USER}:$${POSTGRES_PASSWORD}@$${POSTGRES_HOST}:5432/$${POSTGRES_DB}?sslmode=disable +metrics: true +debug: false +ui: + title: Status + header: Status +alerting: + ntfy: + topic: cluster + url: http://ntfy.comms.svc.cluster.local + token: $${NTFY_TOKEN} +connectivity: + checker: + target: 1.1.1.1:53 + interval: 1m +endpoints: + - name: status + group: external + url: https://status.${PUBLIC_DOMAIN} + interval: 1m + client: + dns-resolver: tcp://1.1.1.1:53 + conditions: + - "[STATUS] == 200" + alerts: + - type: ntfy + + - name: flux-webhook + group: external + url: https://flux-webhook.${PUBLIC_DOMAIN} + interval: 1m + client: + dns-resolver: tcp://1.1.1.1:53 + conditions: + - "[STATUS] == 404" + alerts: + - type: ntfy diff --git a/kubernetes/apps/observability/gatus/app/secret.sops.yaml b/kubernetes/apps/observability/gatus/app/secret.sops.yaml new file mode 100644 index 00000000..e0d1c8ed --- /dev/null +++ b/kubernetes/apps/observability/gatus/app/secret.sops.yaml @@ -0,0 +1,30 @@ +# yamllint disable +kind: Secret +apiVersion: v1 +type: Opaque +metadata: + name: gatus-secret +stringData: + NTFY_TOKEN: "" + POSTGRES_USER: ENC[AES256_GCM,data:tK4uZrE=,iv:C9KEk1sCBPHcY5Cc+wuZ08siznk8GVBDGgPgM198l5M=,tag:MDv9rOYh461vE+mmANiXFQ==,type:str] + POSTGRES_PASSWORD: ENC[AES256_GCM,data:RaPO4ujGDJ0rK2dYSeLUfx9Tj7I4h9mMIATTH2TkXJ8=,iv:Qa0AmjXh3inAq5KIvQc2OniSuFReoiIu6zh8kvnXXGU=,tag:QvU3lHBrtlnW/AYjsXt5Mw==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSB2cThlR1FCaXVpSk90L05I + MDlYVEdpMGxQTFNkTHM0YXVkZDVxRVdOY0E0CnlONmtVeEwyY2REYWZ3eTNGakpZ + L0JGZXh2cVdCMitCTWIwMG1GSGRhSzgKLS0tIHZJZTV6R2RsdjBSV0RhUUtqMEVM + T3ZQOUtyM1VucGZLN0llTXB2eFVkRkUK09FI9v7/6YvM6pdP+Dum0npSnUhJKEF4 +
wiyNFIbns432hQYnGHH6mkYs+3ZsM9iArLHW7WsvKMbC8U4Z+FtfZg== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2024-05-01T22:04:59Z" + mac: ENC[AES256_GCM,data:6CQxI+zYh7NWdA4HRAN52ppa0yIfb+YeZuPijjAf/LmSEIzUYC6TQgnJzSPbLa/9fQ+/1bBr4zoiOzn2MLu0aMBG+XqeQLAmUyu1lyvYKAtKC2FQUhxClnbjCCcdAFqCB6pHTVPs5XOsk3UBEGXcsZz8ZYkSG+dESOijPrdjV4I=,iv:5bsstw0Y4ndaF0Z/F5ikBJJu+kBN3zxgF0Cjc3f61NI=,tag:41O8sWf6wkDPWnscdEcj2w==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.8.1 diff --git a/kubernetes/apps/observability/gatus/ks.yaml b/kubernetes/apps/observability/gatus/ks.yaml new file mode 100644 index 00000000..ea75d857 --- /dev/null +++ b/kubernetes/apps/observability/gatus/ks.yaml @@ -0,0 +1,23 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app gatus + namespace: flux-system +spec: + targetNamespace: observability + commonMetadata: + labels: + app.kubernetes.io/name: *app + dependsOn: + - name: cloudnative-pg-cluster + path: ./kubernetes/apps/observability/gatus/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/kubernetes/apps/observability/grafana/app/helmrelease.yaml b/kubernetes/apps/observability/grafana/app/helmrelease.yaml new file mode 100644 index 00000000..7fe129af --- /dev/null +++ b/kubernetes/apps/observability/grafana/app/helmrelease.yaml @@ -0,0 +1,423 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: grafana +spec: + interval: 30m + chart: + spec: + chart: grafana + version: 7.3.9 + sourceRef: + kind: HelmRepository + name: grafana + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: 
true + remediation: + strategy: rollback + retries: 3 + dependsOn: + - name: authelia + namespace: auth + - name: kube-prometheus-stack + namespace: observability + - name: loki + namespace: observability + values: + extraInitContainers: + - name: 01-init-db + image: ghcr.io/onedr0p/postgres-init:16 + env: + INIT_POSTGRES_DBNAME: grafana + INIT_POSTGRES_HOST: postgres16-rw.database.svc.cluster.local + envValueFrom: + INIT_POSTGRES_USER: + secretKeyRef: + name: &secret grafana-name + key: GF_DATABASE_USER + INIT_POSTGRES_PASS: + secretKeyRef: + name: *secret + key: GF_DATABASE_PASSWORD + INIT_POSTGRES_SUPER_PASS: "${POSTGRES_SUPER_PASSWORD}" + replicas: 3 + env: + GF_AUTH_GENERIC_OAUTH_API_URL: "https://auth.${PUBLIC_DOMAIN}/api/oidc/userinfo" + GF_AUTH_GENERIC_OAUTH_AUTH_URL: "https://auth.${PUBLIC_DOMAIN}/api/oidc/authorization" + GF_AUTH_GENERIC_OAUTH_CLIENT_ID: grafana + GF_AUTH_GENERIC_OAUTH_TOKEN_URL: "https://auth.${PUBLIC_DOMAIN}/api/oidc/token" + GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: "${GRAFANA_OAUTH_CLIENT_SECRET}" + GF_DATE_FORMATS_USE_BROWSER_LOCALE: true + GF_EXPLORE_ENABLED: true + GF_FEATURE_TOGGLES_ENABLE: publicDashboards + GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS: natel-discrete-panel,pr0ps-trackmap-panel,panodata-map-panel + GF_SECURITY_ANGULAR_SUPPORT_ENABLED: true + GF_SECURITY_COOKIE_SAMESITE: grafana + GF_SERVER_ROOT_URL: "https://grafana.${PUBLIC_DOMAIN}" + GF_DATABASE_NAME: grafana + GF_DATABASE_HOST: postgres16-rw.database.svc.cluster.local:5432 + GF_DATABASE_SSL_MODE: disable + GF_DATABASE_TYPE: postgres + envFromSecrets: + - name: *secret + grafana.ini: + analytics: + check_for_updates: false + check_for_plugin_updates: false + reporting_enabled: false + auth: + oauth_auto_login: true + oauth_allow_insecure_email_lookup: true + auth.generic_oauth: + enabled: true + name: Authelia + icon: signin + scopes: openid profile email groups + empty_scopes: false + login_attribute_path: preferred_username + groups_attribute_path: groups +
name_attribute_path: name + use_pkce: true + auth.generic_oauth.group_mapping: + org_id: 1 + role_attribute_path: contains(groups[*], 'admins') && 'Admin' || 'Viewer' + auth.basic: + enabled: false + auth.anonymous: + enabled: false + news: + news_feed_enabled: false + dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: default + orgId: 1 + folder: "" + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/default-folder + - name: ceph + orgId: 1 + folder: Ceph + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/ceph-folder + - name: emqx + orgId: 1 + folder: EMQX + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/emqx-folder + - name: flux + orgId: 1 + folder: Flux + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/flux-folder + - name: kubernetes + orgId: 1 + folder: Kubernetes + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/kubernetes-folder + - name: nginx + orgId: 1 + folder: Nginx + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/nginx-folder + - name: prometheus + orgId: 1 + folder: Prometheus + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/prometheus-folder + - name: thanos + orgId: 1 + folder: Thanos + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/thanos-folder + - name: unifi + orgId: 1 + folder: Unifi + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/unifi-folder + datasources: + datasources.yaml: + apiVersion: 1 + deleteDatasources: + - { name: Alertmanager, orgId: 1 } + - { name: Loki, orgId: 1 } + - { name: Prometheus, orgId: 1 } + datasources: + - name: 
Prometheus + type: prometheus + uid: prometheus + access: proxy + url: http://thanos-query-frontend.observability.svc.cluster.local:10902 + jsonData: + prometheusType: Thanos + timeInterval: 1m + isDefault: true + - name: Loki + type: loki + uid: loki + access: proxy + url: http://loki-gateway.observability.svc.cluster.local + jsonData: + maxLines: 250 + - name: Alertmanager + type: alertmanager + uid: alertmanager + access: proxy + url: http://alertmanager-operated.observability.svc.cluster.local:9093 + jsonData: + implementation: prometheus + dashboards: + default: + blocky: + # renovate: depName="blocky" + gnetId: 13768 + revision: 3 + datasource: + - { name: DS_PROMETHEUS, value: Prometheus } + - { name: VAR_BLOCKY_URL, value: 'http:\\/\\/192.168.254.9:4000' } + cloudflared: + # renovate: depName="Cloudflare Tunnels (cloudflared)" + gnetId: 17457 + revision: 6 + datasource: + - { name: DS_PROMETHEUS, value: Prometheus } + external-dns: + # renovate: depName="External-dns" + gnetId: 15038 + revision: 3 + datasource: Prometheus + minio: + # renovate: depName="MinIO Dashboard" + gnetId: 13502 + revision: 26 + datasource: + - { name: DS_PROMETHEUS, value: Prometheus } + node-exporter-full: + # renovate: depName="Node Exporter Full" + gnetId: 1860 + revision: 36 + datasource: Prometheus + smart: + # renovate: depName="Dashboard for smartctl_exporter" + gnetId: 20204 + revision: 1 + datasource: + - { name: DS_PROMETHEUS, value: Prometheus } + spegel: + # renovate: depName="Spegel" + gnetId: 18089 + revision: 1 + datasource: + - { name: DS_PROMETHEUS, value: Prometheus } + unpackerr: + # renovate: depName="Unpackerr" + gnetId: 18817 + revision: 1 + datasource: + - { name: DS_PROMETHEUS, value: Prometheus } + zfs: + # renovate: depName="ZFS" + gnetId: 7845 + revision: 4 + datasource: Prometheus + cert-manager: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json + datasource: Prometheus + dragonfly: 
+ url: https://raw.githubusercontent.com/dragonflydb/dragonfly/main/tools/local/monitoring/grafana/provisioning/dashboards/dashboard.json + datasource: Prometheus + node-feature-discovery: + url: https://raw.githubusercontent.com/kubernetes-sigs/node-feature-discovery/master/examples/grafana-dashboard.json + datasource: Prometheus + ceph: + ceph-cluster: + # renovate: depName="Ceph Cluster" + gnetId: 2842 + revision: 17 + datasource: Prometheus + ceph-osd: + # renovate: depName="Ceph - OSD (Single)" + gnetId: 5336 + revision: 9 + datasource: Prometheus + ceph-pools: + # renovate: depName="Ceph - Pools" + gnetId: 5342 + revision: 9 + datasource: Prometheus + flux: + flux-cluster: + url: https://raw.githubusercontent.com/fluxcd/flux2-monitoring-example/main/monitoring/configs/dashboards/cluster.json + datasource: Prometheus + flux-control-plane: + url: https://raw.githubusercontent.com/fluxcd/flux2-monitoring-example/main/monitoring/configs/dashboards/control-plane.json + datasource: Prometheus + kubernetes: + kubernetes-api-server: + # renovate: depName="Kubernetes / System / API Server" + gnetId: 15761 + revision: 16 + datasource: Prometheus + kubernetes-coredns: + # renovate: depName="Kubernetes / System / CoreDNS" + gnetId: 15762 + revision: 18 + datasource: Prometheus + kubernetes-global: + # renovate: depName="Kubernetes / Views / Global" + gnetId: 15757 + revision: 37 + datasource: Prometheus + kubernetes-namespaces: + # renovate: depName="Kubernetes / Views / Namespaces" + gnetId: 15758 + revision: 34 + datasource: Prometheus + kubernetes-nodes: + # renovate: depName="Kubernetes / Views / Nodes" + gnetId: 15759 + revision: 29 + datasource: Prometheus + kubernetes-pods: + # renovate: depName="Kubernetes / Views / Pods" + gnetId: 15760 + revision: 21 + datasource: Prometheus + kubernetes-volumes: + # renovate: depName="K8s / Storage / Volumes / Cluster" + gnetId: 11454 + revision: 14 + datasource: Prometheus + nginx: + nginx: + url:
https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/grafana/dashboards/nginx.json + datasource: Prometheus + nginx-request-handling-performance: + url: https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/grafana/dashboards/request-handling-performance.json + datasource: Prometheus + prometheus: + prometheus: + # renovate: depName="Prometheus" + gnetId: 19105 + revision: 3 + datasource: Prometheus + thanos: + thanos-bucket-replicate: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/bucket-replicate.json + datasource: Prometheus + thanos-compact: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/compact.json + datasource: Prometheus + thanos-overview: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/overview.json + datasource: Prometheus + thanos-query: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/query.json + datasource: Prometheus + thanos-query-frontend: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/query-frontend.json + datasource: Prometheus + thanos-receieve: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/receive.json + datasource: Prometheus + thanos-rule: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/rule.json + datasource: Prometheus + thanos-sidecar: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/sidecar.json + datasource: Prometheus + thanos-store: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/thanos/dashboards/store.json + datasource: Prometheus + unifi: + unifi-insights: + # renovate: depName="UniFi-Poller: Client Insights - Prometheus" + gnetId: 11315 + revision: 9 + 
datasource: Prometheus + unifi-network-sites: + # renovate: depName="UniFi-Poller: Network Sites - Prometheus" + gnetId: 11311 + revision: 5 + datasource: Prometheus + unifi-uap: + # renovate: depName="UniFi-Poller: UAP Insights - Prometheus" + gnetId: 11314 + revision: 10 + datasource: Prometheus + unifi-usw: + # renovate: depName="UniFi-Poller: USW Insights - Prometheus" + gnetId: 11312 + revision: 9 + datasource: Prometheus + sidecar: + dashboards: + enabled: true + searchNamespace: ALL + label: grafana_dashboard + folderAnnotation: grafana_folder + provider: + disableDelete: true + foldersFromFilesStructure: true + datasources: + enabled: true + searchNamespace: ALL + labelValue: "" + plugins: + - grafana-clock-panel + - grafana-piechart-panel + - grafana-worldmap-panel + - natel-discrete-panel + - pr0ps-trackmap-panel + - vonage-status-panel + serviceMonitor: + enabled: true + ingress: + enabled: true + ingressClassName: internal + hosts: ["grafana.devbu.io"] + persistence: + enabled: false + testFramework: + enabled: false + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + app.kubernetes.io/name: grafana diff --git a/kubernetes/apps/observability/grafana/app/kustomization.yaml b/kubernetes/apps/observability/grafana/app/kustomization.yaml new file mode 100644 index 00000000..16a6ce30 --- /dev/null +++ b/kubernetes/apps/observability/grafana/app/kustomization.yaml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./secret.sops.yaml + - ./helmrelease.yaml diff --git a/kubernetes/apps/observability/grafana/app/secret.sops.yaml b/kubernetes/apps/observability/grafana/app/secret.sops.yaml new file mode 100644 index 00000000..62fbae3d --- /dev/null +++ b/kubernetes/apps/observability/grafana/app/secret.sops.yaml @@ -0,0 +1,29 @@ +# yamllint 
disable +kind: Secret +apiVersion: v1 +type: Opaque +metadata: + name: grafana-secret +stringData: + GF_DATABASE_USER: ENC[AES256_GCM,data:fH5dA8Ulzg==,iv:A5gpKCZbNDf5hUUg5lB7Epb5SmHVQxVcLW1+gn3aKS8=,tag:vsr0GUgY+ouUheCfYDLZzA==,type:str] + GF_DATABASE_PASSWORD: ENC[AES256_GCM,data:3VYI7Yz81LjB+IVuwrlRZFwgGCxe5+LBUtmFDngfhlk=,iv:e8LiDcM7+Gj+geOkuQ1eIAIuqwuCRhpqYIDTNvgUcNQ=,tag:y4t3yCVSfj8LH81UBnO70Q==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBvVlZiSTM4SFROcGxPWHpS + SDRaOUl6V0JobFVvVTJYUXlnejh5Wk9OcG5RCnhXdmM0Z3Fhd3RYS0Z4ZGJ3azdk + Rm1xNHNIOUFzcFMwVGdoWlhueUpLeG8KLS0tIFU0T1lqK01rSEVzTW4xYXVmQWhZ + MFZHQ2lQRDhrRU5iUXFMR0hneHNuQUUK6CZLaYuSorikaqv7QYEfdL7SWu8DJG8d + +ORuTUzrOtQjHU6GtfQYhmXR+qzHUgkgQlVE7923C5jF/j+9IsWGKQ== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2024-05-02T03:29:06Z" + mac: ENC[AES256_GCM,data:fCW1ZTGq2aVcyW6zr6U//+8opOj3xp776OMB59HLnpknPo5ynBPgsH63OXJ7Kk4XfoylcK5EQPdmEZl3NQo1rMQPo3QMR/gW5IIKtUxDP6/oUDmbCB7qTR+tZoVc9FUETo+DCir+hHXf26Qt28uo8CqbKSmiSXCk8LYvn8+1cwo=,iv:Lr1Tsn37gh14Sjkh7uiys34vbJzSTX5I8kCqohbNQMM=,tag:EsPwE9Ih7ZlURwiUXnsNRg==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.8.1 diff --git a/kubernetes/apps/observability/grafana/ks.yaml b/kubernetes/apps/observability/grafana/ks.yaml new file mode 100644 index 00000000..352d6fa4 --- /dev/null +++ b/kubernetes/apps/observability/grafana/ks.yaml @@ -0,0 +1,23 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app grafana + namespace: flux-system +spec: + targetNamespace: observability + commonMetadata: + labels: + app.kubernetes.io/name: *app + dependsOn: + - name: cloudnative-pg-cluster + path: 
./kubernetes/apps/observability/grafana/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/kubernetes/apps/observability/healthchecks/app/helmrelease.yaml b/kubernetes/apps/observability/healthchecks/app/helmrelease.yaml new file mode 100644 index 00000000..6b781594 --- /dev/null +++ b/kubernetes/apps/observability/healthchecks/app/helmrelease.yaml @@ -0,0 +1,65 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: healthchecks +spec: + interval: 30m + chart: + spec: + chart: app-template + version: 3.1.0 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + retries: 3 + values: + controllers: + healthchecks: + strategy: RollingUpdate + containers: + app: + image: + repository: lscr.io/linuxserver/healthchecks + tag: 33 + env: + PUID: "${SECURITY_CONTEXT_RUN_AS_USER}" + PGID: "${SECURITY_CONTEXT_RUN_AS_GROUP}" + SITE_ROOT: "https://healthchecks.${PUBLIC_DOMAIN}" + SITE_NAME: Healthchecks + DEFAULT_FROM_EMAIL: "${SMTP_USER}" + EMAIL_HOST: maddy.comms.svc.cluster.local + EMAIL_PORT: 25 + EMAIL_USE_TLS: "False" + SUPERUSER_EMAIL: "${HEALTHCHECKS_ADMIN_EMAIL}" + SUPERUSER_PASSWORD: "${HEALTHCHECKS_ADMIN_PASSWORD}" + SECRET_KEY: "${HEALTHCHECKS_SECRET_KEY}" + INTEGRATIONS_ALLOW_PRIVATE_IPS: "True" + service: + app: + controller: healthchecks + ports: + http: + port: 8000 + ingress: + app: + className: external + annotations: + external-dns.alpha.kubernetes.io/target: "external.${PUBLIC_DOMAIN}" + hosts: + - host: &host "healthchecks.${PUBLIC_DOMAIN}" + paths: + - path: / + service: + identifier: app + port: http + tls: + - hosts: + - *host diff --git a/kubernetes/apps/observability/healthchecks/app/kustomization.yaml b/kubernetes/apps/observability/healthchecks/app/kustomization.yaml new file mode 100644 index 
00000000..7a20d2a0 --- /dev/null +++ b/kubernetes/apps/observability/healthchecks/app/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: [] + # - ./helmrelease.yaml diff --git a/kubernetes/apps/observability/healthchecks/ks.yaml b/kubernetes/apps/observability/healthchecks/ks.yaml new file mode 100644 index 00000000..418694e6 --- /dev/null +++ b/kubernetes/apps/observability/healthchecks/ks.yaml @@ -0,0 +1,22 @@ +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app healthchecks + namespace: flux-system +spec: + targetNamespace: observability + commonMetadata: + labels: + app.kubernetes.io/name: *app + dependsOn: + - name: cloudnative-pg-cluster + path: ./kubernetes/apps/observability/healthchecks/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/helmrelease.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/helmrelease.yaml new file mode 100644 index 00000000..2b167771 --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/helmrelease.yaml @@ -0,0 +1,201 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: kube-prometheus-stack + namespace: observability +spec: + interval: 15m + chart: + spec: + chart: kube-prometheus-stack + version: 40.0.2 + sourceRef: + kind: HelmRepository + name: prometheus-community + namespace: flux-system + maxHistory: 2 + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + retries: 3 + uninstall: + keepHistory: false + values: + alertmanager: + config: + global: + resolve_timeout: 5m + receivers: + - name: "null" + - name: email + email_configs: + - send_resolved: false + to: "${NOTIFY_EMAIL}" + from: "AlertManager <${SMTP_USER}>" + smarthost:
maddy.comms.svc.cluster.local:2525 + require_tls: false + route: + group_by: [alertname, job] + group_wait: 30s + group_interval: 5m + repeat_interval: 6h + receiver: email + routes: + - receiver: "null" + matchers: + - alertname =~ "InfoInhibitor|Watchdog" + - receiver: email + matchers: + - severity = "critical" + continue: true + inhibit_rules: + - source_matchers: + - severity = "critical" + target_matchers: + - severity = "warning" + equal: [alertname, namespace] + ingress: + enabled: true + pathType: Prefix + ingressClassName: nginx + hosts: + - &host "alert-manager.${PUBLIC_DOMAIN}" + tls: + - hosts: + - *host + alertmanagerSpec: + replicas: 1 + podAntiAffinity: hard + storage: + volumeClaimTemplate: + spec: + storageClassName: local-path + resources: + requests: + storage: 1Gi + resources: + limits: + memory: 400Mi + kube-state-metrics: + metricLabelsAllowlist: + - "persistentvolumeclaims=[*]" + prometheus: + monitor: + enabled: true + relabelings: + - action: replace + regex: (.*) + replacement: $1 + sourceLabels: + - __meta_kubernetes_pod_node_name + targetLabel: kubernetes_node + grafana: + enabled: false + forceDeployDashboards: true + sidecar: + dashboards: + multicluster: + etcd: + enabled: true + kubelet: + enabled: true + serviceMonitor: + metricRelabelings: + - action: replace + sourceLabels: + - node + targetLabel: instance + kubeApiServer: + enabled: true + kubeControllerManager: + enabled: true + endpoints: &hosts + - 10.1.2.30 + - 10.1.2.31 + # - 10.1.2.32 + # - 10.1.2.33 + # - 10.1.2.34 + kubeScheduler: + enabled: true + endpoints: *hosts + kubeProxy: + enabled: true + endpoints: *hosts + kubeEtcd: + enabled: true + endpoints: *hosts + service: + enabled: true + port: 2381 + targetPort: 2381 + prometheus: + ingress: + enabled: true + pathType: Prefix + ingressClassName: nginx + hosts: + - &host "prometheus.${PUBLIC_DOMAIN}" + tls: + - hosts: + - *host + thanosService: + enabled: true + thanosServiceMonitor: + enabled: true + thanosIngress: + 
enabled: true + pathType: Prefix + ingressClassName: nginx + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/backend-protocol: GRPC + hosts: + - &host "thanos-sidecar.${PUBLIC_DOMAIN}" + tls: + - hosts: + - *host + prometheusSpec: + replicas: 3 + replicaExternalLabelName: __replica__ + externalLabels: + cluster: home + thanos: + image: quay.io/thanos/thanos:v0.28.0 + # renovate: datasource=docker depName=quay.io/thanos/thanos + version: "v0.28.0" + podAntiAffinity: hard + ruleSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: false + podMonitorSelectorNilUsesHelmValues: false + probeSelectorNilUsesHelmValues: false + retention: 14d + retentionSize: 10GB + enableAdminAPI: true + walCompression: true + allowOverlappingBlocks: true + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: local-path + resources: + requests: + storage: 10Gi + # Prometheus is memory hungry! 😔 + resources: + requests: + memory: 64Mi + limits: + memory: 512Mi + additionalScrapeConfigs: + - job_name: node-exporter + scrape_interval: 5m + scrape_timeout: 10s + honor_timestamps: true + static_configs: + - targets: + - "opnsense.${PRIVATE_DOMAIN}:9100" diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/kustomization.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/kustomization.yaml new file mode 100644 index 00000000..63362132 --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/kustomization.yaml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./secret.sops.yaml + - ./helmrelease.yaml + - ./prometheusrule.yaml + - ./scrapeconfig.yaml diff --git a/kubernetes/apps/observability/kube-prometheus-stack/app/secret.sops.yaml b/kubernetes/apps/observability/kube-prometheus-stack/app/secret.sops.yaml new file mode 100644 index 
00000000..8df4db16 --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/app/secret.sops.yaml @@ -0,0 +1,28 @@ +# yamllint disable +kind: Secret +apiVersion: v1 +type: Opaque +metadata: + name: alertmanager-secret +stringData: + test: ENC[AES256_GCM,data:4BzxNw==,iv:/0HCmCqcLWVzdTg1EOpTQ9w0scMs2Y7M8UQkk3CvOEE=,tag:O5eoBEffhxYgHCxfEG0W6w==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBIOElCSklOaERiZmFHUjhX + VFVDSGlpRjlsNWpURmVZUW81bW5HdFk4NUU0CkN4aFY0K1phQk9nUFVqWkpJcGtC + ZW8rUkFTWC9kcnhBUmFyZTQyYzVmcHcKLS0tIEVkOEljRmx5ZnBYTlZZbUpnMUNT + YnkxNERlKzdrVXlLNHc4WDNGNHBqc0kKB+3w+F3UrfF5z8GJTfZ8c5PEWi/tvnKi + /Pm3jhhKHmv3/VuOr8o7hdm8D7nF0BXsxHIlxFMc6PqACLX0t+vZFw== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2024-05-02T05:26:54Z" + mac: ENC[AES256_GCM,data:xr734AwqlKZvjkrf5zOlfeAK3aa2Hzu8GR3tDM++o//Is/ZLiNEhk2vrQXEp047m7DrTrnVz4ai4rlEPXSpJKLobz9tXeTkyBoBldIRyx9Nsss265hHyQ5kvBc3QSSQNZBJV8Fof9F8B0HeNINTQ9foHCa9+uzK0Z3M17BGn8oM=,iv:mQspUcM5Z4Vr1I2eNzmz07DkGTO8uV3xtrHzWxpEo2g=,tag:YOzjynSJYk27Lx+FZxd3SQ==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.8.1 diff --git a/kubernetes/apps/observability/kube-prometheus-stack/ks.yaml b/kubernetes/apps/observability/kube-prometheus-stack/ks.yaml new file mode 100644 index 00000000..d1cc928b --- /dev/null +++ b/kubernetes/apps/observability/kube-prometheus-stack/ks.yaml @@ -0,0 +1,25 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app kube-prometheus-stack + namespace: flux-system +spec: + targetNamespace: observability + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: 
./kubernetes/apps/observability/kube-prometheus-stack/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 15m + postBuild: + substitute: + # renovate: datasource=docker depName=quay.io/thanos/thanos + THANOS_VERSION: v0.34.1 diff --git a/kubernetes/apps/observability/kubernetes-dashboard/app/helmrelease.yaml b/kubernetes/apps/observability/kubernetes-dashboard/app/helmrelease.yaml index 3c92a29b..4b6eed56 100644 --- a/kubernetes/apps/observability/kubernetes-dashboard/app/helmrelease.yaml +++ b/kubernetes/apps/observability/kubernetes-dashboard/app/helmrelease.yaml @@ -1,5 +1,5 @@ --- -apiVersion: helm.toolkit.fluxcd.io/v2beta1 +apiVersion: helm.toolkit.fluxcd.io/v2beta2 kind: HelmRelease metadata: name: kubernetes-dashboard diff --git a/kubernetes/apps/observability/kustomization.yaml b/kubernetes/apps/observability/kustomization.yaml index 447592a1..b054371c 100644 --- a/kubernetes/apps/observability/kustomization.yaml +++ b/kubernetes/apps/observability/kustomization.yaml @@ -3,9 +3,11 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - ./namespace.yaml + # - ./gatus/ks.yaml + # - ./healthchecks/ks.yaml + # - ./grafana/ks.yaml - ./kubernetes-dashboard/ks.yaml - # - ./grafana - # - ./kube-prometheus-stack - # - ./loki - # - ./node-problem-detector - # - ./prometheus + # - ./kube-prometheus-stack/ks.yaml + # - ./loki/ks.yaml + # - ./thanos/ks.yaml + # - ./vector/ks.yaml diff --git a/kubernetes/apps/observability/loki/app/helmrelease.yaml b/kubernetes/apps/observability/loki/app/helmrelease.yaml new file mode 100644 index 00000000..854b6562 --- /dev/null +++ b/kubernetes/apps/observability/loki/app/helmrelease.yaml @@ -0,0 +1,140 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: loki +spec: + 
interval: 30m + timeout: 15m + chart: + spec: + chart: loki + version: 6.4.2 + sourceRef: + kind: HelmRepository + name: grafana + namespace: flux-system + install: + crds: Skip + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + crds: Skip + remediation: + strategy: rollback + retries: 3 + dependsOn: + - name: openebs + namespace: openebs-system + - name: rook-ceph-cluster + namespace: rook-ceph + - name: vector-agent + namespace: observability + - name: vector-aggregator + namespace: observability + valuesFrom: + - targetPath: loki.storage.bucketNames.chunks + kind: ConfigMap + name: &cephBucket loki-bucket + valuesKey: BUCKET_NAME + - targetPath: loki.storage.s3.endpoint + kind: ConfigMap + name: *cephBucket + valuesKey: BUCKET_HOST + - targetPath: loki.storage.s3.region + kind: ConfigMap + name: *cephBucket + valuesKey: BUCKET_REGION + values: + deploymentMode: SimpleScalable + loki: + podAnnotations: + configmap.reloader.stakater.com/reload: *cephBucket + secret.reloader.stakater.com/reload: *cephBucket + ingester: + chunk_encoding: snappy + storage: + type: s3 + s3: + s3ForcePathStyle: true + insecure: true + schemaConfig: + configs: + - from: "2024-04-01" # quote + store: tsdb + object_store: s3 + schema: v13 + index: + prefix: loki_index_ + period: 24h + structuredConfig: + auth_enabled: false + server: + log_level: info + http_listen_port: 3100 + grpc_listen_port: 9095 + grpc_server_max_recv_msg_size: 8388608 + grpc_server_max_send_msg_size: 8388608 + limits_config: + ingestion_burst_size_mb: 128 + ingestion_rate_mb: 64 + max_query_parallelism: 100 + per_stream_rate_limit: 64M + per_stream_rate_limit_burst: 128M + reject_old_samples: true + reject_old_samples_max_age: 168h + retention_period: 30d + shard_streams: + enabled: true + split_queries_by_interval: 1h + query_scheduler: + max_outstanding_requests_per_tenant: 4096 + frontend: + max_outstanding_per_tenant: 4096 + ruler: + enable_api: true + enable_alertmanager_v2: true + alertmanager_url: 
http://alertmanager-operated.observability.svc.cluster.local:9093 + storage: + type: local + local: + directory: /rules + rule_path: /rules/fake + analytics: + reporting_enabled: false + backend: + replicas: 3 + persistence: + size: 20Gi + storageClass: openebs-hostpath + gateway: + replicas: 3 + image: + registry: ghcr.io + ingress: + enabled: true + ingressClassName: internal + hosts: + - host: "loki.${PUBLIC_DOMAIN}" + paths: + - path: / + pathType: Prefix + read: + replicas: 3 + write: + replicas: 3 + persistence: + size: 20Gi + storageClass: openebs-hostpath + sidecar: + image: + repository: ghcr.io/kiwigrid/k8s-sidecar + rules: + searchNamespace: ALL + folder: /rules/fake + lokiCanary: + enabled: false + test: + enabled: false diff --git a/kubernetes/apps/observability/loki/app/kustomization.yaml b/kubernetes/apps/observability/loki/app/kustomization.yaml new file mode 100644 index 00000000..5e098843 --- /dev/null +++ b/kubernetes/apps/observability/loki/app/kustomization.yaml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./helmrelease.yaml + - ./prometheusrule.yaml diff --git a/kubernetes/apps/observability/loki/app/prometheusrule.yaml b/kubernetes/apps/observability/loki/app/prometheusrule.yaml new file mode 100644 index 00000000..bd847034 --- /dev/null +++ b/kubernetes/apps/observability/loki/app/prometheusrule.yaml @@ -0,0 +1,64 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: loki-minio +spec: + groups: + - name: minio + rules: + - alert: MinioAbsent + annotations: + description: MinIO component has disappeared from Prometheus service discovery. + summary: MinIO component has disappeared.
+ expr: | + absent(up{job=~".*minio.*"} == 1) + for: 10m + labels: + severity: critical + - alert: MinioDiskOffline + annotations: + description: MinIO is reporting {{ $value }} disk(s) offline + on server {{ $labels.server }} + summary: MinIO disk(s) offline. + expr: | + minio_cluster_disk_offline_total != 0 + for: 1m + labels: + severity: critical + - alert: MinioNodeOffline + annotations: + description: MinIO is reporting that node {{ $labels.server }} is offline. + summary: MinIO node is offline. + expr: | + minio_cluster_nodes_offline_total != 0 + for: 1m + labels: + severity: critical + - alert: MinioClusterUsage + annotations: + description: MinIO cluster is reporting less than 10% storage free. + {{ $value | humanizePercentage }} of cluster storage is available. + summary: MinIO cluster is low on capacity. + expr: | + sum(minio_cluster_capacity_usable_free_bytes) + / + sum(minio_cluster_capacity_usable_total_bytes) + < .10 + for: 15m + labels: + severity: warning + - alert: MinioNodeDiskUsage + annotations: + description: + MinIO node is reporting less than 10% disk space available. + {{ $value | humanizePercentage }} of disk space available on node {{ $labels.server }} + summary: MinIO node is low on disk space. 
+ expr: | + sum(minio_node_disk_free_bytes) by (server) + / + sum(minio_node_disk_total_bytes) by (server) + < .10 + for: 15m + labels: + severity: warning diff --git a/kubernetes/apps/observability/loki/ks.yaml b/kubernetes/apps/observability/loki/ks.yaml new file mode 100644 index 00000000..8cefe44c --- /dev/null +++ b/kubernetes/apps/observability/loki/ks.yaml @@ -0,0 +1,21 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app loki + namespace: flux-system +spec: + targetNamespace: observability + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: ./kubernetes/apps/observability/loki/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 15m diff --git a/kubernetes/apps/observability/thanos/app/helmrelease.yaml b/kubernetes/apps/observability/thanos/app/helmrelease.yaml new file mode 100644 index 00000000..be922c2e --- /dev/null +++ b/kubernetes/apps/observability/thanos/app/helmrelease.yaml @@ -0,0 +1,124 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: thanos +spec: + interval: 30m + timeout: 15m + chart: + spec: + chart: thanos + version: 1.16.5 + sourceRef: + kind: HelmRepository + name: stevehipwell + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + dependsOn: + - name: openebs + namespace: openebs-system + - name: rook-ceph-cluster + namespace: rook-ceph + valuesFrom: + - targetPath: objstoreConfig.value.config.bucket + kind: ConfigMap + name: thanos-bucket + valuesKey: BUCKET_NAME + - targetPath: objstoreConfig.value.config.endpoint + kind: 
ConfigMap + name: thanos-bucket + valuesKey: BUCKET_HOST + - targetPath: objstoreConfig.value.config.region + kind: ConfigMap + name: thanos-bucket + valuesKey: BUCKET_REGION + - targetPath: objstoreConfig.value.config.access_key + kind: Secret + name: thanos-bucket + valuesKey: AWS_ACCESS_KEY_ID + - targetPath: objstoreConfig.value.config.secret_key + kind: Secret + name: thanos-bucket + valuesKey: AWS_SECRET_ACCESS_KEY + values: + objstoreConfig: + value: + type: s3 + config: + insecure: true + additionalEndpoints: + - dnssrv+_grpc._tcp.kube-prometheus-stack-thanos-discovery.observability.svc.cluster.local + additionalReplicaLabels: ["__replica__"] + serviceMonitor: + enabled: true + compact: + enabled: true + extraArgs: + - --compact.concurrency=4 + - --delete-delay=30m + - --retention.resolution-raw=14d + - --retention.resolution-5m=30d + - --retention.resolution-1h=60d + persistence: &persistence + enabled: true + storageClass: openebs-hostpath + size: 10Gi + query: + replicas: 3 + extraArgs: ["--alert.query-url=https://thanos.devbu.io"] + additionalStores: ["thanos.turbo.ac:10901"] + queryFrontend: + enabled: true + replicas: 3 + extraEnv: &extraEnv + - name: THANOS_CACHE_CONFIG + valueFrom: + configMapKeyRef: + name: &configMap thanos-cache-configmap + key: cache.yaml + extraArgs: ["--query-range.response-cache-config=$(THANOS_CACHE_CONFIG)"] + ingress: + enabled: true + ingressClassName: internal + hosts: + - thanos.devbu.io + podAnnotations: &podAnnotations + configmap.reloader.stakater.com/reload: *configMap + rule: + enabled: true + replicas: 3 + extraArgs: ["--web.prefix-header=X-Forwarded-Prefix"] + alertmanagersConfig: + value: |- + alertmanagers: + - api_version: v2 + static_configs: + - dnssrv+_http-web._tcp.alertmanager-operated.observability.svc.cluster.local + rules: + value: |- + groups: + - name: PrometheusWatcher + rules: + - alert: PrometheusDown + annotations: + summary: A Prometheus has disappeared from Prometheus target discovery + expr: 
absent(up{job="kube-prometheus-stack-prometheus"}) + for: 5m + labels: + severity: critical + persistence: *persistence + storeGateway: + replicas: 3 + extraEnv: *extraEnv + extraArgs: ["--index-cache.config=$(THANOS_CACHE_CONFIG)"] + persistence: *persistence + podAnnotations: *podAnnotations diff --git a/kubernetes/apps/observability/thanos/ks.yaml b/kubernetes/apps/observability/thanos/ks.yaml new file mode 100644 index 00000000..7c22b299 --- /dev/null +++ b/kubernetes/apps/observability/thanos/ks.yaml @@ -0,0 +1,21 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app thanos + namespace: flux-system +spec: + targetNamespace: observability + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: ./kubernetes/apps/observability/thanos/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 15m diff --git a/kubernetes/apps/observability/vector/app/agent/helmrelease.yaml b/kubernetes/apps/observability/vector/app/agent/helmrelease.yaml new file mode 100644 index 00000000..fd9a06f3 --- /dev/null +++ b/kubernetes/apps/observability/vector/app/agent/helmrelease.yaml @@ -0,0 +1,103 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: vector-agent +spec: + interval: 30m + timeout: 15m + chart: + spec: + chart: app-template + version: 3.1.0 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + dependsOn: + - name: vector-aggregator + namespace: observability + values: + controllers: + 
vector-agent: + type: daemonset + strategy: RollingUpdate + annotations: + reloader.stakater.com/auto: "true" + containers: + app: + image: + repository: docker.io/timberio/vector + tag: 0.37.1-alpine@sha256:ced16088cdcfcadd2f471c5760ea3b08bec82b9be00a8b90173b9ade7d001607 + env: + PROCFS_ROOT: /host/proc + SYSFS_ROOT: /host/sys + VECTOR_SELF_NODE_NAME: + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + VECTOR_SELF_POD_NAME: + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + VECTOR_SELF_POD_NAMESPACE: + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + args: ["--config", "/etc/vector/vector.yaml"] + securityContext: + privileged: true + serviceAccount: + create: true + name: vector-agent + persistence: + config: + enabled: true + type: configMap + name: vector-agent-configmap + globalMounts: + - path: /etc/vector/vector.yaml + subPath: vector.yaml + readOnly: true + data: + type: emptyDir + globalMounts: + - path: /vector-data-dir + procfs: + type: hostPath + hostPath: /proc + hostPathType: Directory + globalMounts: + - path: /host/proc + readOnly: true + sysfs: + type: hostPath + hostPath: /sys + hostPathType: Directory + globalMounts: + - path: /host/sys + readOnly: true + var-lib: + type: hostPath + hostPath: /var/lib + hostPathType: Directory + globalMounts: + - readOnly: true + var-log: + type: hostPath + hostPath: /var/log + hostPathType: Directory + globalMounts: + - readOnly: true diff --git a/kubernetes/apps/observability/vector/app/agent/kustomization.yaml b/kubernetes/apps/observability/vector/app/agent/kustomization.yaml new file mode 100644 index 00000000..cad3d529 --- /dev/null +++ b/kubernetes/apps/observability/vector/app/agent/kustomization.yaml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./helmrelease.yaml + - ./rbac.yaml +configMapGenerator: + - name: 
vector-agent-configmap + files: + - vector.yaml=./resources/vector.yaml +generatorOptions: + disableNameSuffixHash: true diff --git a/kubernetes/apps/observability/vector/app/agent/rbac.yaml b/kubernetes/apps/observability/vector/app/agent/rbac.yaml new file mode 100644 index 00000000..a088f8d1 --- /dev/null +++ b/kubernetes/apps/observability/vector/app/agent/rbac.yaml @@ -0,0 +1,22 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: vector-agent +rules: + - apiGroups: [""] + resources: ["namespaces", "nodes", "pods"] + verbs: ["list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: vector-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: vector-agent +subjects: + - kind: ServiceAccount + name: vector-agent + namespace: observability diff --git a/kubernetes/apps/observability/vector/app/agent/resources/vector.yaml b/kubernetes/apps/observability/vector/app/agent/resources/vector.yaml new file mode 100644 index 00000000..f3a7565c --- /dev/null +++ b/kubernetes/apps/observability/vector/app/agent/resources/vector.yaml @@ -0,0 +1,25 @@ +--- +data_dir: /vector-data-dir + +sources: + kubernetes_source: + type: kubernetes_logs + use_apiserver_cache: true + pod_annotation_fields: + container_image: container_image + container_name: container_name + pod_labels: pod_labels + pod_name: pod_name + pod_annotations: "" + namespace_annotation_fields: + namespace_labels: "" + node_annotation_fields: + node_labels: "" + +sinks: + kubernetes: + type: vector + compression: true + version: "2" + address: vector-aggregator.observability.svc.cluster.local:6010 + inputs: ["kubernetes_source"] diff --git a/kubernetes/apps/observability/vector/app/aggregator/helmrelease.yaml b/kubernetes/apps/observability/vector/app/aggregator/helmrelease.yaml new file mode 100644 index 00000000..dcff6c09 --- /dev/null +++ 
b/kubernetes/apps/observability/vector/app/aggregator/helmrelease.yaml @@ -0,0 +1,96 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2beta2.schema.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: &app vector-aggregator +spec: + interval: 30m + timeout: 15m + chart: + spec: + chart: app-template + version: 3.1.0 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + values: + controllers: + vector-aggregator: + replicas: 3 + strategy: RollingUpdate + annotations: + reloader.stakater.com/auto: "true" + initContainers: + init-geoip: + image: + repository: ghcr.io/maxmind/geoipupdate + tag: v7.0.1@sha256:80c57598a9ff552953e499cefc589cfe7b563d64262742ea42f2014251b557b0 + env: + GEOIPUPDATE_EDITION_IDS: GeoLite2-City + GEOIPUPDATE_FREQUENCY: "0" + GEOIPUPDATE_VERBOSE: "1" + envFrom: + - secretRef: + name: vector-aggregator-secret + containers: + app: + image: + repository: docker.io/timberio/vector + tag: 0.37.1-alpine@sha256:ced16088cdcfcadd2f471c5760ea3b08bec82b9be00a8b90173b9ade7d001607 + args: ["--config", "/etc/vector/vector.yaml"] + probes: + liveness: + enabled: true + readiness: + enabled: true + defaultPodOptions: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + app.kubernetes.io/name: *app + service: + app: + controller: vector-aggregator + type: LoadBalancer + annotations: + external-dns.alpha.kubernetes.io/hostname: "vector.${PUBLIC_DOMAIN}" + io.cilium/lb-ipam-ips: "${LB_VECTOR_ADDR}" + ports: + http: + primary: true + port: 8686 + journald: + port: 6000 + kubernetes: + port: 6010 + vyos: + port: 6020 + persistence: + config: + type: configMap + name: vector-aggregator-configmap + 
globalMounts: + - path: /etc/vector/vector.yaml + subPath: vector.yaml + readOnly: true + data: + type: emptyDir + globalMounts: + - path: /vector-data-dir + geoip: + type: emptyDir + globalMounts: + - path: /usr/share/GeoIP diff --git a/kubernetes/apps/observability/vector/app/aggregator/kustomization.yaml b/kubernetes/apps/observability/vector/app/aggregator/kustomization.yaml new file mode 100644 index 00000000..ae1a5735 --- /dev/null +++ b/kubernetes/apps/observability/vector/app/aggregator/kustomization.yaml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./secret.sops.yaml + - ./helmrelease.yaml +configMapGenerator: + - name: vector-aggregator-configmap + files: + - vector.yaml=./resources/vector.yaml +generatorOptions: + disableNameSuffixHash: true diff --git a/kubernetes/apps/observability/vector/app/aggregator/resources/vector.yaml b/kubernetes/apps/observability/vector/app/aggregator/resources/vector.yaml new file mode 100644 index 00000000..fa8793c4 --- /dev/null +++ b/kubernetes/apps/observability/vector/app/aggregator/resources/vector.yaml @@ -0,0 +1,84 @@ +--- +data_dir: /vector-data-dir +api: + enabled: true + address: 0.0.0.0:8686 + +enrichment_tables: + geoip_table: + type: geoip + path: /usr/share/GeoIP/GeoLite2-City.mmdb + +# +# Sources +# + +sources: + journald_source: + type: vector + version: "2" + address: 0.0.0.0:6000 + + kubernetes_source: + type: vector + version: "2" + address: 0.0.0.0:6010 + + vyos_source: + type: syslog + address: 0.0.0.0:6020 + mode: tcp + +# +# Transforms +# + +transforms: + kubernetes_remap: + type: remap + inputs: ["kubernetes_source"] + source: | + # Standardize 'app' index + .custom_app_name = .pod_labels."app.kubernetes.io/name" || .pod_labels.app || .pod_labels."k8s-app" || "unknown" + # Drop pod_labels + del(.pod_labels) + +# +# Sinks +# + +sinks: + journald: + inputs: 
["journald_source"] + type: loki + endpoint: http://loki-gateway.observability.svc.cluster.local + encoding: { codec: json } + out_of_order_action: accept + remove_label_fields: true + remove_timestamp: true + labels: + hostname: '{{ host }}' + + kubernetes: + inputs: ["kubernetes_remap"] + type: loki + endpoint: http://loki-gateway.observability.svc.cluster.local + encoding: { codec: json } + out_of_order_action: accept + remove_label_fields: true + remove_timestamp: true + labels: + app: '{{ custom_app_name }}' + namespace: '{{ kubernetes.pod_namespace }}' + node: '{{ kubernetes.pod_node_name }}' + + vyos: + inputs: ["vyos_source"] # vyos_firewall_geoip_remap removed: no transform with that name is defined in this file + type: loki + endpoint: http://loki-gateway.observability.svc.cluster.local + encoding: { codec: json } + out_of_order_action: accept + remove_label_fields: true + remove_timestamp: true + labels: + hostname: '{{ host }}' diff --git a/kubernetes/apps/observability/vector/app/aggregator/secret.sops.yaml b/kubernetes/apps/observability/vector/app/aggregator/secret.sops.yaml new file mode 100644 index 00000000..5cb39b02 --- /dev/null +++ b/kubernetes/apps/observability/vector/app/aggregator/secret.sops.yaml @@ -0,0 +1,30 @@ +# yamllint disable +kind: Secret +apiVersion: v1 +type: Opaque +metadata: + name: vector-aggregator-secret +stringData: + #ENC[AES256_GCM,data:dGKDvorcuOFAsUockelUdQ==,iv:Nc4szslExBsID+OIfor1uYJbK0WQumebtMbq9Oz1DCc=,tag:fHA5ahwgbyvZtWyepv4oWg==,type:comment] + GEOIPUPDATE_ACCOUNT_ID: ENC[AES256_GCM,data:kQKmNprZ,iv:uPGTDATxV6GUS6Lj48kU4No2gpFxXpBL1nDsgCXjr6A=,tag:HJW3p7bR5PMJLTeJglAcLA==,type:str] + GEOIPUPDATE_LICENSE_KEY: ENC[AES256_GCM,data:xpniZy/Haf5a3SeGAVLPQQ==,iv:xVcazU2D6eTjFSS7LptKjHQKuwbBKoGUGK34z9xZBL8=,tag:rtYbbSg2EUcLgdWM9nGJnA==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- +
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBRalJyWWlrcmpkQ1VPRnhw + QkV0Z1EvT280VEdpajhpTnFpZ25qMzJNT0ZVCjRYb3hkYXY5RXBZUEppTnJ3Z0hx + bG91V2RjN3hxcC9OU21OKzNmdnUyd3cKLS0tIFcyV1Ezc05acy8xaU9VVUdxcDV1 + RGpJUVNnVGQwMWk5dnlJMmFnSTVZZmcKVK2NJTks+ZoT/N+vC5PQid+S4YeYRQHG + mycaN9ZYrI8Eapt/066vvqhBsRDxWkTIDI36BfNLPc1hqyWLGZhkxw== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2024-05-01T23:15:41Z" + mac: ENC[AES256_GCM,data:Hbzo/NXBDEaGfHmPys/ipwBPgdFhqnNoCTkrNJWE4WyySodxC8jvoa7L7srDwLDr8jdTJYQe719EvpNMBBpBDXhhKYf5yQ8oBu/1hBPqG9b6lFDLBUMtPT0mHyJPjld1Fjp7fnNMHAvLqJ86XrI8MWk20Q3uH7oyJmtxhyHDNvo=,iv:cT6B8yG7X4bPoRaz2aPKxto1EWuUk+JbmCEFBonvyCE=,tag:u3ocdceb8ybhMeioifBcnA==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.8.1 diff --git a/kubernetes/apps/observability/vector/app/kustomization.yaml b/kubernetes/apps/observability/vector/app/kustomization.yaml new file mode 100644 index 00000000..54568aa0 --- /dev/null +++ b/kubernetes/apps/observability/vector/app/kustomization.yaml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./agent + - ./aggregator diff --git a/kubernetes/apps/observability/vector/ks.yaml b/kubernetes/apps/observability/vector/ks.yaml new file mode 100644 index 00000000..6a82774c --- /dev/null +++ b/kubernetes/apps/observability/vector/ks.yaml @@ -0,0 +1,21 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app vector + namespace: flux-system +spec: + targetNamespace: observability + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: ./kubernetes/apps/observability/vector/app + prune: true + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 15m diff --git 
a/kubernetes/apps/openebs-system/openebs/app/helmrelease.yaml b/kubernetes/apps/openebs-system/openebs/app/helmrelease.yaml index e7c1b45c..c64decd7 100644 --- a/kubernetes/apps/openebs-system/openebs/app/helmrelease.yaml +++ b/kubernetes/apps/openebs-system/openebs/app/helmrelease.yaml @@ -33,4 +33,4 @@ spec: enabled: true name: openebs-hostpath isDefaultClass: false - basePath: /var/openebs/local + basePath: "${CLUSTER_HOST_STORAGE_DIR}" diff --git a/kubernetes/apps/rook-ceph/kustomization.yaml b/kubernetes/apps/rook-ceph/kustomization.yaml new file mode 100644 index 00000000..488e24aa --- /dev/null +++ b/kubernetes/apps/rook-ceph/kustomization.yaml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: [] + # # Pre Flux-Kustomizations + # - ./namespace.yaml + # # Flux-Kustomizations + # - ./rook-ceph/ks.yaml diff --git a/kubernetes/apps/rook-ceph/namespace.yaml b/kubernetes/apps/rook-ceph/namespace.yaml new file mode 100644 index 00000000..4f4d74a8 --- /dev/null +++ b/kubernetes/apps/rook-ceph/namespace.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: rook-ceph + labels: + kustomize.toolkit.fluxcd.io/prune: disabled diff --git a/kubernetes/apps/rook-ceph/rook-ceph/app/helmrelease.yaml b/kubernetes/apps/rook-ceph/rook-ceph/app/helmrelease.yaml new file mode 100644 index 00000000..1a2b523a --- /dev/null +++ b/kubernetes/apps/rook-ceph/rook-ceph/app/helmrelease.yaml @@ -0,0 +1,38 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: rook-ceph-operator +spec: + interval: 30m + timeout: 15m + chart: + spec: + chart: rook-ceph + version: v1.14.2 + sourceRef: + kind: HelmRepository + name: rook-ceph + namespace: flux-system + install: + remediation: + retries: 3 + 
upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + values: + csi: + cephFSKernelMountOptions: ms_mode=prefer-crc + enableLiveness: true + serviceMonitor: + enabled: true + monitoring: + enabled: true + resources: + requests: + memory: 128Mi # unchangeable + cpu: 100m # unchangeable + limits: {} diff --git a/kubernetes/apps/rook-ceph/rook-ceph/app/kustomization.yaml b/kubernetes/apps/rook-ceph/rook-ceph/app/kustomization.yaml new file mode 100644 index 00000000..53bc9471 --- /dev/null +++ b/kubernetes/apps/rook-ceph/rook-ceph/app/kustomization.yaml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + # - ./secret.sops.yaml + - ./helmrelease.yaml diff --git a/kubernetes/apps/rook-ceph/rook-ceph/app/secret.sops.yaml b/kubernetes/apps/rook-ceph/rook-ceph/app/secret.sops.yaml new file mode 100644 index 00000000..c0d4b588 --- /dev/null +++ b/kubernetes/apps/rook-ceph/rook-ceph/app/secret.sops.yaml @@ -0,0 +1,28 @@ +# yamllint disable +kind: Secret +apiVersion: v1 +type: Opaque +metadata: + name: rook-ceph-dashboard +stringData: + ROOK_DASHBOARD_PASSWORD: ENC[AES256_GCM,data:N/WgHc1GjUqO5yZJ2MlXCUDbu1PBHmEFGIvaIBuK/Wk=,iv:9eCgK9RzFEYougNJIPHFWYYVGZiEVmxlb3two4Y0jpk=,tag:PZ1U9+GDrVtQcWwMFd9bNQ==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBKK1l2STBGc080Y0x6MkZx + KzNMVnR4UkRaK21BNG91alUvWlFqWS9lZTBZCjUrcG04bzFBYlRxbXRrM29sNzNY + eEdYWGVud0hRY2FHa1g2YXdYUXBqaE0KLS0tIE1yMk5Ub1kzd0dqUHNOd0xoZkN3 + amxPUzBUVFRrMENqZnAvWG03eDlHS3cKpMRKGXaysTRcEmYClgGlv1vU2tn3cvJe + nu5d8ebc1/cLtJbeX2qf3BIeF5UP2TryTCu3rLXN4To3WhM7wqd1Rg== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2024-05-01T16:55:24Z" + mac:
ENC[AES256_GCM,data:pQQsuwLPegw6vVqxZ+YU1Ctu+l972auEAMvxIRklXBWEBQqQ6hmzo3GAfbno08pF0+3Vonl08UnTAgrTefAgcbxEz7fs9PdjKl9yYOJ4MwL/65pkVvGUMCmIz0E/Vh7IiOP51wD+dBTHC85Pm9lwkdTzI+Xvb36wiPFL/0bLkUE=,iv:sSlHdTZBGlJv6C4FtOa7hiLutpohTHXaPFZsulv+8ZA=,tag:Hf7q16WB0IP6fwh9oWUIGg==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.8.1 diff --git a/kubernetes/apps/rook-ceph/rook-ceph/cluster/helmrelease.yaml b/kubernetes/apps/rook-ceph/rook-ceph/cluster/helmrelease.yaml new file mode 100644 index 00000000..b5cf6d34 --- /dev/null +++ b/kubernetes/apps/rook-ceph/rook-ceph/cluster/helmrelease.yaml @@ -0,0 +1,219 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/helm.toolkit.fluxcd.io/helmrelease_v2beta2.json +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: rook-ceph-cluster +spec: + interval: 30m + timeout: 15m + chart: + spec: + chart: rook-ceph-cluster + version: v1.14.2 + sourceRef: + kind: HelmRepository + name: rook-ceph + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + dependsOn: + - name: rook-ceph-operator + namespace: rook-ceph + values: + monitoring: + enabled: true + createPrometheusRules: true + ingress: + dashboard: + ingressClassName: internal + host: + name: rook.devbu.io + path: / + toolbox: + enabled: true + configOverride: | + [global] + bdev_enable_discard = true + bdev_async_discard = true + osd_class_update_on_start = false + cephClusterSpec: + crashCollector: + disable: false + dashboard: + enabled: true + urlPrefix: / + ssl: false + prometheusEndpoint: http://thanos-query-frontend.observability.svc.cluster.local:10902 + mgr: + modules: + - name: pg_autoscaler + enabled: true + network: + provider: host + connections: + requireMsgr2: true + storage: + useAllNodes: true + useAllDevices: true + deviceFilter: nvme0n1 + config: + osdsPerDevice: "1" + placement: + mgr: &placement + 
nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + mon: *placement + resources: + mgr: + requests: + cpu: 100m + memory: 512Mi + limits: + memory: 2Gi + mon: + requests: + cpu: 50m + memory: 512Mi + limits: + memory: 1Gi + osd: + requests: + cpu: 500m + memory: 2Gi + limits: + memory: 6Gi + mgr-sidecar: + requests: + cpu: 50m + memory: 128Mi + limits: + memory: 256Mi + cephBlockPools: + - name: ceph-blockpool + spec: + failureDomain: host + replicated: + size: 3 + storageClass: + enabled: true + name: ceph-block + isDefault: true + reclaimPolicy: Delete + allowVolumeExpansion: true + volumeBindingMode: Immediate + parameters: + imageFormat: "2" + imageFeatures: layering + csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph + csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner + csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph + csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node + csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph + csi.storage.k8s.io/fstype: ext4 + cephBlockPoolsVolumeSnapshotClass: + enabled: true + name: csi-ceph-blockpool + isDefault: false + deletionPolicy: Delete + cephFileSystems: + - name: &cephFileSystemName ceph-filesystem + spec: + metadataPool: + replicated: + size: 3 + dataPools: + - failureDomain: host + replicated: + size: 3 + name: data0 + metadataServer: + activeCount: 1 + activeStandby: true + priorityClassName: system-cluster-critical + placement: + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + app.kubernetes.io/name: ceph-mds + app.kubernetes.io/part-of: *cephFileSystemName + resources: + requests: + cpu: 100m + memory: 1Gi + limits: + memory: 4Gi + storageClass: + enabled: 
true + isDefault: false + name: ceph-filesystem + pool: data0 + reclaimPolicy: Delete + allowVolumeExpansion: true + volumeBindingMode: Immediate + parameters: + csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner + csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph + csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner + csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph + csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node + csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph + csi.storage.k8s.io/fstype: ext4 + cephFileSystemVolumeSnapshotClass: + enabled: true + name: csi-ceph-filesystem + isDefault: false + deletionPolicy: Delete + cephObjectStores: + - name: ceph-objectstore + spec: + metadataPool: + failureDomain: host + replicated: + size: 3 + dataPool: + failureDomain: host + erasureCoded: + dataChunks: 2 + codingChunks: 1 + preservePoolsOnDelete: true + gateway: + port: 80 + resources: + requests: + cpu: 100m + memory: 1Gi + limits: + memory: 2Gi + instances: 2 + priorityClassName: system-cluster-critical + healthCheck: + bucket: + interval: 60s + storageClass: + enabled: true + name: ceph-bucket + reclaimPolicy: Delete + volumeBindingMode: Immediate + parameters: + region: us-east-1 + ingress: + enabled: true + ingressClassName: internal + host: + name: "rgw.${PUBLIC_DOMAIN}" + path: / diff --git a/kubernetes/apps/rook-ceph/rook-ceph/cluster/kustomization.yaml b/kubernetes/apps/rook-ceph/rook-ceph/cluster/kustomization.yaml new file mode 100644 index 00000000..17cbc72b --- /dev/null +++ b/kubernetes/apps/rook-ceph/rook-ceph/cluster/kustomization.yaml @@ -0,0 +1,6 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./helmrelease.yaml diff --git a/kubernetes/apps/rook-ceph/rook-ceph/ks.yaml b/kubernetes/apps/rook-ceph/rook-ceph/ks.yaml new file mode 100644 index 
00000000..8598b017 --- /dev/null +++ b/kubernetes/apps/rook-ceph/rook-ceph/ks.yaml @@ -0,0 +1,42 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app rook-ceph + namespace: flux-system +spec: + targetNamespace: rook-ceph + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: ./kubernetes/apps/rook-ceph/rook-ceph/app + prune: false + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 5m +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/kustomize.toolkit.fluxcd.io/kustomization_v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app rook-ceph-cluster + namespace: flux-system +spec: + targetNamespace: rook-ceph + commonMetadata: + labels: + app.kubernetes.io/name: *app + path: ./kubernetes/apps/rook-ceph/rook-ceph/cluster + prune: false + sourceRef: + kind: GitRepository + name: home-kubernetes + wait: false + interval: 30m + retryInterval: 1m + timeout: 15m diff --git a/kubernetes/apps/sites/landing/app/helmrelease.yaml b/kubernetes/apps/sites/landing/app/helmrelease.yaml index e4bc61f0..b7fef34f 100644 --- a/kubernetes/apps/sites/landing/app/helmrelease.yaml +++ b/kubernetes/apps/sites/landing/app/helmrelease.yaml @@ -3,7 +3,7 @@ apiVersion: helm.toolkit.fluxcd.io/v2beta2 kind: HelmRelease metadata: - name: &app landing + name: landing spec: interval: 30m chart: @@ -34,6 +34,29 @@ spec: tag: alpine env: TZ: "${TIMEZONE}" + probes: + liveness: &probes + enabled: true + custom: true + spec: + httpGet: + path: / + port: &port 80 + initialDelaySeconds: 0 + periodSeconds: 10 + timeoutSeconds: 1 + failureThreshold: 3 + readiness: *probes + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: { drop: ["ALL"] } + 
defaultPodOptions: + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + seccompProfile: { type: RuntimeDefault } service: app: controller: landing diff --git a/kubernetes/apps/sites/landing/app/kustomization.yaml b/kubernetes/apps/sites/landing/app/kustomization.yaml index 01533301..57bbd5ea 100644 --- a/kubernetes/apps/sites/landing/app/kustomization.yaml +++ b/kubernetes/apps/sites/landing/app/kustomization.yaml @@ -6,7 +6,7 @@ resources: configMapGenerator: - name: landing-configmap files: - - ./site/index.html - - ./site/favicon.ico + - ./resources/index.html + - ./resources/favicon.ico generatorOptions: disableNameSuffixHash: true diff --git a/kubernetes/apps/sites/landing/app/site/favicon.ico b/kubernetes/apps/sites/landing/app/resources/favicon.ico similarity index 100% rename from kubernetes/apps/sites/landing/app/site/favicon.ico rename to kubernetes/apps/sites/landing/app/resources/favicon.ico diff --git a/kubernetes/apps/sites/landing/app/site/index.html b/kubernetes/apps/sites/landing/app/resources/index.html similarity index 100% rename from kubernetes/apps/sites/landing/app/site/index.html rename to kubernetes/apps/sites/landing/app/resources/index.html diff --git a/kubernetes/flux/repositories/git/kustomization.yaml b/kubernetes/flux/repositories/git/kustomization.yaml index bb78ecf0..fe0f332a 100644 --- a/kubernetes/flux/repositories/git/kustomization.yaml +++ b/kubernetes/flux/repositories/git/kustomization.yaml @@ -1,5 +1,4 @@ --- apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -resources: - - ./local-path-provisioner.yaml +resources: [] diff --git a/kubernetes/flux/repositories/git/local-path-provisioner.yaml b/kubernetes/flux/repositories/git/local-path-provisioner.yaml deleted file mode 100644 index 669fb053..00000000 --- a/kubernetes/flux/repositories/git/local-path-provisioner.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -apiVersion: source.toolkit.fluxcd.io/v1 -kind: GitRepository -metadata: - name: 
local-path-provisioner - namespace: flux-system -spec: - interval: 30m - url: https://github.com/rancher/local-path-provisioner - ref: - tag: v0.0.24 - ignore: | - # exclude all - /* - # include kubernetes directory - !/deploy/chart/local-path-provisioner diff --git a/kubernetes/flux/repositories/helm/bjw-s.yaml b/kubernetes/flux/repositories/helm/bjw-s.yaml index df0c6474..a3765390 100644 --- a/kubernetes/flux/repositories/helm/bjw-s.yaml +++ b/kubernetes/flux/repositories/helm/bjw-s.yaml @@ -1,4 +1,5 @@ --- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/source.toolkit.fluxcd.io/helmrepository_v1beta2.json apiVersion: source.toolkit.fluxcd.io/v1beta2 kind: HelmRepository metadata: diff --git a/kubernetes/flux/repositories/helm/cloudnative-pg.yaml b/kubernetes/flux/repositories/helm/cloudnative-pg.yaml new file mode 100644 index 00000000..b7076f1d --- /dev/null +++ b/kubernetes/flux/repositories/helm/cloudnative-pg.yaml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/source.toolkit.fluxcd.io/helmrepository_v1beta2.json +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: cloudnative-pg + namespace: flux-system +spec: + interval: 2h + url: https://cloudnative-pg.github.io/charts diff --git a/kubernetes/flux/repositories/helm/kustomization.yaml b/kubernetes/flux/repositories/helm/kustomization.yaml index f805a068..53e9681e 100644 --- a/kubernetes/flux/repositories/helm/kustomization.yaml +++ b/kubernetes/flux/repositories/helm/kustomization.yaml @@ -5,6 +5,7 @@ resources: - ./bjw-s.yaml - ./brettinternet.yaml - ./cilium.yaml + - ./cloudnative-pg.yaml - ./descheduler.yaml - ./external-dns.yaml - ./grafana.yaml @@ -16,4 +17,5 @@ resources: - ./metrics-server.yaml - ./openebs.yaml - ./prometheus-community.yaml + - ./rook-ceph.yaml - ./stakater.yaml diff --git a/kubernetes/flux/repositories/helm/rook-ceph.yaml b/kubernetes/flux/repositories/helm/rook-ceph.yaml new file 
mode 100644 index 00000000..fbe61780 --- /dev/null +++ b/kubernetes/flux/repositories/helm/rook-ceph.yaml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/source.toolkit.fluxcd.io/helmrepository_v1beta2.json +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: HelmRepository +metadata: + name: rook-ceph + namespace: flux-system +spec: + interval: 2h + url: https://charts.rook.io/release diff --git a/kubernetes/flux/vars/cluster-secrets.sops.yaml b/kubernetes/flux/vars/cluster-secrets.sops.yaml index 4261822d..803275b8 100644 --- a/kubernetes/flux/vars/cluster-secrets.sops.yaml +++ b/kubernetes/flux/vars/cluster-secrets.sops.yaml @@ -5,13 +5,14 @@ metadata: name: cluster-secrets namespace: flux-system stringData: - PUBLIC_DOMAIN: ENC[AES256_GCM,data:zC5oOL7Z/HnCiBARhSjXwb0=,iv:XrE7dnl5osta7aZUKVt8Ax8ix5BL+yZ3rO8OSoRnnM8=,tag:81fxToq3/TqiVNTn1mnH6w==,type:str] - CLOUDFLARE_EMAIL: ENC[AES256_GCM,data:/EC3G5kYMDjqggHp9ZkXH+ofmC7Hm8Q=,iv:V2UqdDZsi4qqL4q/+5kQHfjuD4BffczWEKoKT3AKK3Q=,tag:zgSg8OEDYq/AMc3lD/47jA==,type:str] - CLOUDFLARE_TUNNEL_ID: ENC[AES256_GCM,data:5ox/Byqc0lIbFKvO2keUXKi1KL0aEo+1bAUMq4wLbJo17Y+d,iv:1lI20TAs0BGqAQWrTQWD67LK29gMBqManzBAuq6+vfI=,tag:a5wjRYNwxGRytvjqgzvJkQ==,type:str] - SMTP_USER: ENC[AES256_GCM,data:boTe8GD3ykMJcv8mR/v1WvwG0oQPEv4=,iv:XPekuqRBlOm/JIY0peOURWMQm6KXfhKYaLT/i3bYjPY=,tag:o4TEG4iQWYR9cCIC95s/Ow==,type:str] - NOTIFY_EMAIL: ENC[AES256_GCM,data:GqDxBZm9WBScfQTffDOye9mJ/1I=,iv:OzMGKSQJXAd0fiBnfO9iMvurxXonon683lGw98WUZoY=,tag:OFNWNRRPDOfKwQzTV2Uopg==,type:str] - GRAFANA_OAUTH_CLIENT_SECRET: ENC[AES256_GCM,data:Ohj3TMH3FZRAcN8oLKrWRdTTSqo9ewwaIBCWoAvv3VA=,iv:/ZbcdDjS5We3oF/I96tB0sNWhN4GtYE4dgm8IZi2xxg=,tag:xVaj7BpXHTGSTq9LGNDN8w==,type:str] - THANOS_MINIO_OAUTH_CLIENT_SECRET: ENC[AES256_GCM,data:D7P2hl7sjt7Ytt5YexDFkTrbIDWMiWWjvXxeFxeb1qE=,iv:OG8fVnjiimijWb4jsBbkRVZgYZlLGrxF1sIq73QB/Cw=,tag:f8U0elf+4YUhxRRlbUnC0w==,type:str] + PUBLIC_DOMAIN: 
ENC[AES256_GCM,data:E4m4Ed7ymrnKU+55FIeKBqE=,iv:JAYELFgroBUnitIYUoXvZqzBT+HwstbkbVcvG7z/qp8=,tag:hdhhVCT88jh+NZ1gE9qc2A==,type:str] + CLOUDFLARE_EMAIL: ENC[AES256_GCM,data:G4yBZdcOXdUBxnyUYXgX8Za+BMPVVK4=,iv:BigIpDR+/61B/GQCohH83KNImjeismZm0KhB83QK5xk=,tag:8xADK7W7mmcDBp5Vs06naw==,type:str] + CLOUDFLARE_TUNNEL_ID: ENC[AES256_GCM,data:47GYn6M9pUM1/YHNogyGubSuf2sCPr8NMtrPbeG+S6Y6KH9S,iv:tbZ2B/tZ4ATLJqkQpyxSHFvdIpF1X/EqFvN4b+VBOuc=,tag:yuOZ4rz0mzOTWojkRgvSug==,type:str] + SMTP_USER: ENC[AES256_GCM,data:xJwVmOX32rxSMaFiIqQVJV+TjnS6ooU=,iv:otnsDjx02Tc7dk6FEFKNJ2JVgN6Nr8iEfgKHm9OcDJI=,tag:W2A8qD0ttX3Dgg0YCmD1Cw==,type:str] + NOTIFY_EMAIL: ENC[AES256_GCM,data:QPG5TsHryDtPlh+8brauG7c7wAM=,iv:qaxGBUEcMQVOe5VmLGHfXY9r8wKGMZXwRQCHFLpa7o4=,tag:KXcGIOSYYYdFgHqiWs/SJQ==,type:str] + GRAFANA_OAUTH_CLIENT_SECRET: ENC[AES256_GCM,data:fAdmEN5wGwxGOfgEgiqQ01jw1fsbHi8v+mXrjPXjLRI=,iv:/YxgxawjI9S/DevovTR+9czS4q0ba0ZE4bcJFNyU9WM=,tag:Zht9cd4LwQM6HAetnU9m5A==,type:str] + THANOS_MINIO_OAUTH_CLIENT_SECRET: ENC[AES256_GCM,data:PjD2krpwUSDQcr/yPE2hhcDJk4PtAee+6zX8xqRGr8w=,iv:d+RUkrmZoxBKbwfjzdbR6OnMkttISx6L5UsIv8tJHOc=,tag:OLpqJlRh0VPfCz+x1d4BnA==,type:str] + POSTGRES_SUPER_PASSWORD: ENC[AES256_GCM,data:ml7ftY62RjQvDxCVg7yqLcX7s1V0Qyx+mClkt0aoqH8=,iv:Q4MqleiBlgF9Wqbm8fE5rLdGp96Cn4yRrcUz189hFsE=,tag:hAQiW4Xkd5zAB4jQoUBrLQ==,type:str] sops: kms: [] gcp_kms: [] @@ -21,14 +22,14 @@ sops: - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs enc: | -----BEGIN AGE ENCRYPTED FILE----- - YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSB2RjFYbS85VXcwdndndHNP - U3IwSGV4Z3BPMjM3VnF0Z3dpYWU4NUQ1UVRjClRvQnAvV3dBMG9SNkhMWWhZMC9S - QU5XbHFkL1h4QVpacXJrL0NuSkNrQTQKLS0tIHRXcGUyWk5tbmt4blNXdEdsREpr - ZHdTRW9rdTR5NWUvN0hwQVJWNE5ZUHMKrrwIoC2u8ifnbSKceEvXNX1ktlAggMmb - QYrfm8TawcSEqhHfSlN9VcUtusT/jo6dm5Pb2zoHB8NRDOsz/QcyWw== + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBYd2w2SnJxR2I3bDV1U1NR + VVFrM254WHJTbGd5K0lIR0MzMVJGbUF0SmhRCkUwSnROTW9CcWNPODN4aFVkektN + 
K083VGtTaFkrNFZXQ1JpYlpVcmhRZncKLS0tIHJ6OVN6RHlqb1VKUGljOUg2M0M3 + dEkvdHNiKzdRMFpXaXF2K1N2ZWtaejgKpq5hOgyrzFy80XzWkPL818bJNuMPAXPd + LqLGfVphAFBhG4U3JcW6ENajN3x2YEdkAo4GJsZJPbNZdNhEHmCrHw== -----END AGE ENCRYPTED FILE----- - lastmodified: "2024-04-29T05:46:14Z" - mac: ENC[AES256_GCM,data:6gnXkVqrX4/tb+EP48rZQqETDekiZaXbzgusExF+Vm+gwcVgoJI6GuavLpo4Q2FJdQk8D1HFBYaSlZE9AxgrcIfnHPVKq6bA3Ymet2QqiTp6kzl78VO9prTW+Gp0pUDFkL4G6EHrtQbmYA71glmj8cKT83pCSP2zfuArbjIUs18=,iv:0R+ZQ6QY38BPgtQamEyQSbYC2g88jchiuvSQZZw2uas=,tag:+RkXWsM3p7MpUGWo06pVqw==,type:str] + lastmodified: "2024-05-02T05:26:54Z" + mac: ENC[AES256_GCM,data:yZS0iP69g8YX1FcXtKF7YA4VoJTesHVsw9O6fVF+w8V+rMdGUoUb9SCO47l2N2M0KeS475mPZJouLChfUsosoJBCyEioVfHWN1DJT4jllRVLgIA3n9iXifIXQcjMTXhb42P/3nQHKWhB6R4nMzJpGTjADEQDHPNJgtd8hcWQNFY=,iv:rsdm13qNRVjHYHl0VnQT+mqPYYwEE03zd7mdQSXDX8I=,tag:SVADH4AFJKro3EtL0jSUog==,type:str] pgp: [] encrypted_regex: ^(data|stringData)$ version: 3.8.1 diff --git a/kubernetes/flux/vars/cluster-settings.yaml b/kubernetes/flux/vars/cluster-settings.yaml index c053a73c..694167c6 100644 --- a/kubernetes/flux/vars/cluster-settings.yaml +++ b/kubernetes/flux/vars/cluster-settings.yaml @@ -5,10 +5,11 @@ metadata: name: cluster-settings namespace: flux-system data: - CLUSTER_NAME: cluster TIMEZONE: America/Denver + CLUSTER_HOST_STORAGE_DIR: /cluster CLUSTER_CIDR: "10.69.0.0/16" NODE_CIDR: "10.1.2.0/24" + HOME_CIDR: "10.1.1.0/24" # Value also in `provision/ansible/inventory/group_vars/kubernetes/kube-vip.yaml` KUBE_VIP_ADDR: 10.1.2.200 @@ -18,6 +19,8 @@ data: LB_K8S_GATEWAY_ADDR: 10.1.2.203 LB_DNS_ADDR: 10.1.2.204 LB_MADDY_ADDR: 10.1.2.205 + LB_POSTGRES_ADDR: 10.1.2.206 + LB_VECTOR_ADDR: 10.1.2.207 UPSTREAM_DNS_ADDR: 10.1.0.5:53 # fallback/alternate diff --git a/kubernetes/templates/gatus/external/configmap.yaml b/kubernetes/templates/gatus/external/configmap.yaml new file mode 100644 index 00000000..c81629c2 --- /dev/null +++ b/kubernetes/templates/gatus/external/configmap.yaml @@ -0,0 +1,20 @@ +--- 
+apiVersion: v1 +kind: ConfigMap +metadata: + name: "${APP}-gatus-ep" + labels: + gatus.io/enabled: "true" +data: + config.yaml: | + endpoints: + - name: "${APP}" + group: external + url: "https://${GATUS_SUBDOMAIN:-${APP}}.devbu.io${GATUS_PATH:-/}" + interval: 1m + client: + dns-resolver: tcp://1.1.1.1:53 + conditions: + - "[STATUS] == ${GATUS_STATUS:-200}" + alerts: + - type: pushover diff --git a/kubernetes/templates/gatus/external/kustomization.yaml b/kubernetes/templates/gatus/external/kustomization.yaml new file mode 100644 index 00000000..e09060b9 --- /dev/null +++ b/kubernetes/templates/gatus/external/kustomization.yaml @@ -0,0 +1,6 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./configmap.yaml diff --git a/kubernetes/templates/gatus/guarded/configmap.yaml b/kubernetes/templates/gatus/guarded/configmap.yaml new file mode 100644 index 00000000..02c861fe --- /dev/null +++ b/kubernetes/templates/gatus/guarded/configmap.yaml @@ -0,0 +1,24 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: "${APP}-gatus-ep" + labels: + gatus.io/enabled: "true" +data: + config.yaml: | + endpoints: + - name: "${APP}" + group: guarded + url: 1.1.1.1 + interval: 1m + ui: + hide-hostname: true + hide-url: true + dns: + query-name: "${GATUS_SUBDOMAIN:-${APP}}.devbu.io" + query-type: A + conditions: + - "len([BODY]) == 0" + alerts: + - type: pushover diff --git a/kubernetes/templates/gatus/guarded/kustomization.yaml b/kubernetes/templates/gatus/guarded/kustomization.yaml new file mode 100644 index 00000000..e09060b9 --- /dev/null +++ b/kubernetes/templates/gatus/guarded/kustomization.yaml @@ -0,0 +1,6 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./configmap.yaml diff --git a/provision/ansible/playbooks/destroy.yaml 
b/provision/ansible/playbooks/destroy.yaml index 2d5ff334..77b56ec6 100644 --- a/provision/ansible/playbooks/destroy.yaml +++ b/provision/ansible/playbooks/destroy.yaml @@ -2,6 +2,8 @@ - name: Uninstall kubernetes hosts: kubernetes become: true + vars: + allow_storage_cleanup: false vars_prompt: - name: uninstall_confirmation prompt: |- @@ -138,10 +140,18 @@ state: absent loop: ["kubectl", "crictl", "ctr"] - - name: Remove local storage path - ansible.builtin.file: - path: /var/openebs/local - state: absent + - name: Cleanup storage + when: allow_storage_cleanup + block: + - name: Remove local openebs storage path + ansible.builtin.file: + path: /var/openebs/local + state: absent + + - name: Remove local custom storage path + ansible.builtin.file: + path: /cluster + state: absent - name: Reboot ansible.builtin.reboot: diff --git a/wip/auth/authelia/app/configuration.yaml b/wip/auth/authelia/app/configuration.yaml new file mode 100644 index 00000000..23e3efa3 --- /dev/null +++ b/wip/auth/authelia/app/configuration.yaml @@ -0,0 +1,135 @@ +# https://github.com/authelia/authelia/blob/master/config.template.yml +--- +theme: light + +default_redirection_url: "https://auth.${PUBLIC_DOMAIN}" + +log: + # info, debug, trace + level: trace + +server: + port: 80 + +session: + domain: "${PUBLIC_DOMAIN}" + redis: + host: authelia-redis.auth.svc.cluster.local + port: 6379 + +storage: + postgres: + username: authelia + host: authelia-postgres.auth.svc.cluster.local + database: authelia + port: 5432 + +notifier: + # filesystem: + # filename: /config/notification.txt + smtp: + host: maddy.comms.svc.cluster.local + port: 2525 + sender: "Authelia <${SMTP_USER}>" + disable_require_tls: true + +# With LLDAP: https://github.com/nitnelave/lldap/blob/main/example_configs/authelia_config.yml +authentication_backend: + password_reset: + disable: false + refresh_interval: 1m + ldap: + implementation: custom + url: ldap://lldap.auth.svc.cluster.local:3890 + timeout: 5s + start_tls: false + 
base_dn: dc=home,dc=arpa + username_attribute: uid + additional_users_dn: ou=people + users_filter: "(&({username_attribute}={input})(objectClass=person))" + additional_groups_dn: ou=groups + groups_filter: (member={dn}) + group_name_attribute: cn + display_name_attribute: displayName + mail_attribute: mail + user: cn=admin,ou=people,dc=home,dc=arpa + +duo_api: + disable: true + +webauthn: + disable: true + +# https://www.authelia.com/configuration/security/access-control/ +access_control: + # default_policy: two_factor + default_policy: one_factor + networks: + - name: internal + networks: ["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"] + rules: + - domain: "grafana.${PUBLIC_DOMAIN}" + policy: bypass + - domain: + - "radarr.${PUBLIC_DOMAIN}" + - "sonarr.${PUBLIC_DOMAIN}" + - "lidarr.${PUBLIC_DOMAIN}" + - "readarr.${PUBLIC_DOMAIN}" + - "prowlarr.${PUBLIC_DOMAIN}" + - "nzbget.${PUBLIC_DOMAIN}" + policy: bypass + resources: ["^/api([?/].*)?$"] + # - domain: "search.${PUBLIC_DOMAIN}" + # policy: deny + # - domain: "search.${PUBLIC_DOMAIN}" + # policy: one_factor + # subject: + # - ['group:home'] + + +identity_providers: + oidc: + # openssl genrsa -out oidc.key 4096 + cors: + endpoints: ["authorization", "token", "revocation", "introspection"] + allowed_origins: + - https://${PUBLIC_DOMAIN} + # allowed_origins_from_client_redirect_uris: true + clients: + - id: grafana + description: Grafana + secret: "${GRAFANA_OAUTH_CLIENT_SECRET}" + public: false + authorization_policy: one_factor + pre_configured_consent_duration: 1y + scopes: ["openid", "profile", "groups", "email"] + redirect_uris: ["https://grafana.${PUBLIC_DOMAIN}/login/generic_oauth"] + userinfo_signing_algorithm: none + - id: miniflux + description: Miniflux + secret: "${MINIFLUX_OAUTH_CLIENT_SECRET}" + public: false + authorization_policy: one_factor + pre_configured_consent_duration: 1y + scopes: ["openid", "profile", "groups", "email"] + redirect_uris: ["https://rss.${PUBLIC_DOMAIN}/oauth2/oidc/callback"] 
+ userinfo_signing_algorithm: none + - id: wikijs + description: Wiki.js + secret: "${WIKIJS_OAUTH_CLIENT_SECRET}" + public: false + authorization_policy: one_factor + pre_configured_consent_duration: 1y + scopes: ["openid", "profile", "groups", "email"] + redirect_uris: ["https://rss.${PUBLIC_DOMAIN}/login/f3fec479-555f-4594-bdee-b777b6951c52/callback"] + userinfo_signing_algorithm: none + response_modes: [form_post] + - id: thanos-minio + description: Thanos Minio S3 + secret: "${THANOS_MINIO_OAUTH_CLIENT_SECRET}" + public: false + authorization_policy: one_factor + pre_configured_consent_duration: 1y + scopes: ["openid", "profile", "email"] + redirect_uris: ["https://thanos-s3.${PUBLIC_DOMAIN}/oauth_callback"] + userinfo_signing_algorithm: none diff --git a/wip/auth/authelia/app/helm-release.yaml b/wip/auth/authelia/app/helm-release.yaml new file mode 100644 index 00000000..cb25ccce --- /dev/null +++ b/wip/auth/authelia/app/helm-release.yaml @@ -0,0 +1,280 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: &app authelia + namespace: auth +spec: + interval: 15m + chart: + spec: + # official Authelia chart is still in beta + # https://github.com/authelia/chartrepo + chart: app-template + version: 1.5.1 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + maxHistory: 2 + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + retries: 3 + uninstall: + keepHistory: false + values: + controller: + replicas: 1 + strategy: RollingUpdate + image: + repository: ghcr.io/authelia/authelia + tag: 4.36.6 + # https://www.authelia.com/configuration/methods/secrets/ + env: + AUTHELIA_JWT_SECRET_FILE: /app/secrets/JWT_SECRET + AUTHELIA_AUTHENTICATION_BACKEND_LDAP_PASSWORD_FILE: /app/secrets/LDAP_PASSWORD + AUTHELIA_IDENTITY_PROVIDERS_OIDC_HMAC_SECRET_FILE: /app/secrets/OIDC_HMAC_SECRET + AUTHELIA_IDENTITY_PROVIDERS_OIDC_ISSUER_PRIVATE_KEY_FILE: 
/app/secrets/OIDC_ISSUER_PRIVATE_KEY + AUTHELIA_SESSION_REDIS_PASSWORD_FILE: /app/secrets/REDIS_PASSWORD + AUTHELIA_SESSION_SECRET_FILE: /app/secrets/SESSION_SECRET + AUTHELIA_STORAGE_ENCRYPTION_KEY_FILE: /app/secrets/STORAGE_ENCRYPTION_KEY + AUTHELIA_STORAGE_POSTGRES_PASSWORD_FILE: /app/secrets/POSTGRES_PASSWORD + # AUTHELIA_NOTIFIER_SMTP_PASSWORD_FILE: /app/secrets/SMTP_PASSWORD + persistence: + # Base configuration + configuration: + enabled: true + type: configMap + name: authelia-config + subPath: configuration.yaml + mountPath: /config/configuration.yml + readOnly: true + # Not pretty, but recommended: + # https://github.com/authelia/authelia/blob/352b360a50cf32897649315cd1907e5f44f2cb73/docs/content/en/integration/kubernetes/secrets.md + postgres-password: + enabled: true + type: secret + name: authelia + subPath: POSTGRES_PASSWORD + mountPath: /app/secrets/POSTGRES_PASSWORD + readOnly: true + redis-password: + enabled: true + type: secret + name: authelia + subPath: REDIS_PASSWORD + mountPath: /app/secrets/REDIS_PASSWORD + readOnly: true + jwt-secret: + enabled: true + type: secret + name: authelia + subPath: JWT_SECRET + mountPath: /app/secrets/JWT_SECRET + readOnly: true + session-secret: + enabled: true + type: secret + name: authelia + subPath: SESSION_SECRET + mountPath: /app/secrets/SESSION_SECRET + readOnly: true + storage-encryption-key: + enabled: true + type: secret + name: authelia + subPath: STORAGE_ENCRYPTION_KEY + mountPath: /app/secrets/STORAGE_ENCRYPTION_KEY + readOnly: true + ldap-password: + enabled: true + type: secret + name: authelia + subPath: LDAP_PASSWORD + mountPath: /app/secrets/LDAP_PASSWORD + readOnly: true + oidc-hmac-secret: + enabled: true + type: secret + name: authelia + subPath: OIDC_HMAC_SECRET + mountPath: /app/secrets/OIDC_HMAC_SECRET + readOnly: true + oidc-issuer-private-key: + enabled: true + type: secret + name: authelia + subPath: OIDC_ISSUER_PRIVATE_KEY + mountPath: /app/secrets/OIDC_ISSUER_PRIVATE_KEY + readOnly: 
true + enableServiceLinks: false + service: + main: + ports: + http: + port: 80 + metrics: + enabled: false + port: 8080 + # Discovery endpoint https://www.authelia.com/integration/openid-connect/introduction/#well-known-discovery-endpoints + ingress: + main: + enabled: true + ingressClassName: nginx + annotations: + external-dns.home.arpa/enabled: "true" + nginx.ingress.kubernetes.io/configuration-snippet: | + add_header Cache-Control "no-store"; + add_header Pragma "no-cache"; + add_header X-Frame-Options "SAMEORIGIN"; + add_header X-XSS-Protection "1; mode=block"; + hosts: + - host: &host "auth.${PUBLIC_DOMAIN}" + paths: + - path: / + pathType: Prefix + tls: + - hosts: + - *host + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + podAnnotations: + configmap.reloader.stakater.com/reload: authelia-config + secret.reloader.stakater.com/reload: *app + # topologySpreadConstraints: + # - maxSkew: 1 + # topologyKey: kubernetes.io/hostname + # whenUnsatisfiable: DoNotSchedule + # labelSelector: + # matchLabels: + # app.kubernetes.io/name: *app + # resources: + # requests: + # cpu: 5m + # memory: 10Mi + # limits: + # memory: 100Mi + +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: authelia-postgres + namespace: auth +spec: + interval: 15m + chart: + spec: + chart: app-template + version: 1.5.1 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + createNamespace: true + remediation: + retries: 5 + upgrade: + remediation: + retries: 3 + values: + image: + repository: library/postgres + tag: "14" + env: + - name: POSTGRES_USER + value: authelia + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: authelia + key: POSTGRES_PASSWORD + - name: POSTGRES_DB + value: authelia + - name: PGDATA + value: /var/lib/postgresql/data + podSecurityContext: + runAsUser: "${SECURITY_CONTEXT_RUN_AS_USER}" + runAsGroup: "${SECURITY_CONTEXT_RUN_AS_GROUP}" + 
fsGroup: "${SECURITY_CONTEXT_FS_GROUP}" + service: + main: + enabled: true + ports: + http: + enabled: false + primary: false + sql: + enabled: true + primary: true + port: 5432 + protocol: TCP + persistence: + data: + enabled: true + mountPath: /var/lib/postgresql/data + existingClaim: appdata + subPath: authelia_db + +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: authelia-redis + namespace: auth +spec: + interval: 15m + chart: + spec: + chart: app-template + version: 1.5.1 + interval: 15m + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + values: + controller: + strategy: RollingUpdate + image: + repository: docker.io/library/redis + tag: 7.0.4 + command: ["redis-server", "--requirepass", "$(REDIS_PASSWORD)"] + env: + REDIS_REPLICATION_MODE: master + envFrom: + - secretRef: + name: authelia + persistence: + data: + enabled: true + path: /data + existingClaim: appdata + subPath: authelia_redis + service: + main: + ports: + http: + enabled: false + primary: false + redis: + enabled: true + primary: true + port: 6379 + # resources: + # requests: + # cpu: 15m + # memory: 64M + # limits: + # memory: 128M + podAnnotations: + secret.reloader.stakater.com/reload: authelia diff --git a/wip/auth/authelia/app/kustomization.yaml b/wip/auth/authelia/app/kustomization.yaml new file mode 100644 index 00000000..7753fe81 --- /dev/null +++ b/wip/auth/authelia/app/kustomization.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: auth +resources: + - secret.sops.yaml + - helmrelease.yaml +configMapGenerator: + - name: authelia-config + files: + - configuration.yaml +generatorOptions: + disableNameSuffixHash: true diff --git a/wip/auth/authelia/app/secret.sops.yaml b/wip/auth/authelia/app/secret.sops.yaml new file mode 100644 index 00000000..0152ec3e --- /dev/null +++ b/wip/auth/authelia/app/secret.sops.yaml @@ -0,0 +1,37 @@ +# yamllint disable +apiVersion: v1 +kind: 
Secret +metadata: + name: authelia + namespace: auth +type: Opaque +stringData: + POSTGRES_PASSWORD: ENC[AES256_GCM,data:LhOnhTmaP2qu+XhWkY71BJJ2LlyMfiY37WdI7XmiCmA=,iv:JpsuXL/MfT15FWVzg5iZe/Cnj3LXzrmeAlP2dILrHVY=,tag:kHAQj9YWtThZX6ymCOG3Dw==,type:str] + REDIS_PASSWORD: ENC[AES256_GCM,data:2lyalttHDvboWT2ffmMZhl5H7FuJd1PEsqJvsrer4m4=,iv:FlIVfuf1XZHiETHpLofoUptm7QreDIAT+ivMHc4xU3k=,tag:GV1mAucSK9ZI5ygRCaRp8g==,type:str] + JWT_SECRET: ENC[AES256_GCM,data:HthTO5psHWLdYZMMVvaqupgCilkQ0hH2VsZsDtjNU4s=,iv:TrYAzi8MzvDeUnyX4NVKmbokLpi7MbL09YmhHAdSDgk=,tag:OzKaaPmGxX+9+Z1iAdSydg==,type:str] + SESSION_SECRET: ENC[AES256_GCM,data:lTWlRxrHP2J2cLuW9WvW2rkgCa4k7JF7iNSKZyEM6MA=,iv:1CFxitPeGjJZ8PDfc3Ws6KMiyyATsVUi/ExDqP0dfx8=,tag:+PFEhcZcwQ9kfVuhqFhLfw==,type:str] + STORAGE_ENCRYPTION_KEY: ENC[AES256_GCM,data:kJknnuBiBNEwdMYVePTi+1hAPfI3TTyPhif1wFWB1pM=,iv:K4d1pXfQJ5NCbePpdygtqILeZQgfCL9gPhVYjPDKUmQ=,tag:BcTZLmTCGktB6xbLYjpNlQ==,type:str] + LDAP_PASSWORD: ENC[AES256_GCM,data:+XeCOE5wgM9FgZPNQoVOSZnUsi0yqsqEXxWExa7LxxM=,iv:4yXlVheJTqyaPMAfaGFjPZAgUGG+LI3sqC9OCIT4lN0=,tag:wPOjQ32YEmSFVlmi7JGkIw==,type:str] + OIDC_HMAC_SECRET: ENC[AES256_GCM,data:MLQAPZs8ddntlANL3Ty2Tkqx91N3UOYZAxxhgrIOyhM=,iv:9W10/Xdt5xzNjixo9Bbuj10jwHgxK8B4xkLAEVjkSiA=,tag:FI1uyIE2sdrodYE6WT8JMw==,type:str] + #ENC[AES256_GCM,data:p8qBrcgvG2YxHGcOcB6eMi24RsWUgBrvwu/KOX7569Qj/g==,iv:iljL3Qsnfk1OxU6HaRk1HGDa6LkNFLqts/P+FnXXXTM=,tag:ChwS4+jtP4iT4seQ4e1Skg==,type:comment] + OIDC_ISSUER_PRIVATE_KEY: 
ENC[AES256_GCM,data:AcvDCOYDN/rQa0e9GoV3JqM/f6KRqkXpE+9dudX/MbTphiZvUckDcNazEek9AFV1V8s5s9Jf2TBe3q9U7h29OwY2QwbMkznTXCN0C6Mxlqd/UTtCmILp8I3YkdHt2rYUoFCDNNckXmIcYOsHKgofil0IycQFT2AaGy0/3/3847X+bZiGrzgZgR7r1tAsp5RfB8veK3bdc88TcryT5ROejKYp9DgoUm0sIBBiiA7iHSSViFQ6tgX+92LDIVKaED0EUZuPWnPL1h6d+8LC3SD2RIv+tHGyRw/TpUqLPNRQ7YjjT4226bK2lFliY/OJ0m7ngE65wqtdv8qrhFqr+8k0beHNO2WcKncyOqhAK95ACrLn8pSmWjG5TT3fqsxOSKtTDcHTljVxVLH0YmXRoweJ9jtgkKGI5uFZ18fqjAMNN9uUO6e1RJBZOtpxCpfmp8Mt8UXZBlvw4vy81Vo7D9nMFbbVIfTQfysErMyqXJa3OUekuEEdT7P9tu6K2BuiBHWVrSo6kyvwJTI2Z+dH/fHRnrt9P/iFCgcO16pS9opmXhz+4VReDSWyRtHOVM/VV7ybZ4QJwFvHzqolkNnVbLhjpt8azVZlYhIowhcwjkL+KW18ufhtSUvKOmx2dDyh7iwb3abmJRgdBWDRx7Hcjq2KlnlMC/Ic5+3gFwMIBThwrPPuWNm4FYrJQrHn6v2DXcokL14Y4uh26ZfGunMghe4CnBonNXNvyWYFOmR8hMe/kQFJxNDIO6adeSoA19/B9JrTs0+5gTwWuYPJM9WS22Ack+tjpUECYzmj/X6/wsc70rlWVC35F14KC8rCmeJ61TkmjQ3cNTw4NUpIeloOI00vK9ZsLQjqG5PtLyBfhFPFNbWQDx97zlIsvncjInb/hu9hSt1xQF2vzBJV33x6voVaue2MzXt7IkDHWzqVsE4BVjU/DnchBNXodEh5ddbhZgcboYbaoALg6CiM4wOF24wOLSkQSYJWCIuHgUADNNxinH2X6Jr9g0iGMPZGZ/vqU1ztNQ3lrf7xRDgO/7MVHpBJSwFRLYwdXd5uE+CQEjb6O0Nw4qQ3R+dDVcsZ1y4gpmACAVMAKnkzr6rABNavQBqqwRFWuZZ/0wZ26WIitbZ3+lSlDNCuIEavp23CbLfyru7tU/xxJgfqDW7cCCYLquPC2BGBb5BEVoza/5U5mkeyws2JN6FsGjWeGZRfBwd0nnQjY2uwZVA14zAIS/yrtM06gGgMwy9nfRnOcv//aOeycvnE+YvvKzPDoS1VKnr2NRILLVdY7qYEhBikT9woc/qCjdaIlRibmvKa7UFuv0j1ctyYaY7rZ/+I8MRTJbEsclVBEsl/azy2+lCh5Ly6sUj7mUtZYRoIw36D4A9xGIgc+uC5btC+tDJJmFEwyC5+/38coTU1pKkGiFufI+QO7ihJ6GRBlCVzn5XM5ISsOuCF1hi1febOgnnBJO3FJ3/WynkMtPMnFoW8ut6ywE9GWRFCyX+3+5LS817d+GoUCoy2A7fE5p3yRM+8ltDc+6ebWC6yZxnjQzTJmehY7CXCG0topbe8omBWIIndpXowZ/iMC9Lsprtq5gcP5u9a+gB2C3RALUd7YYkRklcKGjqv5gkj9t9jF7qMSC6zHtFCgacuLzY/wPRUfPiMP0hXMW8s3yNwPtHSZ8U0XKlKdD+QNP9aToV8dDTsq1e2yPxu8Jr1HMDsmrWMe9G887ekTr+041d5ywhuTBng96gwJjcnOCv2IRaX7csI+jqtHuntZ/wIRHS5zrZFp7PgSl0JIb1sjUkYyfS/Fe8lxB1Ski2pjZ7MMDkm0Np7+4fJQOQ/YJl/mgWqpGm5Y+nuqg5mEOsHAadxovS/pIAPvlVacSyfOnOz/i+c00nV1xuSNdjStVKNKQvlk3ge1GNh7JcEdH0MOvCoYEGMY2KrVXBiyzfojrgCBLRGW823WTvJNjCutRqoegNp77aDfDiVaYlqffSh
ukTTMNCdccjduD+Oew6CULvkOVxhXV0so+a2nlhXhsJkRU+NZdPP74jeoLR8TV0D0rctCemf1KRanPH/X/myx9q8OFRjGUJC50N9vUZ7VE4Ymv8LEFxnSe1FLQkU+8GFbQFkCFj9juBHAWFguQIqC3KH99ZKuF4Weu1rqVyu4OuppzpF178mHQQb1yCc06mnSQzXtoaYX+D4Unq3KzA89N8K+IcnAr5Ec0Q5ni64SfXEjxDIVfjUteAlh5EVzW9kFQN+TFysKgO3caPMCfb/pdhRbbiCrppOerN+4TlQflIuIm14+evqrtAgU9YI1LhFhXw6NuCQVSLA0mUNKyjLiiwWhUaKUXHmX3Y3P0CDAwKisimDlYzURHy5ziZW4EyqLf5lbnwO56SAap9zSCOaj5G+WqRIMtEb4FIZBHgo5if7gOnCp/VeyNh2V8sUDUGAbcpNb+EWhOuPg6MwnTW7yjknZOOneIP40yi+oT7dZ4C0C+AVfP7uXTWU4W+u2aKOSMiCqdBUOeoHsPXSpvRTd9reXsK+JSi8ST+0yN0ang5plPHCwNozGDFjLME8wIhHmeOJLynOTCfRQo0KjbiTs/HIdMcfx/hwGZ/twUPzz3iZjuZzx9Pvu/O90nmpZXw9Ed6svNHEDkKzbnYJ2uvAP00HnTsJG34w+UGAktEv4KQT2hRVauaRccWv1bGrSDFMreRcUe5Ckv/aN7uA3DjWtVuUaSTffVsoviO1sLzpgV5Um0DiRTRi2poPP6PhlSlVg0GviXQCVF7WoBqlLFlxWUhjz95mVxrTt15KdbVCf3gPg7sJihuoXtGkAdMx6bxq295UWwpbSpQDq2ldWbyIxSpj8iHoQwunJVtXJC8ytO0j0rmhiDuTstIGim7E9oucNVeuH1Y/3S7u7MwaSO8VpytonmDnMOGsNckdx+zmfktQ2nkX53Is3QwOyJfQG34D0Pvnac6VJWYgoi8nFUSge5nEZXeT6yVpShx5tiwfPb0zdy0LBKxT6TOZsR99au/c2FeqUvD2/UzdJSz46QTP/Emo/OgBsXBpTemd+nN2A8z6A5ERHeQgEYLt2fCo6mNpH2ruytbFwvauNbYAjNd3hwl2+Dbc4JrcxNCh9ZqXju75AIkr+LfiHrDhBvxnWdC341E8S6hMHiM6iqWgOtW7q+GST4f7ndUeHneXAr99NnbuEF/NSPr3MMp2obdg50VKsmp34xZ7M5CooPiYuardiUgfykD4dKx7cFVQ2dQ0Agl+Y82Cxm+wm58rj5HVVh3svOiDXV0w14QoRcp4+WzoipDQ1EdoYIO19ahilFBBtsdtvOQBZW68NVur5KHJLmD1xxdUTfWIGsmIOcbwaVlUQRMQjIWOH+fIyhRdFqXM8II7/LqOFNHvXK7eoMz+rnrY6Dp7CaPwgOapLu9XOJT61V2q6872WIRUcfUSOwP2Bl08ZpB8wNZob1PrM6zX67l5D96lBPZl1rcCwqOy2VIGhwuEkVVcxZ/AwvskxIO6ohmzmH88N879PqC4+eb0pmrVYzYt3qRist+fiWgl/F7vOmeYOd1QfPcoZLaINk5UJymYpqEDBIjdRQSSovDmcMvPHfQasONO8mGs5wysih3k1N0J3vfXoGyjFqi8PZ6Ba4v+Xb8yE3NSEq5YU1TzB4ZEhv1+kP064kypuBd5Ul9LrkxS4f8bWxRRXu4s/Sm4dACEd7vLiXvj+cOyVl+ag6ipMqjoNukzD/DqypFW2B/e4Lq4zK09FE92jNHMzobUDy8h2+OnphtYTZlzDweqnDJx1dB7omon96wFF9k7QLX01m/f9Ap+s1yhi5wBRsnxBRct9RdZcza7zyJf+nrJOncfjuDcNpamB3DE96ZT8dDp1y+cKb4MKhWeT9J/sY9dai7kR5DNRkuy9h/WmMybi860cg7AgWHybi6c1FiwOWGKrU8/m4EdrCNKfke5ja5Ovkw4wvE7HM/q13Kkbh7EuHpR3tS687VU+KItzcko
d8Gzpq7wnxYT3cLqvmdMZ52EUGK99oIU2FRbcVaA/abh3UIPWJ11+5rsGIZkqxf0BwAOnpvm01nmlOh05gp+2Kz05//XX60Vf8HQFEkyjHc2E8Fg1xi/3MOaBM5JDpskyasi7RhZ4+h0XFcHeWH3dDc1DBbc5NaT8OWg05hQLWwStlOi0gsm3FiI0D7Uq6sExCtfQ46/BrAt+PrenbiDlR0jJY4Dc63HriDtTkcvs4EOwKBfGfCZlF49qug459Acx2area/wCCWaPISJA8ACkp0t6r5rAry11l6P/R2Ee9z3/BBykXvH7jk4sh1AwubYj9v5ozOa2un2+B1/KNtdVlpb,iv:Uyzw3b7s/Cv3YfzuBpNZ3TizMUBn2hvLJ30JzhoUKk0=,tag:XrUqd1q52vVMCvoOvQFFaw==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAzTi8xNjh4Vm5tdXdKMVRD + RHZ6eWZTZXJ5M2ppZTRLU1JMWWg3SktXVHdVCmJzeHlVSFJNZ0EzZzhiT0xqOEor + blNaU3M4VFdwV05sZTRRT25oVm8vQ0kKLS0tIENjaXU0ZkRXMHJWcDBWRG5kdVBn + QTladjNNRGxGUTVaeHJMVmU3KzRPT0UKP5LuA/pzKo0ohRjDDU4Ok+Z6ynfvX0QM + e4cx4CjAHrxArDc/zwW/gkncJRubYyoYTCDUpmVzCqLgiAG5r5NvEA== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2023-02-07T05:55:44Z" + mac: ENC[AES256_GCM,data:tnGidF1LYDtpVJEV/7pmyX5XegdUFAp71WtvIJzy7ZTrcdW9sDKNP3YYA+YGl9z8gY7vZhleQz519tqnN3C3Cc7yKRh90tEaHlEo4kP6t0WUGW+a79RBVlneY3tW+TUqiJKCxXeSG0RjskaGRzzMJsxX13zPGsOSVhXZ5K0qcHM=,iv:HYrfDKqvWft9bXsMP2cK9Gy4NPzrt0BdkVo7Bo3SF80=,tag:4eTOVoh+I3svSe+YoYPyFQ==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.7.3 diff --git a/wip/auth/authelia/ks.yaml b/wip/auth/authelia/ks.yaml new file mode 100644 index 00000000..cbc7bff9 --- /dev/null +++ b/wip/auth/authelia/ks.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: apps-authelia + namespace: flux-system +spec: + path: ./kubernetes/apps/auth/authelia/app + prune: true + sourceRef: + kind: GitRepository + name: homelab + wait: false # no flux ks dependents + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/wip/auth/kustomization.yaml b/wip/auth/kustomization.yaml new file mode 100644 index 
00000000..3b3822ab --- /dev/null +++ b/wip/auth/kustomization.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./namespace.yaml + # - ./authelia/ks.yaml + # - ./lldap/ks.yaml diff --git a/wip/auth/lldap/app/helm-release.yaml b/wip/auth/lldap/app/helm-release.yaml new file mode 100644 index 00000000..b2aeaedd --- /dev/null +++ b/wip/auth/lldap/app/helm-release.yaml @@ -0,0 +1,78 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: &app lldap + namespace: auth +spec: + interval: 15m + chart: + spec: + chart: app-template + version: 1.5.1 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + interval: 15m + maxHistory: 2 + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + retries: 3 + uninstall: + keepHistory: false + dependsOn: + - name: ingress-nginx + namespace: network + values: + image: + # https://hub.docker.com/r/nitnelave/lldap + repository: docker.io/nitnelave/lldap + tag: latest + env: + TZ: "${TIMEZONE}" + LLDAP_LDAP_BASE_DN: "dc=home,dc=arpa" + UID: "${SECURITY_CONTEXT_RUN_AS_USER}" + GID: "${SECURITY_CONTEXT_RUN_AS_GROUP}" + envFrom: + - secretRef: + name: *app + service: + main: + ports: + http: + port: 17170 + ldap: + enabled: true + port: 3890 + ingress: + main: + enabled: true + ingressClassName: nginx + hosts: + - host: &host "ldap.${PUBLIC_DOMAIN}" + paths: + - path: / + pathType: Prefix + tls: + - hosts: + - *host + persistence: + data: + enabled: true + existingClaim: appdata + mountPath: /data + subPath: lldap + # resources: + # requests: + # cpu: 10m + # memory: 100Mi + # limits: + # memory: 500Mi + podAnnotations: + secret.reloader.stakater.com/reload: *app diff --git a/wip/auth/lldap/app/kustomization.yaml b/wip/auth/lldap/app/kustomization.yaml new file mode 100644 index 00000000..6e4891d2 --- /dev/null +++ b/wip/auth/lldap/app/kustomization.yaml @@ -0,0 +1,6 @@ 
+--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - secret.sops.yaml + - helmrelease.yaml diff --git a/wip/auth/lldap/app/secret.sops.yaml b/wip/auth/lldap/app/secret.sops.yaml new file mode 100644 index 00000000..5593a50a --- /dev/null +++ b/wip/auth/lldap/app/secret.sops.yaml @@ -0,0 +1,30 @@ +# yamllint disable +apiVersion: v1 +kind: Secret +metadata: + name: lldap + namespace: auth +type: Opaque +stringData: + LLDAP_LDAP_USER_PASS: ENC[AES256_GCM,data:na4X1zo6wVgFXy3FPJNErXe3CCGtprv7W8nSkhsDo0M=,iv:I9jlN5G5g73LgDWKHc49SC+n4O/H8Qp0nj23c/EVNeE=,tag:t2ogMoxvuHxR6ZN085FUaw==,type:str] + LLDAP_JWT_SECRET: ENC[AES256_GCM,data:vYOUK/fZvHNrNgr3DeTDi1ypAvjwrsr2DUnPlEGshXc=,iv:pjtBlXL6JcGRWoaW4Der4haegscJC9e1B9/ieopgxIY=,tag:JKjipUsCCJ+v7xB7F+1f+Q==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBBNVJjdmJ2YmpFUitMeWdz + eHhUQXU2cDVlNDFlcFBZSVgzdFNmWW5YSERrCitqeFFDaUprV1FSRjYybjZac1o1 + QXVOby9iUEl1QU10RDQzTHArYVI5SzQKLS0tIEtTcmhKNnJ2N1pYd2ZBeUxkaDlM + eEVhWkFtRFYzbVBpRThxNE9XOEVCNkEKL92VHY3B3Vp3ts1NQYVNz1kehAFYxATx + CbKAvBsqa4DdglTI8hjlliFIVkM5G/O5LSG+EhR7wWBmFvhYX3vN4g== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2023-02-07T05:54:14Z" + mac: ENC[AES256_GCM,data:byw+g/dIs5BagqDDCSQEUuRaixZMMfgAFIK4dEDeY+I0P3oiFLa08thGcHn5Kp4VXRNW2ixQO3VsNNsVJqmApIAqwXIK/ug2w/ZHv6GAAzpiEh1iM4BPIGrQJc0t//JoCH0fi69mgFPr7RYfHJVik4kfNdAK3f67jPFh00A1mCk=,iv:p9tTjsi/z9rTbPuHKYNvs919TG9IKaAX6owIU9ZkFME=,tag:9eXaA7QVlWlfDdgGye7LdQ==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.7.3 diff --git a/wip/auth/lldap/ks.yaml b/wip/auth/lldap/ks.yaml new file mode 100644 index 00000000..7d68727d --- /dev/null +++ b/wip/auth/lldap/ks.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + 
name: apps-lldap + namespace: flux-system +spec: + path: ./kubernetes/apps/auth/lldap/app + prune: true + sourceRef: + kind: GitRepository + name: homelab + wait: false # no flux ks dependents + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/wip/auth/namespace.yaml b/wip/auth/namespace.yaml new file mode 100644 index 00000000..14c7f8bd --- /dev/null +++ b/wip/auth/namespace.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: auth + labels: + kustomize.toolkit.fluxcd.io/prune: disabled diff --git a/wip/dnsutils/app/helm-release.yaml b/wip/dnsutils/app/helm-release.yaml new file mode 100644 index 00000000..ab30343f --- /dev/null +++ b/wip/dnsutils/app/helm-release.yaml @@ -0,0 +1,38 @@ +# Debugging DNS https://kubernetes.io/docs/tasks/administer-cluster/dns-debugging-resolution/#create-a-simple-pod-to-use-as-a-test-environment +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: dnsutils + namespace: default +spec: + interval: 15m + chart: + spec: + chart: app-template + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + interval: 15m + maxHistory: 2 + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + retries: 3 + uninstall: + keepHistory: false + values: + service: + main: + enabled: false + image: + repository: gcr.io/kubernetes-e2e-test-images/jessie-dnsutils + tag: "1.0" + command: + - sleep + - "3600" diff --git a/wip/dnsutils/app/kustomization.yaml b/wip/dnsutils/app/kustomization.yaml new file mode 100644 index 00000000..dbc604ed --- /dev/null +++ b/wip/dnsutils/app/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - helmrelease.yaml diff --git a/wip/grafana/app/helm-release.yaml b/wip/grafana/app/helm-release.yaml new file mode 100644 index 00000000..36079adf --- /dev/null +++ b/wip/grafana/app/helm-release.yaml @@ -0,0 +1,227 @@ +# 
Dashboards: +# - https://github.com/BSmithIO/OPNsense-Dashboard +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: grafana + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: grafana + version: 6.58.4 + sourceRef: + kind: HelmRepository + name: grafana + namespace: flux-system + maxHistory: 2 + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + retries: 3 + uninstall: + keepHistory: false + dependsOn: + - name: grafana-postgres + namespace: monitoring + # https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml + values: + replicas: 1 + admin: + existingSecret: grafana + userKey: GF_SECURITY_ADMIN_USER + passwordKey: GF_SECURITY_ADMIN_PASSWORD + env: + # https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/ + GF_ANALYTICS_CHECK_FOR_UPDATES: false + GF_DATABASE_HOST: grafana-postgres.monitoring.svc.cluster.local:5432 + GF_DATABASE_NAME: grafana + GF_DATABASE_USER: grafana + GF_DATABASE_SSL_MODE: disable + GF_DATABASE_TYPE: postgres + GF_DATE_FORMATS_FULL_DATE: "MMM Do, YYYY hh:mm:ss a" + GF_EXPLORE_ENABLED: true + GF_GRAFANA_NET_URL: https://grafana.net + GF_LOG_FILTERS: rendering:debug + GF_LOG_MODE: console + GF_PANELS_DISABLE_SANITIZE_HTML: true + GF_PATHS_DATA: /var/lib/grafana/data + GF_PATHS_LOGS: /var/log/grafana + GF_PATHS_PLUGINS: /var/lib/grafana/plugins + GF_PATHS_PROVISIONING: /etc/grafana/provisioning + GF_SECURITY_ALLOW_EMBEDDING: true + GF_SECURITY_COOKIE_SAMESITE: grafana + GF_SERVER_ROOT_URL: "https://grafana.${PUBLIC_DOMAIN}" + envFromSecrets: + - name: grafana + grafana.ini: + auth: + signout_redirect_url: "https://auth.${PUBLIC_DOMAIN}/logout" + oauth_auto_login: false + auth.generic_oauth: + enabled: true + name: Authelia + client_id: grafana + client_secret: "${GRAFANA_OAUTH_CLIENT_SECRET}" + scopes: "openid profile email groups" + empty_scopes: false + auth_url: 
"https://auth.${PUBLIC_DOMAIN}/api/oidc/authorization" + token_url: "https://auth.${PUBLIC_DOMAIN}/api/oidc/token" + api_url: "https://auth.${PUBLIC_DOMAIN}/api/oidc/userinfo" + login_attribute_path: preferred_username + groups_attribute_path: groups + name_attribute_path: name + use_pkce: true + auth.generic_oauth.group_mapping: + role_attribute_path: | + contains(groups[*], 'admins') && 'Admin' || contains(groups[*], 'people') && 'Viewer' + org_id: 1 + auth.basic: + enabled: false + # disable_login_form: false + auth.anonymous: + enabled: true + org_name: Home + org_id: 1 + org_role: Viewer + analytics: + check_for_updates: false + check_for_plugin_updates: false + dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: "default" + orgId: 1 + folder: "" + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/default + datasources: + datasources.yaml: + apiVersion: 1 + deleteDatasources: + # - name: Loki + # orgId: 1 + datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090 + isDefault: true + # - name: Loki + # type: loki + # access: proxy + # url: http://loki-gateway.monitoring.svc.cluster.local:80 + dashboards: + default: + cert-manager: + url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json + datasource: Prometheus + flux-cluster: + url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/monitoring-config/dashboards/cluster.json + datasource: Prometheus + flux-control-plane: + url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/monitoring-config/dashboards/control-plane.json + datasource: Prometheus + sidecar: + dashboards: + enabled: true + searchNamespace: ALL + datasources: + enabled: true + searchNamespace: ALL + plugins: + - natel-discrete-panel + - pr0ps-trackmap-panel + - 
grafana-piechart-panel + - vonage-status-panel + - grafana-worldmap-panel + - grafana-clock-panel + serviceMonitor: + enabled: false + ingress: + enabled: true + ingressClassName: nginx + annotations: + external-dns.home.arpa/enabled: "true" + hosts: + - &host "grafana.${PUBLIC_DOMAIN}" + tls: + - hosts: + - *host + persistence: + enabled: true + storageClassName: local-path + testFramework: + enabled: false + +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: grafana-postgres + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: app-template + version: 1.5.1 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + createNamespace: true + remediation: + retries: 5 + upgrade: + remediation: + retries: 3 + values: + image: + repository: library/postgres + tag: "14" + env: + - name: POSTGRES_USER + value: grafana + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: grafana + key: GF_DATABASE_PASSWORD + - name: POSTGRES_DB + value: grafana + - name: PGDATA + value: /var/lib/postgresql/data + podSecurityContext: + runAsUser: "${SECURITY_CONTEXT_RUN_AS_USER}" + runAsGroup: "${SECURITY_CONTEXT_RUN_AS_GROUP}" + fsGroup: "${SECURITY_CONTEXT_FS_GROUP}" + service: + main: + enabled: true + ports: + http: + enabled: false + primary: false + sql: + enabled: true + primary: true + port: 5432 + protocol: TCP + persistence: + data: + enabled: true + mountPath: /var/lib/postgresql/data + existingClaim: appdata + subPath: grafana_db diff --git a/wip/grafana/app/kustomization.yaml b/wip/grafana/app/kustomization.yaml new file mode 100644 index 00000000..95bf4747 --- /dev/null +++ b/wip/grafana/app/kustomization.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./secret.sops.yaml + - ./helmrelease.yaml diff --git a/wip/grafana/app/secret.sops.yaml b/wip/grafana/app/secret.sops.yaml new file mode 100644 index 00000000..30010c10 --- 
/dev/null +++ b/wip/grafana/app/secret.sops.yaml @@ -0,0 +1,30 @@ +# yamllint disable +apiVersion: v1 +kind: Secret +metadata: + name: grafana + namespace: monitoring +stringData: + GF_SECURITY_ADMIN_USER: ENC[AES256_GCM,data:Qc4tMBI=,iv:vsS9oxvqDGAmquYqFkae0FrhMtOMPLGvlGXqkAtWQ7M=,tag:rjp1+wuRI06bBFmpd1B7Aw==,type:str] + GF_SECURITY_ADMIN_PASSWORD: ENC[AES256_GCM,data:Q0l0/BiFwTGdb3lgXB/iev5JPJGT9ScgsqV0sXPKf1k=,iv:0BsE7Lp8FIvS/jw+zqtt8MUZsC0e1oE0m5AuYwi4eq4=,tag:9m0ZZ2AJdIHECZNkUf63nA==,type:str] + GF_DATABASE_PASSWORD: ENC[AES256_GCM,data:dar4mZQL0hCdCt46NrrhVQWzOUPdGCVuKECEDFjkFbI=,iv:EiE9/DlVoGvPpsH+MnBxNAgTNIeu/ovhWTeUHCfJBPk=,tag:HcPF+0Pyus3Xwaf8WWH7Gg==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBVRTlHUElMb0JPSU5zWEg0 + bUl5L01ldmxGam1rSEtCOSswdGQrT2VnNmw0CkM2U0VxUGgrWTlIWnBGSHc4OTlQ + ODlvUlJEbFFmbGJsSG8yRStHbkEzc2MKLS0tIFFJUUo5Yi95WU1zcjlENWtxMm9O + S1ZoOFJGczN5dGNxaXlJcGpMN1VKZG8Kgg9gbRG45XSU6X/dZAE27acFdp8HSJad + BpiQCudXm5cT1+gKbDCzlU5BoCNgwrx3B1vgH+ssEm/OF2VuE1LT9Q== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2022-09-11T16:31:36Z" + mac: ENC[AES256_GCM,data:JwHbE5UekxLxtD1rI4YenihY++JbgUR/l3KZW00Tv+yFtJQmsLfw12O8G8OLyn7n6MG90+XfjVThc/bnptxsszscjFWWwqlmYBE1S1IgKdBNE/FTgJzE3yxs0XY16ZLShEFOdj6ikgChWtlK/JIyDWDXkoKjPpr9QFEpicdmmrs=,iv:qFY9bZl34Eg+nsGUFhRymiZRoYrauVWIc2pr4AxjofM=,tag:r1skOniR4YwJ68wb6m68cg==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.7.3 diff --git a/wip/grafana/ks.yaml b/wip/grafana/ks.yaml new file mode 100644 index 00000000..5a10a68a --- /dev/null +++ b/wip/grafana/ks.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: apps-grafana + namespace: flux-system +spec: + path: ./kubernetes/apps/monitoring/grafana/app + prune: true + sourceRef: + 
kind: GitRepository + name: homelab + wait: false # no flux ks dependents + interval: 30m + retryInterval: 1m + timeout: 5m diff --git a/wip/loki/config-map.yaml b/wip/loki/config-map.yaml new file mode 100644 index 00000000..6e7ade74 --- /dev/null +++ b/wip/loki/config-map.yaml @@ -0,0 +1,130 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-alerting-rules + namespace: monitoring +data: + loki-alerting-rules.yaml: |- + groups: + # + # SMART Failures + # + - name: smart-failure + rules: + - alert: SmartFailures + expr: | + sum by (hostname) (count_over_time({hostname=~".+"} | json | _SYSTEMD_UNIT = "smartmontools.service" !~ "(?i)previous self-test completed without error" !~ "(?i)Prefailure" |~ "(?i)(error|fail)"[2m])) > 0 + for: 2m + labels: + severity: critical + category: logs + annotations: + summary: "SMART has reported failures on host {{ $labels.hostname }}" + # + # zigbee2mqtt + # + - name: zigbee2mqtt + rules: + - alert: ZigbeeUnableToReachMQTT + expr: | + sum(count_over_time({app="zigbee2mqtt"} |~ "(?i)not connected to mqtt server"[2m])) > 0 + for: 2m + labels: + severity: critical + category: logs + annotations: + summary: "Zigbee2mqtt is unable to reach MQTT" + # + # zwavejs2mqtt + # + - name: zwavejs2mqtt + rules: + - alert: ZwaveUnableToReachMQTT + expr: | + sum(count_over_time({app="zwavejs2mqtt"} |~ "(?i)error while connecting mqtt"[2m])) > 0 + for: 2m + labels: + severity: critical + category: logs + annotations: + summary: "Zwavejs2mqtt is unable to reach MQTT" + # + # frigate + # + - name: frigate + rules: + - alert: FrigateUnableToReachMQTT + expr: | + sum(count_over_time({app="frigate"} |~ "(?i)unable to connect to mqtt server"[2m])) > 0 + for: 2m + labels: + severity: critical + category: logs + annotations: + summary: "Frigate is unable to reach MQTT" + # + # *arr + # + - name: arr + rules: + - alert: ArrDatabaseIsLocked + expr: | + sum by (app) (count_over_time({app=~".*arr"} |~ "(?i)database is locked"[2m])) > 0 + for: 2m + 
labels: + severity: critical + category: logs + annotations: + summary: "{{ $labels.app }} is experiencing locked database issues" + - alert: ArrDatabaseIsMalformed + expr: | + sum by (app) (count_over_time({app=~".*arr"} |~ "(?i)database disk image is malformed"[2m])) > 0 + for: 2m + labels: + severity: critical + category: logs + annotations: + summary: "{{ $labels.app }} is experiencing malformed database disk image issues" + # + # home-assistant + # + - name: home-assistant + rules: + - alert: HomeAssistantUnableToReachPostgresql + expr: | + sum by (app) (count_over_time({app="home-assistant"} |~ "(?i)error in database connectivity"[2m])) > 0 + for: 2m + labels: + severity: critical + category: logs + annotations: + summary: "Home Assistant is unable to connect to postgresql" +# # +# # valetudo +# # +# - name: valetudo +# rules: +# - alert: ValetudoUnableToReachMQTT +# expr: | +# sum by (hostname) (count_over_time({hostname="valetudo"} |~ "(?i).*error.*mqtt.*"[2m])) > 0 +# for: 2m +# labels: +# severity: critical +# category: logs +# annotations: +# summary: "Valetudo is unable to connect to mqtt" +# # +# # node-red +# # +# - name: node-red +# rules: +# - alert: NodeRedUnableToReachHomeAssistant +# expr: | +# sum by (app) (count_over_time({app="node-red"} |~ "(?i)home assistant.*connecting to undefined"[2m])) > 0 +# for: 2m +# labels: +# severity: critical +# category: logs +# annotations: +# summary: "Node-Red is unable to connect to Home Assistant" diff --git a/wip/loki/helmrelease.yaml b/wip/loki/helmrelease.yaml new file mode 100644 index 00000000..43763349 --- /dev/null +++ b/wip/loki/helmrelease.yaml @@ -0,0 +1,234 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: loki + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: loki + version: 3.2.0 + sourceRef: + kind: HelmRepository + name: grafana + namespace: flux-system + maxHistory: 2 + install: + createNamespace: true + remediation: + retries: 3 
+ upgrade: + cleanupOnFail: true + remediation: + retries: 3 + uninstall: + keepHistory: false + # https://github.com/grafana/loki/blob/main/production/helm/loki/values.yaml + values: + loki: + structuredConfig: + auth_enabled: false + server: + log_level: info + http_listen_port: 3100 + grpc_listen_port: 9095 + memberlist: + join_members: ["loki-memberlist"] + limits_config: + retention_period: 14d + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h + max_cache_freshness_per_query: 10m + split_queries_by_interval: 15m + ingestion_rate_mb: 8 + ingestion_burst_size_mb: 16 + schema_config: + configs: + - from: "2021-08-01" + store: boltdb-shipper + object_store: s3 + schema: v11 + index: + prefix: loki_index_ + period: 24h + common: + path_prefix: /var/loki + replication_factor: 3 + storage: + s3: + s3: null + insecure: true + s3forcepathstyle: true + ring: + kvstore: + store: memberlist + ruler: + enable_api: true + enable_alertmanager_v2: true + alertmanager_url: http://kube-prometheus-stack-alertmanager.monitoring:9093 + storage: + type: local + local: + directory: /rules + rule_path: /tmp/scratch + ring: + kvstore: + store: memberlist + distributor: + ring: + kvstore: + store: memberlist + compactor: + working_directory: /var/loki/boltdb-shipper-compactor + shared_store: s3 + compaction_interval: 10m + retention_enabled: true + retention_delete_delay: 2h + retention_delete_worker_count: 150 + ingester: + max_chunk_age: 1h + lifecycler: + ring: + kvstore: + store: memberlist + analytics: + reporting_enabled: false + gateway: + enabled: true + replicas: 3 + ingress: + enabled: true + ingressClassName: nginx + hosts: + - host: &host "loki.${PUBLIC_DOMAIN}" + paths: + - path: / + pathType: Prefix + tls: + - hosts: + - *host + write: + replicas: 3 + persistence: + size: 10Gi + existingClaim: appdata + subPath: loki + read: + replicas: 3 + extraVolumeMounts: + - name: loki-rules + mountPath: /rules/fake + - name: loki-rules-tmp + 
mountPath: /tmp/scratch + - name: loki-tmp + mountPath: /tmp/loki-tmp + extraVolumes: + - name: loki-rules + configMap: + name: loki-alerting-rules + - name: loki-rules-tmp + emptyDir: {} + - name: loki-tmp + emptyDir: {} + persistence: + size: 10Gi + existingClaim: appdata + subPath: loki + monitoring: + selfMonitoring: + enabled: false + grafanaAgent: + installOperator: false + valuesFrom: + - targetPath: loki.structuredConfig.common.storage.s3.bucketnames + kind: ConfigMap + name: loki-chunks-bucket-v1 + valuesKey: BUCKET_NAME + - targetPath: loki.structuredConfig.common.storage.s3.endpoint + kind: ConfigMap + name: loki-chunks-bucket-v1 + valuesKey: BUCKET_HOST + - targetPath: loki.structuredConfig.common.storage.s3.access_key_id + kind: Secret + name: loki-chunks-bucket-v1 + valuesKey: AWS_ACCESS_KEY_ID + - targetPath: loki.structuredConfig.common.storage.s3.secret_access_key + kind: Secret + name: loki-chunks-bucket-v1 + valuesKey: AWS_SECRET_ACCESS_KEY + +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: loki-minio + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: app-template + version: 1.5.1 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + interval: 15m + values: + image: + repository: quay.io/minio/minio + tag: RELEASE.2022-05-26T05-48-41Z + env: + TZ: "${TIMEZONE}" + MINIO_PROMETHEUS_AUTH_TYPE: public + MINIO_UPDATE: "off" + envFrom: + - secretRef: + name: loki + args: + - server + - /data + - --address + - :9000 + - --console-address + - :9001 + service: + main: + enabled: true + ports: + http: + enabled: true + port: 9001 + api: + enabled: true + port: 9000 + ingress: + main: + enabled: true + ingressClassName: nginx + hosts: + - host: &host "loki-s3.${PUBLIC_DOMAIN}" + paths: + - path: / + pathType: Prefix + tls: + - hosts: + - *host + persistence: + data: + enabled: true + existingClaim: appdata + mountPath: /data + subPath: loki_s3 + podAnnotations: + 
secret.reloader.stakater.com/reload: loki + # resources: + # requests: + # cpu: 22m + # memory: 1500M + # limits: + # # cpu: 49m + # memory: 2000M diff --git a/wip/loki/kustomization.yaml b/wip/loki/kustomization.yaml new file mode 100644 index 00000000..4932aa76 --- /dev/null +++ b/wip/loki/kustomization.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - config-map.yaml + - helmrelease.yaml diff --git a/wip/loki/prometheus-rule.yaml b/wip/loki/prometheus-rule.yaml new file mode 100644 index 00000000..94188af5 --- /dev/null +++ b/wip/loki/prometheus-rule.yaml @@ -0,0 +1,65 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: loki-minio + namespace: dbms +spec: + groups: + - name: minio + rules: + - alert: MinioAbsent + annotations: + description: MinIO component has disappeared from Prometheus service discovery. + summary: MinIO component has disappeared. + expr: | + absent(up{job=~".*minio.*"} == 1) + for: 10m + labels: + severity: critical + - alert: MinioDiskOffline + annotations: + description: MinIO is reporting {{ $value }} disk(s) offline + on server {{ $labels.server }} + summary: MinIO disk(s) offline. + expr: | + minio_cluster_disk_offline_total != 0 + for: 1m + labels: + severity: critical + - alert: MinioNodeOffline + annotations: + description: MinIO is reporting that node {{ $labels.server }} is offline. + summary: MinIO node is offline. + expr: | + minio_cluster_nodes_offline_total != 0 + for: 1m + labels: + severity: critical + - alert: MinioClusterUsage + annotations: + description: MinIO cluster is reporting less than 10% storage free. + {{ $value | humanizePercentage }} of cluster storage is available. + summary: MinIO cluster is low on capacity. 
+ expr: | + sum(minio_cluster_capacity_usable_free_bytes) + / + sum(minio_cluster_capacity_usable_total_bytes) + < .10 + for: 15m + labels: + severity: warning + - alert: MinioNodeDiskUsage + annotations: + description: + MinIO node is reporting less than 10% disk space available. + {{ $value | humanizePercentage }} of disk space available on node {{ $labels.server }} + summary: MinIO node is low on disk space. + expr: | + sum(minio_node_disk_free_bytes) by (server) + / + sum(minio_node_disk_total_bytes) by (server) + < .10 + for: 15m + labels: + severity: warning diff --git a/wip/loki/secret.sops.yaml b/wip/loki/secret.sops.yaml new file mode 100644 index 00000000..b5d6af12 --- /dev/null +++ b/wip/loki/secret.sops.yaml @@ -0,0 +1,29 @@ +# yamllint disable +apiVersion: v1 +kind: Secret +metadata: + name: loki + namespace: monitoring +stringData: + MINIO_ROOT_USER: ENC[AES256_GCM,data:6lxJYps=,iv:krRN+LXw8cEdlYxoSE/wXs8eyX348ORTVPSHVWCpBIs=,tag:8PJPjC9SU/pO4EZ/HM8nuA==,type:str] + MINIO_ROOT_PASSWORD: ENC[AES256_GCM,data:oX5j5bBnQ9rE1LWfjcbBpooBnvldyK0EWi3kxasv5ic=,iv:Buy1Yf0Q83ZT3y8BO0dAvAgtJ9w36MiLvry0quRhxEc=,tag:oL055aMkOnkp/xyxM7ciRw==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: [] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAydDVqekEvVHNnMUdrNVFT + QjV5MFZHTFBMMUVnbDRvUHJKU1NvSi9DVUNNCkl0MFhDdzZpSUdGeXluU1BGWUlS + a3Rrd0o2QnRzYnFHb2ZRWG96OW5nN1UKLS0tIHVQQUd4UGxNMmszMHZxMkd0TlBI + cU5VSC9UYVJjanhGT2lPQitNNnhTdlEKsrB2qp+0UZ24b/C/6JMuG8LOt9jeWZrs + dI0MqDhc+RyDe7P3NUEqf0nLlJxQ1jRuZZ3Wnn9WnPeHFnKKDVclxA== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2022-09-11T16:30:27Z" + mac: 
ENC[AES256_GCM,data:/nj9TH9NwC9q7fyPsDqvmqpMHoB8yAFnYttC8Yin5Gzt3tCjHLWTNLP+u1LY2DDRdUzYQZsCyt5KjYFomlz5LzdBTSNn9dY9dpe3HeeYLOx1BMPX0/JDFcSsDFZQTDCtlh7zRdIE7VfUkdPKvc03A0/hK+xTirGSUCWuAuQy/P0=,iv:NE01ISm+Rou5Pu9GpBz6Vq3Vsr2pGUg7Smu3QjjtgHg=,tag:zUO+jyxIfs3lmksZ9lwYxw==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.7.3 diff --git a/wip/loki/service-monitor.yaml b/wip/loki/service-monitor.yaml new file mode 100644 index 00000000..063a99a0 --- /dev/null +++ b/wip/loki/service-monitor.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: loki-minio + namespace: dbms +spec: + selector: + matchLabels: + app.kubernetes.io/instance: loki-minio + app.kubernetes.io/name: loki-minio + endpoints: + - port: api + scheme: http + path: /minio/v2/metrics/cluster diff --git a/wip/node-problem-detector/helm-release.yaml b/wip/node-problem-detector/helm-release.yaml new file mode 100644 index 00000000..1d644799 --- /dev/null +++ b/wip/node-problem-detector/helm-release.yaml @@ -0,0 +1,36 @@ +# https://github.com/kubernetes/node-problem-detector +# https://github.com/onedr0p/home-ops/blob/main/archive/node-problem-detector/helmrelease.yaml +# https://github.com/toboshii/home-ops/blob/main/cluster/apps/monitoring/node-problem-detector/helmrelease.yaml +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: node-problem-detector + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: node-problem-detector + version: 2.3.4 + sourceRef: + kind: HelmRepository + name: deliveryhero + namespace: flux-system + interval: 15m + maxHistory: 2 + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + retries: 3 + uninstall: + keepHistory: false + values: + metrics: + enabled: true + serviceMonitor: + enabled: true diff --git a/wip/node-problem-detector/kustomization.yaml b/wip/node-problem-detector/kustomization.yaml 
new file mode 100644 index 00000000..dbc604ed --- /dev/null +++ b/wip/node-problem-detector/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - helmrelease.yaml diff --git a/wip/prometheus-pushgateway/helm-release.yaml b/wip/prometheus-pushgateway/helm-release.yaml new file mode 100644 index 00000000..670368d3 --- /dev/null +++ b/wip/prometheus-pushgateway/helm-release.yaml @@ -0,0 +1,33 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: &app prometheus-pushgateway + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: prometheus-pushgateway + version: 1.18.2 + sourceRef: + kind: HelmRepository + name: prometheus-community + namespace: flux-system + maxHistory: 2 + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + retries: 3 + uninstall: + keepHistory: false + values: + fullnameOverride: *app + image: + repository: quay.io/prometheus/pushgateway + serviceMonitor: + enabled: true diff --git a/wip/prometheus-pushgateway/kustomization.yaml b/wip/prometheus-pushgateway/kustomization.yaml new file mode 100644 index 00000000..dbc604ed --- /dev/null +++ b/wip/prometheus-pushgateway/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - helmrelease.yaml diff --git a/wip/prometheus/helm-release.yaml b/wip/prometheus/helm-release.yaml new file mode 100644 index 00000000..bd75819f --- /dev/null +++ b/wip/prometheus/helm-release.yaml @@ -0,0 +1,348 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2beta2 +kind: HelmRelease +metadata: + name: prometheus + namespace: monitoring +spec: + interval: 15m + chart: + spec: + chart: prometheus + version: 15.12.0 + sourceRef: + kind: HelmRepository + name: prometheus-community + namespace: flux-system + maxHistory: 2 + install: + createNamespace: true + remediation: + retries: 3 + upgrade: + 
cleanupOnFail: true + remediation: + retries: 3 + uninstall: + keepHistory: false + # https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus/values.yaml + values: + serviceAccounts: + alertmanager: + create: true + name: + annotations: {} + nodeExporter: + create: true + name: + annotations: {} + pushgateway: + create: true + name: + annotations: {} + server: + create: true + name: + annotations: {} + alertmanager: + replicaCount: 1 + ingress: + enabled: true + pathType: Prefix + ingressClassName: nginx + hosts: + - &host "alert-manager.${PUBLIC_DOMAIN}" + tls: + - hosts: + - *host + securityContext: + runAsUser: "${SECURITY_CONTEXT_RUN_AS_USER}" + runAsGroup: "${SECURITY_CONTEXT_RUN_AS_GROUP}" + fsGroup: "${SECURITY_CONTEXT_FS_GROUP}" + runAsNonRoot: false + persistentVolume: + enabled: true + accessModes: [ReadWriteMany] + mountPath: /data + existingClaim: appdata + subPath: prometheus/alertmanager + alertmanagerFiles: + alertmanager.yml: + receivers: + - name: "null" + - name: email + email_configs: + - send_resolved: false + to: "${NOTIFY_EMAIL}" + from: "AlertManager <${SMTP_USER}>" + smarthost: maddy.comms.svc.cluster.local:2525 + require_tls: false + route: + group_by: [alertname, job] + group_wait: 30s + group_interval: 5m + repeat_interval: 6h + receiver: email + routes: + - receiver: "null" + matchers: + - alertname =~ "InfoInhibitor|Watchdog" + - receiver: email + matchers: + - severity = "critical" + continue: true + inhibit_rules: + - source_matchers: + - severity = "critical" + target_matchers: + - severity = "warning" + equal: [alertname, namespace] + kube-state-metrics: + enabled: true + # metricLabelsAllowlist: + # - "persistentvolumeclaims=[*]" + # prometheus: + # monitor: + # enabled: true + # relabelings: + # - action: replace + # regex: (.*) + # replacement: $1 + # sourceLabels: + # - __meta_kubernetes_pod_node_name + # targetLabel: kubernetes_node + nodeExporter: + enabled: true + server: + name: prometheus + 
statefulSet: + enabled: false + extraArgs: + log.level: debug + # https://github.com/thanos-io/thanos/blob/0d659bf171afa6bdf5c5ece3033df3a7e8245d8c/tutorials/kubernetes-helm/README.md + storage.tsdb.min-block-duration: 2h + storage.tsdb.max-block-duration: 2h + ingress: + enabled: true + pathType: Prefix + ingressClassName: nginx + annotations: + external-dns.home.arpa/enabled: "true" + hosts: + - &host "prometheus.${PUBLIC_DOMAIN}" + tls: + - hosts: + - *host + securityContext: + runAsUser: "${SECURITY_CONTEXT_RUN_AS_USER}" + runAsGroup: "${SECURITY_CONTEXT_RUN_AS_GROUP}" + fsGroup: "${SECURITY_CONTEXT_FS_GROUP}" + runAsNonRoot: false + persistentVolume: + enabled: true + accessModes: [ReadWriteMany] + mountPath: /data + existingClaim: appdata + subPath: prometheus/server + # ### Start Thanos ### + # # https://github.com/thanos-io/thanos/blob/0d659bf171afa6bdf5c5ece3033df3a7e8245d8c/tutorials/kubernetes-helm/README.md + # configPath: /etc/prometheus-shared/prometheus.yml + # extraVolumes: + # - name: prometheus-config-shared + # emptyDir: {} + # extraVolumeMounts: + # - name: prometheus-config-shared + # mountPath: /etc/prometheus-shared/ + # global: + # scrape_interval: 5s + # scrape_timeout: 4s + # external_labels: + # prometheus_group: "${CLUSTER_NAME}" + # prometheus_replica: '$(HOSTNAME)' + # evaluation_interval: 5s + # # extraSecretMounts: + # # - name: thanos-storage-secret + # # mountPath: /etc/secret/ + # # subPath: sa + # # readOnly: false + # # secretName: thanos + # service: + # gRPC: + # enabled: true + # annotations: + # prometheus.io/scrape: "true" + # prometheus.io/port: "9090" + # podAnnotations: + # prometheus.io/scrape: "true" + # prometheus.io/port: "10902" + # secret.reloader.stakater.com/reload: thanos + # sidecarContainers: + # thanos-sidecar: + # # https://quay.io/repository/thanos/thanos?tab=tags + # image: quay.io/thanos/thanos:v0.28.0 + # env: + # # https://thanos.io/tip/thanos/storage.md/#s3 + # - name: AWS_ACCESS_KEY_ID + # valueFrom: 
+ # secretKeyRef: + # name: thanos-minio + # key: MINIO_ROOT_USER + # optional: false + # - name: AWS_SECRET_ACCESS_KEY + # valueFrom: + # secretKeyRef: + # name: thanos-minio + # key: MINIO_ROOT_PASSWORD + # optional: false + # args: + # - "sidecar" + # - "--log.level=debug" + # - "--tsdb.path=/data/" + # - "--prometheus.url=http://127.0.0.1:9090" + # # https://thanos.io/tip/thanos/storage.md/#s3 + # - | + # --objstore.config={ + # type: S3, + # config: { + # bucket: thanos, + # endpoint: thanos-minio.monitoring.svc.cluster.local:9000, + # insecure: true + # } + # } + # - "--reloader.config-file=/etc/prometheus-config/prometheus.yml" + # - "--reloader.config-envsubst-file=/etc/prometheus-shared/prometheus.yml" + # - "--reloader.rule-dir=/etc/prometheus-config/rules" + # ports: + # - name: sidecar-http + # containerPort: 10902 + # - name: grpc + # containerPort: 10901 + # - name: cluster + # containerPort: 10900 + # volumeMounts: + # - name: storage-volume + # mountPath: /data + # - name: config-volume + # mountPath: /etc/prometheus-config + # readOnly: false + # - name: prometheus-config-shared + # mountPath: /etc/prometheus-shared/ + # readOnly: false + # configmapReload: + # image: + # # This image changed to just pause since there's no option to + # # disable configmapReload container in chart, but thanos-sidecar + # # takes over this functionality. 
So basically we don't need another reloader + # repository: gcr.io/google-containers/pause-amd64 + # tag: 3.1 + # # resources: + # # limits: + # # cpu: 20m + # # memory: 20Mi + # # requests: + # # cpu: 20m + # # memory: 20Mi + # ### End Thanos ### + pushgateway: + securityContext: + runAsUser: "${SECURITY_CONTEXT_RUN_AS_USER}" + runAsGroup: "${SECURITY_CONTEXT_RUN_AS_GROUP}" + fsGroup: "${SECURITY_CONTEXT_FS_GROUP}" + runAsNonRoot: false + persistentVolume: + enabled: true + mountPath: /data + existingClaim: appdata + subPath: prometheus/pushgateway + extraScrapeConfigs: | + - job_name: node-exporter + scrape_interval: 1m + scrape_timeout: 10s + honor_timestamps: true + static_configs: + - targets: + - "opnsense.${PRIVATE_DOMAIN}:9100" + +# --- +# apiVersion: helm.toolkit.fluxcd.io/v2beta2 +# kind: HelmRelease +# metadata: +# name: thanos-minio +# namespace: monitoring +# spec: +# interval: 15m +# chart: +# spec: +# chart: app-template +# version: 1.0.1 +# sourceRef: +# kind: HelmRepository +# name: bjw-s +# namespace: flux-system +# values: +# image: +# repository: quay.io/minio/minio +# tag: RELEASE.2022-09-07T22-25-02Z +# env: +# TZ: "${TIMEZONE}" +# MINIO_UPDATE: "off" +# MINIO_SERVER_URL: "https://thanos-s3.${PUBLIC_DOMAIN}" +# MINIO_PROMETHEUS_URL: http://prometheus-prometheus.monitoring.svc.cluster.local:9090 +# MINIO_PROMETHEUS_JOB_ID: minio +# MINIO_BROWSER_REDIRECT_URL: "https://minio.${PUBLIC_DOMAIN}" +# MINIO_IDENTITY_OPENID_CONFIG_URL: "https://auth.${PUBLIC_DOMAIN}/.well-known/openid-configuration" +# MINIO_IDENTITY_OPENID_CLIENT_ID: thanos-minio +# MINIO_IDENTITY_OPENID_CLIENT_SECRET: "${THANOS_MINIO_OAUTH_CLIENT_SECRET}" +# MINIO_IDENTITY_OPENID_SCOPES: "openid,profile,email" +# MINIO_IDENTITY_OPENID_REDIRECT_URI: "https://thanos-s3.${PUBLIC_DOMAIN}/oauth_callback" +# envFrom: +# - secretRef: +# name: thanos-minio +# args: +# - server +# - /data +# - --address +# - :9000 +# - --console-address +# - :9001 +# service: +# main: +# enabled: true +# 
ports: +# http: +# enabled: true +# port: 9001 +# api: +# enabled: true +# port: 9000 +# ingress: +# main: +# enabled: true +# ingressClassName: nginx +# hosts: +# - host: &host "thanos-s3.${PUBLIC_DOMAIN}" +# paths: +# - path: / +# pathType: Prefix +# tls: +# - hosts: +# - *host +# podSecurityContext: +# runAsUser: "${SECURITY_CONTEXT_RUN_AS_USER}" +# runAsGroup: "${SECURITY_CONTEXT_RUN_AS_GROUP}" +# fsGroup: "${SECURITY_CONTEXT_FS_GROUP}" +# persistence: +# data: +# enabled: true +# existingClaim: appdata +# mountPath: /data +# subPath: thanos_s3 +# podAnnotations: +# secret.reloader.stakater.com/reload: thanos +# # resources: +# # requests: +# # cpu: 22m +# # memory: 1500M +# # limits: +# # # cpu: 49m +# # memory: 2000M diff --git a/wip/prometheus/kustomization.yaml b/wip/prometheus/kustomization.yaml new file mode 100644 index 00000000..6e4891d2 --- /dev/null +++ b/wip/prometheus/kustomization.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - secret.sops.yaml + - helmrelease.yaml diff --git a/wip/prometheus/secret.sops.yaml b/wip/prometheus/secret.sops.yaml new file mode 100644 index 00000000..73d9310f --- /dev/null +++ b/wip/prometheus/secret.sops.yaml @@ -0,0 +1,31 @@ +# yamllint disable +apiVersion: v1 +kind: Secret +metadata: + name: thanos-minio + namespace: monitoring +stringData: + #ENC[AES256_GCM,data:ErDNK6rOSNEQdBI=,iv:MTLpOxQrMPqyCWTOsJrinSR0oKcqSryB+TJXIOZHf4E=,tag:28kZCN9sy5wEfsXnHkKzkQ==,type:comment] + MINIO_ROOT_USER: ENC[AES256_GCM,data:6xruH2c=,iv:FdsXJ3B6zktUekXjOXiwlJ4UUMvkwRlmOs7hkNttMXk=,tag:grn/AvfT5bWaVFXXnMTf/w==,type:str] + #ENC[AES256_GCM,data:Al7DDSI47WyPeMs=,iv:UGr/P66IeegYRl1kvVLx6gNN2muYOmngt+6IT3pIK1I=,tag:QbzZFQNkUbNimSuDrM0dQw==,type:comment] + MINIO_ROOT_PASSWORD: ENC[AES256_GCM,data:/yX0mfW43XDAZqrMnNBQXQYrdw9buoE0aK8owhF5sj8=,iv:HAzZZIaeR8e3Rn4S+ROlEpkOs7PEUcNEQ1l7zB5owSA=,tag:qHlIBwmdfDdena+t44GFmw==,type:str] +sops: + kms: [] + gcp_kms: [] + azure_kv: [] + hc_vault: 
[] + age: + - recipient: age148wprsnqjq8jughvywnzmvs8gffhrkendpr7g60q8u4rdsj4jvuqk7ltrs + enc: | + -----BEGIN AGE ENCRYPTED FILE----- + YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBwZ0h6aUpIR2NQSm9rZUc0 + YTBpRWdTdFpPbnU2RnpOTmxtNGRWdkR0OWc0CmVXaHNkWVczZW9UN09Ua3VtekNR + OXVJeHpaN1JQT3h1cXdaYUhCRmVwV0UKLS0tIFhpdjU3cG9OWTdNMmxTTHNCd2pM + VVdMVUw4akhBMEZLWFNzdDBhditUMkUKP1pDB28CvmQul8SdDBZ5CwPCXXk/4PF9 + EwmGaOdBhy568GcHdlCAohXOC60ih0pw06W5/YJwMb1RZJJcNJxyCA== + -----END AGE ENCRYPTED FILE----- + lastmodified: "2022-09-15T04:15:04Z" + mac: ENC[AES256_GCM,data:dyu0DvD7ojhU95Yyj3S2KschibaFIKZDCtzOQ0uZ3AQPX7abc6XTvlx+AbdLfv0Q0h/oWsHXIfTteevEW5zHiXt1tZj6Bw5wjPqUvtRZYXlkoyZCxRnWs7qXfXJwRnJKxs7bjEzKk+PU82gjsQIKC8uxi9s8UPDtiAbXv21T9r8=,iv:Mu5mtpCYWqj4yIeNw5JQYIr3oBzluP6UHV0vXLMrFJw=,tag:x6TJ2uGcomX154U3m4qH3w==,type:str] + pgp: [] + encrypted_regex: ^(data|stringData)$ + version: 3.7.3