diff --git a/config/hosts/athame/nixos.nix b/config/hosts/athame/nixos.nix
index 33ad1ed3..020fdf7e 100644
--- a/config/hosts/athame/nixos.nix
+++ b/config/hosts/athame/nixos.nix
@@ -11,18 +11,13 @@ with lib;
     users.kat.services.weechat
     services.filehost
     services.gitea
-    services.grafana
     services.logrotate
     services.loki
     services.mail
     services.matrix
     services.murmur
-    services.netdata
     services.nginx
-    services.node-exporter
     services.postgres
-    services.prometheus
-    services.promtail
     services.radicale
     services.restic
     services.syncplay
@@ -34,6 +29,10 @@ with lib;
     services.znc
   ];
 
+  kw.monitoring = {
+    server.enable = true;
+  };
+
   # Terraform
 
   deploy.tf = {
diff --git a/config/hosts/beltane/nixos.nix b/config/hosts/beltane/nixos.nix
index 2c2d908f..9170e39f 100644
--- a/config/hosts/beltane/nixos.nix
+++ b/config/hosts/beltane/nixos.nix
@@ -12,12 +12,9 @@ with lib;
     services.fusionpbx
     services.jellyfin
     services.kattv-ingest
-    services.promtail
     services.postgres
-    services.netdata
     services.nfs
     services.nginx
-    services.node-exporter
     services.transmission
     services.tvheadend
     services.zfs
diff --git a/config/hosts/samhain/nixos.nix b/config/hosts/samhain/nixos.nix
index 4fde1b2d..5b586c29 100644
--- a/config/hosts/samhain/nixos.nix
+++ b/config/hosts/samhain/nixos.nix
@@ -15,11 +15,8 @@ in
     profiles.gui
     profiles.vfio
     users.kat.guiFull
-    services.netdata
     services.nginx
     services.katsplash
-    services.node-exporter
-    services.promtail
     services.restic
     services.zfs
   ];
diff --git a/config/hosts/yule/nixos.nix b/config/hosts/yule/nixos.nix
index ba8f22df..af8a798c 100644
--- a/config/hosts/yule/nixos.nix
+++ b/config/hosts/yule/nixos.nix
@@ -9,10 +9,7 @@ with lib;
     profiles.hardware.v330-14arr
     profiles.gui
     users.kat.guiFull
-    services.netdata
     services.nginx
-    services.node-exporter
-    services.promtail
     services.restic
     services.zfs
   ];
diff --git a/config/modules/nixos/default.nix b/config/modules/nixos/default.nix
index ad9e4a80..7664658f 100644
--- a/config/modules/nixos/default.nix
+++ b/config/modules/nixos/default.nix
@@ -7,6 +7,7 @@
     (import (sources.katexprs + "/modules")).nixos
     (import (sources.impermanence + "/nixos.nix"))
     ./deploy.nix
+    ./monitoring.nix
     ./dyndns.nix
     ./secrets.nix
     (sources.tf-nix + "/modules/nixos/secrets.nix")
diff --git a/config/modules/nixos/monitoring.nix b/config/modules/nixos/monitoring.nix
new file mode 100644
index 00000000..61d82971
--- /dev/null
+++ b/config/modules/nixos/monitoring.nix
@@ -0,0 +1,233 @@
+# Monitoring stack, split into two roles configured under `kw.monitoring`:
+#   server - Grafana and Prometheus (`server.enable`), Loki (`server.loki`)
+#   client - netdata, the Prometheus node exporter and promtail
+{ config, lib, pkgs, meta, kw, tf, ... }: with lib;
+
+let
+  cfg = config.kw.monitoring;
+  # Node exporter settings of every node that enables the exporter.
+  prom_configs =
+    (mapAttrs (hostName: host: host.services.prometheus.exporters.node)
+      (filterAttrs
+        (_: host: host.services.prometheus.exporters.node.enable)
+        meta.network.nodes));
+  # netdata settings of every node that enables netdata.
+  nd_configs = (mapAttrs (hostName: host: host.services.netdata)
+    (filterAttrs (_: host: host.services.netdata.enable) meta.network.nodes));
+  # Names of the nodes that set `kw.monitoring.server.loki`.
+  lokiNodes = attrNames
+    (filterAttrs (_: host: host.kw.monitoring.server.loki) meta.network.nodes);
+in {
+  options.kw.monitoring = {
+    server = {
+      enable = mkEnableOption "Monitoring Stack Server";
+      loki = mkEnableOption "Loki";
+      # Joined to a host name as "<host>.<domainPrefix>:<port>", so the value
+      # must carry neither a leading dot nor a port.
+      domainPrefix = mkOption {
+        type = types.nullOr types.str;
+      };
+    };
+    client = {
+      enable = mkEnableOption "Monitoring Stack Client" // {
+        default = config.network.yggdrasil.enable && config.services.nginx.enable;
+      };
+    };
+  };
+  config = mkMerge [
+    ({
+      kw.monitoring.server.domainPrefix = mkDefault "${config.network.addresses.yggdrasil.prefix}.${config.network.dns.domain}";
+    })
+    (mkIf cfg.server.loki {
+      network.firewall.private.tcp.ports = [ 3100 ];
+      services.loki = {
+        enable = true;
+        configuration = {
+          auth_enabled = false;
+          chunk_store_config = { max_look_back_period = "0s"; };
+          ingester = {
+            chunk_idle_period = "1h";
+            chunk_retain_period = "30s";
+            chunk_target_size = 1048576;
+            lifecycler = {
+              address = "0.0.0.0";
+              final_sleep = "0s";
+              ring = {
+                kvstore = { store = "inmemory"; };
+                replication_factor = 1;
+              };
+            };
+            max_chunk_age = "1h";
+            max_transfer_retries = 0;
+          };
+          limits_config = {
+            reject_old_samples = true;
+            reject_old_samples_max_age = "168h";
+          };
+          schema_config = {
+            configs = [{
+              from = "2020-10-24";
+              index = {
+                period = "24h";
+                prefix = "index_";
+              };
+              object_store = "filesystem";
+              schema = "v11";
+              store = "boltdb-shipper";
+            }];
+          };
+          compactor = {
+            working_directory = "/tmp/loki-compactor-boltdb";
+            shared_store = "filesystem";
+          };
+          server = { http_listen_port = 3100; };
+          storage_config = {
+            boltdb_shipper = {
+              active_index_directory = "/var/lib/loki/boltdb-shipper-active";
+              cache_location = "/var/lib/loki/boltdb-shipper-cache";
+              cache_ttl = "24h";
+              shared_store = "filesystem";
+            };
+            filesystem = { directory = "/var/lib/loki/chunks"; };
+          };
+          table_manager = {
+            retention_deletes_enabled = false;
+            retention_period = "0s";
+          };
+        };
+      };
+    })
+    (mkIf cfg.server.enable {
+      kw.secrets = [
+        "grafana-admin-pass"
+      ];
+
+      secrets.files.grafana-admin-pass = {
+        text = "${tf.variables.grafana-admin-pass.ref}";
+        owner = "grafana";
+        group = "grafana";
+      };
+
+      services.grafana.security.adminPasswordFile =
+        config.secrets.files.grafana-admin-pass.path;
+
+      services.postgresql = {
+        ensureDatabases = [ "grafana" ];
+        ensureUsers = [{
+          name = "grafana";
+          ensurePermissions."DATABASE grafana" = "ALL PRIVILEGES";
+        }];
+      };
+
+      services.grafana = {
+        enable = true;
+        port = 3001;
+        domain = "graph.${config.network.dns.domain}";
+        rootUrl = "https://graph.${config.network.dns.domain}/";
+        database = {
+          type = "postgres";
+          host = "/run/postgresql/";
+          user = "grafana";
+          name = "grafana";
+        };
+      };
+
+      services.nginx.virtualHosts."graph.${config.network.dns.domain}" = {
+        enableACME = true;
+        forceSSL = true;
+        locations = { "/".proxyPass = "http://127.0.0.1:3001"; };
+      };
+
+      deploy.tf.dns.records.services_grafana = {
+        tld = config.network.dns.tld;
+        domain = "graph";
+        cname.target = "${config.networking.hostName}.${config.network.dns.tld}";
+      };
+
+      # One netdata job and one node exporter job per node that enables them.
+      services.prometheus = {
+        enable = true;
+        scrapeConfigs = mapAttrsToList
+          (hostName: prom: {
+            job_name = "${hostName}-nd";
+            metrics_path = "/api/v1/allmetrics";
+            honor_labels = true;
+            params = { format = [ "prometheus" ]; };
+            static_configs = singleton { targets = singleton "${hostName}.${cfg.server.domainPrefix}:19999"; };
+          })
+          nd_configs ++ mapAttrsToList
+          (hostName: prom: {
+            job_name = hostName;
+            static_configs = singleton {
+              targets = [ "${hostName}.${cfg.server.domainPrefix}:${toString prom.port}" ];
+            };
+          })
+          prom_configs;
+      };
+    })
+    (mkIf cfg.client.enable {
+      network.firewall.private.tcp.ports = [ 9002 19999 ];
+
+      services.netdata.enable = true;
+
+      services.nginx.virtualHosts = kw.virtualHostGen {
+        networkFilter = singleton "yggdrasil";
+        block = {
+          locations."/netdata" = {
+            proxyPass = "http://[::1]:19999/";
+          };
+        };
+      };
+
+      # promtail only runs when some node provides a Loki server to push to.
+      systemd.services.promtail = {
+        enable = lokiNodes != [ ];
+        description = "Promtail service for Loki";
+        wantedBy = [ "multi-user.target" ];
+        wants = [ "yggdrasil.service" ];
+
+        serviceConfig = mkIf (lokiNodes != [ ]) {
+          ExecStart = let
+            # attrNames is sorted, so several Loki nodes resolve to the first name.
+            serverNode = head lokiNodes;
+            promtailConfig = pkgs.writeText "prom-config.json" (builtins.toJSON {
+              clients =
+                [{ url = "http://${serverNode}.${cfg.server.domainPrefix}:3100/loki/api/v1/push"; }];
+              positions = { filename = "/tmp/positions.yaml"; };
+              scrape_configs = [{
+                job_name = "journal";
+                journal = {
+                  labels = {
+                    host = config.networking.hostName;
+                    job = "systemd-journal";
+                  };
+                  max_age = "12h";
+                };
+                relabel_configs = [{
+                  source_labels = [ "__journal__systemd_unit" ];
+                  target_label = "unit";
+                }];
+              }];
+              server = {
+                grpc_listen_port = 0;
+                http_listen_port = 28183;
+              };
+            });
+          in ''
+            ${pkgs.grafana-loki}/bin/promtail --config.file ${promtailConfig}
+          '';
+        };
+      };
+
+      services.prometheus = {
+        exporters = {
+          node = {
+            enable = true;
+            enabledCollectors = [ "systemd" ];
+            port = 9002;
+          };
+        };
+      };
+    })
+  ];
+}
diff --git a/config/services/grafana/default.nix b/config/services/grafana/default.nix
deleted file mode 100644
index a98a45d9..00000000
--- a/config/services/grafana/default.nix
+++ /dev/null
@@ -1,49 +0,0 @@
-{ config, tf, ... }:
-
-{
-  kw.secrets = [
-    "grafana-admin-pass"
-  ];
-
-  secrets.files.grafana-admin-pass = {
-    text = "${tf.variables.grafana-admin-pass.ref}";
-    owner = "grafana";
-    group = "grafana";
-  };
-
-  services.grafana.security.adminPasswordFile =
-    config.secrets.files.grafana-admin-pass.path;
-
-  services.postgresql = {
-    ensureDatabases = [ "grafana" ];
-    ensureUsers = [{
-      name = "grafana";
-      ensurePermissions."DATABASE grafana" = "ALL PRIVILEGES";
-    }];
-  };
-
-  services.grafana = {
-    enable = true;
-    port = 3001;
-    domain = "graph.${config.network.dns.domain}";
-    rootUrl = "https://graph.${config.network.dns.domain}/";
-    database = {
-      type = "postgres";
-      host = "/run/postgresql/";
-      user = "grafana";
-      name = "grafana";
-    };
-  };
-
-  services.nginx.virtualHosts."graph.${config.network.dns.domain}" = {
-    enableACME = true;
-    forceSSL = true;
-    locations = { "/".proxyPass = "http://127.0.0.1:3001"; };
-  };
-
-  deploy.tf.dns.records.services_grafana = {
-    tld = config.network.dns.tld;
-    domain = "graph";
-    cname.target = "${config.networking.hostName}.${config.network.dns.tld}";
-  };
-}
diff --git a/config/services/netdata/default.nix b/config/services/netdata/default.nix
deleted file mode 100644
index 9d8b8c90..00000000
--- a/config/services/netdata/default.nix
+++ /dev/null
@@ -1,15 +0,0 @@
-{ config, kw, ... }:
-
-{
-  network.firewall.private.tcp.ports = [ 19999 ];
-
-  services.netdata = { enable = true; };
-
-  services.nginx.virtualHosts = kw.virtualHostGen {
-    block = {
-      locations."/netdata" = {
-        proxyPass = "http://[::1]:19999/";
-      };
-    };
-  };
-}
diff --git a/config/services/node-exporter/default.nix b/config/services/node-exporter/default.nix
deleted file mode 100644
index d03264a4..00000000
--- a/config/services/node-exporter/default.nix
+++ /dev/null
@@ -1,15 +0,0 @@
-{ config, ... }:
-
-{
-  network.firewall.private.tcp.ports = [ 9002 ];
-
-  services.prometheus = {
-    exporters = {
-      node = {
-        enable = true;
-        enabledCollectors = [ "systemd" ];
-        port = 9002;
-      };
-    };
-  };
-}
diff --git a/config/services/prometheus/default.nix b/config/services/prometheus/default.nix
deleted file mode 100644
index 294bc528..00000000
--- a/config/services/prometheus/default.nix
+++ /dev/null
@@ -1,45 +0,0 @@
-{ config, hosts, lib, ... }:
-
-with lib;
-
-let
-  prom_configs =
-    (mapAttrs (hostName: host: host.config.services.prometheus.exporters.node)
-      (filterAttrs
-        (_: host: host.config.services.prometheus.exporters.node.enable)
-        hosts));
-  nd_configs = (mapAttrs (hostName: host: host.config.services.netdata)
-    (filterAttrs (_: host: host.config.services.netdata.enable) hosts));
-in
-{
-  services.prometheus = {
-    enable = true;
-    scrapeConfigs = [
-      {
-        job_name = "boline";
-        static_configs = [{ targets = [ "boline.${config.network.addresses.yggdrasil.prefix}.${config.network.dns.domain}:8002" ]; }];
-      }
-      {
-        job_name = "samhain-vm";
-        metrics_path = "/metrics";
-        static_configs = [{ targets = [ "samhain.${config.network.addresses.yggdrasil.prefix}.${config.network.dns.domain}:10445" ]; }];
-      }
-    ] ++ mapAttrsToList
-      (hostName: prom: {
-        job_name = "${hostName}-nd";
-        metrics_path = "/api/v1/allmetrics";
-        honor_labels = true;
-        params = { format = [ "prometheus" ]; };
-        static_configs = [{ targets = [ "${hostName}.${config.network.addresses.yggdrasil.prefix}.${config.network.dns.domain}:19999" ]; }];
-      })
-      nd_configs ++ mapAttrsToList
-      (hostName: prom: {
-        job_name = hostName;
-        static_configs = [{
-          targets = [ "${hostName}.${config.network.addresses.yggdrasil.prefix}.${config.network.dns.domain}:${toString prom.port}" ];
-        }];
-      })
-      prom_configs;
-  };
-}
-
diff --git a/config/services/promtail/default.nix b/config/services/promtail/default.nix
deleted file mode 100644
index 0ae78f02..00000000
--- a/config/services/promtail/default.nix
+++ /dev/null
@@ -1,43 +0,0 @@
-{ config, lib, pkgs, ... }:
-
-with lib;
-
-let
-  promtail_config = pkgs.writeText "prom-config.json" (builtins.toJSON {
-    clients =
-      [{ url = "http://${config.network.addresses.yggdrasil.domain}:3100/loki/api/v1/push"; }];
-    positions = { filename = "/tmp/positions.yaml"; };
-    scrape_configs = [{
-      job_name = "journal";
-      journal = {
-        labels = {
-          host = config.networking.hostName;
-          job = "systemd-journal";
-        };
-        max_age = "12h";
-      };
-      relabel_configs = [{
-        source_labels = [ "__journal__systemd_unit" ];
-        target_label = "unit";
-      }];
-    }];
-    server = {
-      grpc_listen_port = 0;
-      http_listen_port = 28183;
-    };
-  });
-in
-{
-  systemd.services.promtail = {
-    enable = false;
-    description = "Promtail service for Loki";
-    wantedBy = [ "multi-user.target" ];
-    wants = [ "yggdrassil.service" ];
-
-    serviceConfig = {
-      ExecStart = ''
-        ${pkgs.grafana-loki}/bin/promtail --config.file ${promtail_config}
-      '';
-    };
-  };
-}