feat(monitoring): gatus, grafana alerting to discord

This commit is contained in:
Kat Inskip 2024-05-31 14:16:21 -07:00
parent cee397d774
commit 79ba879e6d
Signed by: kat
GPG key ID: 465E64DECEA8CF0F
8 changed files with 769 additions and 0 deletions

View file

@ -0,0 +1,86 @@
# Gatus status page: one HTTP up-check with Discord alerting, SQLite storage,
# and a firewall opening that follows the configured web port.
{ config, ... }: let
  # Secret declared below via `sops.secrets`; its path is handed to Gatus as an
  # environment file. Presumably it defines DISCORD_WEBHOOK_URL — confirm
  # against ../secrets/gatus.yaml.
  envSecret = config.sops.secrets.gatus_environment_file;

  # Final (evaluated) listener settings, so the firewall rule tracks `web.port`.
  listener = config.services.gatus.settings.web;

  # Polling interval shared by every basic HTTP endpoint.
  pollEvery = "30s";

  # Alert definition attached to every endpoint: fire on the first failure,
  # resolve after three consecutive successes, and announce the resolution.
  discordAlert = {
    type = "discord";
    send-on-resolved = true;
    description = "Healthcheck failed.";
    failure-threshold = 1;
    success-threshold = 3;
  };

  # Plain "does it answer with HTTP 200" probe for the given URL.
  httpUpCheck = target: {
    url = target;
    interval = pollEvery;
    conditions = ["[STATUS] == 200"];
  };

  # Layers the shared alerting block underneath an endpoint definition.
  withDiscordAlerts = check: {alerts = [discordAlert];} // check;
in {
  sops.secrets.gatus_environment_file.sopsFile = ../secrets/gatus.yaml;

  services.gatus = {
    enable = true;
    environmentFile = envSecret.path;
    settings = {
      # The `$` is escaped so Nix emits a literal `${DISCORD_WEBHOOK_URL}`;
      # the variable is meant to be filled in from the environment instead.
      alerting.discord.webhook-url = "\${DISCORD_WEBHOOK_URL}";

      # Endpoint configuration.
      # NOTE(review): upstream Gatus documents `endpoints` as a list of entries
      # carrying a `name` field; this attrset form presumably relies on the
      # gatus module in use translating attribute names — confirm.
      # Home Assistant: shared alerting plus a basic HTTP check of its domain.
      endpoints."Home Assistant" =
        withDiscordAlerts (httpUpCheck "https://home.local.gensokyo.zone");

      # Status page presentation.
      ui = {
        title = "Gensokyo Zone Status";
        description = "The status of the various girls in Gensokyo!";
        header = "Gensokyo Zone Status";
      };

      # Prometheus metrics.
      metrics = true;

      # SQLite rather than Postgres: fewer moving parts, so the status page
      # depends on no other service (internal or external) beyond whatever
      # gets it to the internet.
      storage = {
        type = "sqlite";
        path = "/var/lib/gatus/data.db";
      };

      # Bound to the local address for now, on the port after the last one
      # allocated for the monitoring project.
      # NOTE(review): 9095 is also Loki's documented default gRPC port —
      # confirm there is no clash if both ever share a host.
      web = {
        address = "10.1.1.38";
        port = 9095;
      };
    };
  };

  # Disabled reverse-proxy vhost, kept for later.
  # NOTE(review): `serverName` is not bound anywhere in this file; it needs a
  # definition before this can be enabled.
  /* services.nginx.virtualHosts."status.gensokyo.zone" = let
       gatusWebCfg = config.services.gatus.settings.web;
       upstream = "${gatusWebCfg.address}:${toString gatusWebCfg.port}";
     in {
       forceSSL = true;
       useACMEHost = serverName;
       kTLS = true;
       locations."/" = {
         proxyPass = "http://${upstream}";
         proxyWebsockets = true;
       };
     }; */

  # Open the Gatus web port on the `local` firewall interface.
  networking.firewall.interfaces.local.allowedTCPPorts = [listener.port];
}

View file

@ -0,0 +1,26 @@
# Provisions a single Grafana alerting contact point ("Discord") whose webhook
# URL comes from a sops-managed secret file.
{ config, ... }: let
  # The secret declared below; only its on-disk path is referenced here.
  webhookSecret = config.sops.secrets.grafana_discord_webhook_url;

  # The one receiver behind the "Discord" contact point.
  discordReceiver = {
    uid = "discord_alerting";
    type = "discord";
    # Resolve messages stay enabled.
    disableResolveMessage = false;
    settings = {
      # Only the secret's *path* is interpolated by Nix; the surrounding
      # `$__file{...}` is left literal for Grafana, whose provisioning syntax
      # reads the value from that file.
      url = "$__file{${webhookSecret.path}}";
      #avatar_url = "";
    };
  };
in {
  sops.secrets.grafana_discord_webhook_url = {
    sopsFile = ../secrets/grafana.yaml;
    # Presumably so the Grafana service user can read the secret — confirm.
    owner = "grafana";
  };

  services.grafana.provision.alerting.contactPoints.settings = {
    apiVersion = 1;
    contactPoints = [
      {
        orgId = 1;
        name = "Discord";
        receivers = [discordReceiver];
      }
    ];
  };
}

View file

@ -0,0 +1,36 @@
# Enables Grafana, Loki and Prometheus and opens their listening ports on the
# `lan` firewall interface.
{
  config,
  lib,
  ...
}: let
  inherit (lib.modules) mkIf mkMerge;

  # Evaluated service configuration, used to derive the firewall ports.
  svc = config.services;

  # Loki's `server` section.
  # NOTE(review): assumes another module sets `http_listen_port` and
  # `grpc_listen_port` under `services.loki.configuration.server`; nothing in
  # this file does — confirm.
  lokiServer = svc.loki.configuration.server;
in {
  services.grafana = {
    enable = true;
    settings = {
      "auth.anonymous" = {
        enabled = true;
        # org_name = domain;
        # org_role = "Viewer"? "Editor"?
        # NOTE(review): anonymous visitors receive the Admin role, so anyone
        # who can reach the Grafana port can change its configuration —
        # confirm this is intended.
        org_role = "Admin";
      };
      metrics = {
        enabled = true;
        disable_total_stats = true;
      };
    };
  };
  services.loki.enable = true;
  services.prometheus.enable = true;

  # Each service contributes its port(s) only while it is enabled.
  networking.firewall.interfaces.lan.allowedTCPPorts = mkMerge [
    (mkIf svc.grafana.enable [svc.grafana.settings.server.http_port])
    (mkIf svc.loki.enable [
      lokiServer.http_listen_port
      # The gRPC port is included only when it is non-zero.
      (mkIf (lokiServer.grpc_listen_port != 0) lokiServer.grpc_listen_port)
    ])
    (mkIf svc.prometheus.enable [svc.prometheus.port])
  ];
}