web/app/api/status/route.tsblame
View source
27902ea1import { NextResponse } from "next/server";
27902ea2
/**
 * Outcome of probing one upstream service over HTTP.
 */
interface ServiceCheck {
  /** Human-readable service label shown in the status payload. */
  name: string;

  /** Health classification derived from the probe result. */
  status: "operational" | "degraded" | "down";

  /** Elapsed wall-clock time of the probe in milliseconds; `null` when the request failed outright. */
  latency: number | null;

  /** Optional explanation for a non-operational result (e.g. an HTTP status or an error message). */
  detail?: string;
}
27902ea9
/**
 * Resource snapshot for a single running container, as reported in the
 * `containers` array of the status response.
 */
interface ContainerStats {
  /** Container name with the leading slash, `grove-` prefix and `-1` suffix removed. */
  name: string;

  /** Container state string as returned by the Docker API (`State`). */
  status: string;

  /** CPU usage in percent, rounded to two decimals. */
  cpu_percent: number;

  /** Memory in use (cache excluded), in whole MiB. */
  mem_usage_mb: number;

  /** Memory limit, in whole MiB. */
  mem_limit_mb: number;

  /** Memory usage as a percentage of the limit, rounded to two decimals. */
  mem_percent: number;

  /** Total bytes received across all container networks, in MiB (two decimals). */
  net_rx_mb: number;

  /** Total bytes transmitted across all container networks, in MiB (two decimals). */
  net_tx_mb: number;

  /** Human-readable age derived from the container's `Created` timestamp. */
  uptime: string;
}
27902ea21
/**
 * Host-level resource snapshot, as reported in the `system` field of the
 * status response.
 */
interface SystemMetrics {
  /** Rough CPU utilisation in percent, estimated from the 1-minute load average and capped at 100. */
  cpu_percent: number;

  /** Total memory in MiB (from `MemTotal` in /proc/meminfo). */
  mem_total_mb: number;

  /** Memory in use in MiB (total minus available). */
  mem_used_mb: number;

  /** Used memory as a whole-number percentage of the total. */
  mem_percent: number;

  /** Size of the root filesystem as reported by `df`, in GiB. */
  disk_total_gb: number;

  /** Used space on the root filesystem as reported by `df`, in GiB. */
  disk_used_gb: number;

  /** Used disk space as a whole-number percentage of the total. */
  disk_percent: number;

  /** Up to three load averages taken from /proc/loadavg (1, 5 and 15 minutes). */
  load_avg: number[];

  /** Human-readable host uptime; empty when it could not be read. */
  uptime: string;
}
27902ea33
/**
 * Probes a single HTTP endpoint and classifies its health.
 *
 * - Any 2xx response, or a 401, counts as "operational". A 401 still proves
 *   the service is up and answering (presumably for endpoints that require
 *   authentication and are probed without credentials).
 * - Any other HTTP status is reported as "degraded" with the status code.
 * - A network error, invalid URL or timeout is reported as "down".
 *
 * @param name    Label copied into the result.
 * @param url     Absolute URL to request with GET.
 * @param timeout Milliseconds to wait before aborting the request (default 5000).
 * @returns The probe result; this function never rejects.
 */
async function checkService(
  name: string,
  url: string,
  timeout = 5000
): Promise<ServiceCheck> {
  const start = Date.now();
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);

  try {
    const res = await fetch(url, { signal: controller.signal, cache: "no-store" });
    const latency = Date.now() - start;

    // Only the status line matters; discard the body so the underlying
    // connection is released instead of waiting for garbage collection.
    void res.body?.cancel().catch(() => undefined);

    if (res.ok || res.status === 401) {
      return { name, status: "operational", latency };
    }
    return { name, status: "degraded", latency, detail: `HTTP ${res.status}` };
  } catch (err: unknown) {
    return {
      name,
      status: "down",
      latency: null,
      detail: err instanceof Error ? err.message : "Connection failed",
    };
  } finally {
    // Always clear the abort timer, including on the failure path, so no
    // stray timer outlives the probe.
    clearTimeout(timer);
  }
}
27902ea59
/**
 * Collects a resource snapshot for every running container by querying the
 * Docker Engine HTTP API on `localhost:2375`.
 *
 * NOTE(review): port 2375 is Docker's plain TCP endpoint, not the unix socket.
 * Confirm it is only reachable from the local host.
 *
 * Behaviour:
 * - The container list and every per-container stats call share the same
 *   5 second timeout, and the stats calls run in parallel.
 * - A container whose stats cannot be fetched or parsed is still listed, with
 *   all numeric fields set to 0.
 * - `uptime` is derived from the container's `Created` timestamp, so it is the
 *   time since creation rather than since the last (re)start.
 *
 * @returns One entry per running container in API order, or an empty array
 *          when the Docker API is unreachable or returns an error.
 */
async function getDockerStats(): Promise<ContainerStats[]> {
  const dockerApi = "http://localhost:2375";
  const requestTimeoutMs = 5000;
  const bytesPerMiB = 1024 * 1024;

  // Docker API payloads are not validated here; keep the escape hatch local.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  type DockerJson = any;

  /** GETs a Docker API path with a timeout; resolves to null on a non-2xx status. */
  const getJson = async (path: string): Promise<DockerJson> => {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), requestTimeoutMs);
    try {
      const res = await fetch(`${dockerApi}${path}`, {
        signal: controller.signal,
        cache: "no-store",
      });
      return res.ok ? await res.json() : null;
    } finally {
      clearTimeout(timer);
    }
  };

  /** Coerces missing or non-finite values to 0 so no NaN reaches the JSON output. */
  const num = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) ? value : 0;

  const round2 = (value: number): number => Math.round(value * 100) / 100;

  /** Builds the entry for one container, falling back to zeroed metrics on any failure. */
  const describe = async (c: DockerJson): Promise<ContainerStats> => {
    const name = (c.Names?.[0] ?? "")
      .replace(/^\//, "")
      .replace(/^grove-/, "")
      .replace(/-1$/, "");
    // `Created` is a Unix timestamp in seconds.
    const uptimeMs = c.Created ? Date.now() - c.Created * 1000 : 0;

    const fallback: ContainerStats = {
      name,
      status: c.State,
      cpu_percent: 0,
      mem_usage_mb: 0,
      mem_limit_mb: 0,
      mem_percent: 0,
      net_rx_mb: 0,
      net_tx_mb: 0,
      uptime: formatUptime(uptimeMs),
    };

    try {
      const s = await getJson(`/containers/${c.Id}/stats?stream=false`);
      if (!s) return fallback;

      // CPU%: container CPU time delta over host CPU time delta, scaled by core count.
      const cpuDelta =
        num(s.cpu_stats.cpu_usage.total_usage) - num(s.precpu_stats.cpu_usage.total_usage);
      const sysDelta = num(s.cpu_stats.system_cpu_usage) - num(s.precpu_stats.system_cpu_usage);
      const cpuCount = s.cpu_stats.online_cpus || 1;
      const cpuPercent = sysDelta > 0 ? (cpuDelta / sysDelta) * cpuCount * 100 : 0;

      // Subtract page cache from usage. cgroup v1 exposes it as `cache`;
      // cgroup v2 hosts expose `inactive_file` instead.
      const memDetail = s.memory_stats.stats;
      const cacheBytes = num(memDetail?.cache ?? memDetail?.inactive_file);
      const memUsage = Math.max(0, num(s.memory_stats.usage) - cacheBytes);
      const memLimit = num(s.memory_stats.limit);

      const sumNet = (key: "rx_bytes" | "tx_bytes"): number =>
        (Object.values(s.networks ?? {}) as DockerJson[]).reduce(
          (total: number, iface) => total + num(iface?.[key]),
          0
        );

      return {
        ...fallback,
        cpu_percent: round2(cpuPercent),
        mem_usage_mb: Math.round(memUsage / bytesPerMiB),
        mem_limit_mb: Math.round(memLimit / bytesPerMiB),
        // Guard against a zero/missing limit, which would otherwise yield NaN or Infinity.
        mem_percent: memLimit > 0 ? round2((memUsage / memLimit) * 100) : 0,
        net_rx_mb: round2(sumNet("rx_bytes") / bytesPerMiB),
        net_tx_mb: round2(sumNet("tx_bytes") / bytesPerMiB),
      };
    } catch {
      // Best effort: stats are optional, the container is still listed.
      return fallback;
    }
  };

  try {
    const containers = await getJson("/containers/json?all=false");
    if (!Array.isArray(containers)) return [];
    // Stats calls are independent, so issue them concurrently.
    return await Promise.all(containers.map(describe));
  } catch {
    // Docker API not reachable (e.g. not running on the host): report nothing.
    return [];
  }
}
27902ea130
/**
 * Gathers host-level metrics from `/proc` and `df`.
 *
 * Each source is best effort: when one cannot be read or parsed, the fields
 * that depend on it keep their zero/empty defaults instead of becoming NaN.
 *
 * - Memory: `/proc/meminfo`. "Used" is `MemTotal - MemAvailable`; when
 *   `MemAvailable` is absent it is approximated as `MemFree + Buffers + Cached`.
 * - Load and CPU: `/proc/loadavg`. `cpu_percent` is only a rough estimate
 *   (1-minute load divided by CPU count, capped at 100).
 * - Uptime: `/proc/uptime`.
 * - Disk: `df` on `/`, in GiB. It runs asynchronously with a timeout so it
 *   neither blocks the event loop nor hangs the request.
 *
 * @returns The collected metrics, or `null` if collection failed unexpectedly.
 */
async function getSystemMetrics(): Promise<SystemMetrics | null> {
  try {
    const [memInfo, loadAvg, uptime] = await Promise.allSettled([
      fetchFile("/proc/meminfo"),
      fetchFile("/proc/loadavg"),
      fetchFile("/proc/uptime"),
    ]);

    let cpuPercent = 0;
    let memTotal = 0;
    let memUsed = 0;
    let memPercent = 0;
    let loads: number[] = [];
    let uptimeStr = "";

    if (memInfo.status === "fulfilled" && memInfo.value) {
      const lines = memInfo.value.split("\n");
      const has = (key: string): boolean => lines.some((l) => l.startsWith(key));
      // Values in /proc/meminfo are in kB; unparsable or missing entries count as 0.
      const kb = (key: string): number => {
        const line = lines.find((l) => l.startsWith(key));
        const parsed = line ? parseInt(line.split(/\s+/)[1] ?? "", 10) : NaN;
        return Number.isFinite(parsed) ? parsed : 0;
      };

      const totalKb = kb("MemTotal:");
      const availableKb = has("MemAvailable:")
        ? kb("MemAvailable:")
        : kb("MemFree:") + kb("Buffers:") + kb("Cached:");

      memTotal = Math.round(totalKb / 1024);
      memUsed = Math.round(Math.max(0, totalKb - availableKb) / 1024);
      memPercent = memTotal > 0 ? Math.round((memUsed / memTotal) * 100) : 0;
    }

    if (loadAvg.status === "fulfilled" && loadAvg.value) {
      const parts = loadAvg.value.trim().split(/\s+/);
      loads = parts.slice(0, 3).map(Number);
      // Rough CPU% from the 1-minute load average.
      const { cpus } = await import("os");
      const numCpus = cpus().length || 1; // cpus() can be empty in some environments
      const oneMinute = loads[0] ?? NaN;
      cpuPercent = Number.isFinite(oneMinute)
        ? Math.min(100, Math.round((oneMinute / numCpus) * 100))
        : 0;
    }

    if (uptime.status === "fulfilled" && uptime.value) {
      const secs = parseFloat(uptime.value.split(" ")[0] ?? "");
      if (Number.isFinite(secs)) {
        uptimeStr = formatUptime(secs * 1000);
      }
    }

    // Disk usage of the root filesystem via `df` (GNU flags first, BSD as fallback).
    let diskTotal = 0;
    let diskUsed = 0;
    let diskPercent = 0;
    try {
      const { exec } = await import("child_process");
      const { promisify } = await import("util");
      const { stdout } = await promisify(exec)(
        "df -BG / 2>/dev/null || df -g / 2>/dev/null",
        { encoding: "utf-8", timeout: 3000 }
      );
      // Row 0 is the header; row 1 holds: filesystem, total, used, ...
      const row = stdout.trim().split("\n")[1];
      if (row) {
        const cols = row.split(/\s+/);
        const total = parseInt(cols[1] ?? "", 10);
        const used = parseInt(cols[2] ?? "", 10);
        if (Number.isFinite(total) && Number.isFinite(used) && total > 0) {
          diskTotal = total;
          diskUsed = used;
          diskPercent = Math.round((used / total) * 100);
        }
      }
    } catch {
      // `df` unavailable or timed out: leave the disk fields at 0.
    }

    return {
      cpu_percent: cpuPercent,
      mem_total_mb: memTotal,
      mem_used_mb: memUsed,
      mem_percent: memPercent,
      disk_total_gb: diskTotal,
      disk_used_gb: diskUsed,
      disk_percent: diskPercent,
      load_avg: loads,
      uptime: uptimeStr,
    };
  } catch {
    return null;
  }
}
27902ea208
/**
 * Reads a file as UTF-8 text without blocking the event loop.
 *
 * @param path Filesystem path to read (e.g. an entry under `/proc`).
 * @returns The file contents, or `null` if the file is missing or unreadable.
 *          This function never rejects.
 */
async function fetchFile(path: string): Promise<string | null> {
  try {
    const { readFile } = await import("fs/promises");
    // `await` here so a rejected read is caught below rather than escaping.
    return await readFile(path, "utf-8");
  } catch {
    return null;
  }
}
27902ea217
/**
 * Formats a duration as a short human-readable string.
 *
 * Shows the two most significant units: `"3d 4h"` for a day or more,
 * `"5h 12m"` for an hour or more, otherwise `"42m"`.
 *
 * @param ms Duration in milliseconds. Negative or non-finite values (e.g. from
 *           clock skew or a failed parse) are treated as 0.
 * @returns The formatted duration.
 */
function formatUptime(ms: number): string {
  const safeMs = Number.isFinite(ms) && ms > 0 ? ms : 0;
  const secs = Math.floor(safeMs / 1000);
  const days = Math.floor(secs / 86400);
  const hours = Math.floor((secs % 86400) / 3600);
  const mins = Math.floor((secs % 3600) / 60);
  if (days > 0) return `${days}d ${hours}h`;
  if (hours > 0) return `${hours}h ${mins}m`;
  return `${mins}m`;
}
27902ea227
/**
 * Route segment config: always run this handler at request time.
 *
 * A parameterless GET handler may otherwise be statically evaluated and cached
 * by Next.js, which would freeze the status page at its build-time result.
 */
export const dynamic = "force-dynamic";

/**
 * GET handler for the status endpoint.
 *
 * Runs all service probes, the Docker container snapshot and the host metrics
 * concurrently, then derives an overall status:
 * - "operational" when every service is operational,
 * - "major_outage" when at least one service is down,
 * - "degraded" otherwise.
 *
 * @returns A JSON response with `status`, `services`, `containers`, `system`
 *          and an ISO-8601 `checked_at` timestamp, marked as non-cacheable.
 */
export async function GET() {
  // [label, health URL] for each probed service.
  const targets: ReadonlyArray<readonly [string, string]> = [
    ["Web", "http://grove-web:3000/"],
    ["API", "http://grove-api:4000/api/health"],
    ["Hub API", "http://hub-api:4000/api/auth/me"],
    ["EdenAPI", "http://mononoke-slapi:8443/health_check"],
    ["Git", "http://mononoke-git:8080/health_check"],
    ["Bridge", "http://grove-bridge:8443/health_check"],
  ];

  const [checks, containers, system] = await Promise.all([
    Promise.all(targets.map(([label, url]) => checkService(label, url))),
    getDockerStats(),
    getSystemMetrics(),
  ]);

  let overall: "operational" | "major_outage" | "degraded";
  if (checks.every((c) => c.status === "operational")) {
    overall = "operational";
  } else if (checks.some((c) => c.status === "down")) {
    overall = "major_outage";
  } else {
    overall = "degraded";
  }

  return NextResponse.json(
    {
      status: overall,
      services: checks,
      containers,
      system,
      checked_at: new Date().toISOString(),
    },
    // Status data is only meaningful when fresh; forbid intermediary caching.
    { headers: { "Cache-Control": "no-store" } }
  );
}