| 27902ea | | | 1 | import { NextResponse } from "next/server"; |
| 27902ea | | | 2 | |
/** Outcome of a single HTTP health probe against one upstream service. */
interface ServiceCheck {
  /** Display name of the probed service. */
  name: string;
  /** Health classification derived from the probe result. */
  status: "operational" | "degraded" | "down";
  /** Round-trip time in milliseconds, or `null` when no response was received. */
  latency: number | null;
  /** Optional explanation: the HTTP status for degraded services or an error message for down ones. */
  detail?: string;
}
| 27902ea | | | 9 | |
/** Point-in-time resource snapshot for one Docker container. */
interface ContainerStats {
  /** Container name with the leading slash, `grove-` prefix and `-1` suffix removed. */
  name: string;
  /** Container state string as reported by the Docker API (the `State` field). */
  status: string;
  /** CPU usage in percent, rounded to two decimals. */
  cpu_percent: number;
  /** Memory in use, in whole mebibytes (bytes / 1024 / 1024). */
  mem_usage_mb: number;
  /** Memory limit, in whole mebibytes. */
  mem_limit_mb: number;
  /** Memory usage as a percentage of the limit, rounded to two decimals. */
  mem_percent: number;
  /** Bytes received across all networks, in mebibytes (two decimals). */
  net_rx_mb: number;
  /** Bytes transmitted across all networks, in mebibytes (two decimals). */
  net_tx_mb: number;
  /** Human-readable duration such as `3d 4h`, `2h 15m` or `7m`. */
  uptime: string;
}
| 27902ea | | | 21 | |
/** Host-level resource snapshot assembled from `/proc` and `df`. */
interface SystemMetrics {
  /** Rough CPU utilisation in percent, estimated from the 1-minute load average. */
  cpu_percent: number;
  /** Total memory in mebibytes. */
  mem_total_mb: number;
  /** Memory in use in mebibytes. */
  mem_used_mb: number;
  /** Memory in use as a whole-number percentage of the total. */
  mem_percent: number;
  /** Size of the root filesystem in gigabytes, as reported by `df`. */
  disk_total_gb: number;
  /** Used space on the root filesystem in gigabytes. */
  disk_used_gb: number;
  /** Used disk space as a whole-number percentage of the total. */
  disk_percent: number;
  /** The first three fields of `/proc/loadavg` (1, 5 and 15 minute averages). */
  load_avg: number[];
  /** Human-readable host uptime, or an empty string when unavailable. */
  uptime: string;
}
| 27902ea | | | 33 | |
/**
 * Probes `url` with a GET request and classifies the service's health.
 *
 * - A 2xx response, or a 401, is reported as `"operational"` (a 401 is
 *   presumably accepted because the probe carries no credentials, so an auth
 *   rejection still shows the service is answering).
 * - Any other HTTP status is reported as `"degraded"` with `detail` set to
 *   `HTTP <status>`.
 * - A network error or a timeout is reported as `"down"` with `latency: null`.
 *
 * This function never rejects; every failure is folded into the result.
 *
 * @param name    Display name copied into the result.
 * @param url     Absolute URL to request.
 * @param timeout Milliseconds to wait before aborting the request.
 * @returns The classified probe result.
 */
async function checkService(
  name: string,
  url: string,
  timeout = 5000
): Promise<ServiceCheck> {
  const start = Date.now();
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);
  try {
    const res = await fetch(url, { signal: controller.signal, cache: "no-store" });
    const latency = Date.now() - start;
    if (res.ok || res.status === 401) {
      return { name, status: "operational", latency };
    }
    return { name, status: "degraded", latency, detail: `HTTP ${res.status}` };
  } catch (err: unknown) {
    let detail = "Connection failed";
    if (controller.signal.aborted) {
      // Our own timer fired: report that instead of the generic abort message.
      detail = `Timed out after ${timeout}ms`;
    } else if (err instanceof Error) {
      detail = err.message;
    }
    return { name, status: "down", latency: null, detail };
  } finally {
    // Runs on success AND failure so a rejected fetch never leaves the timer pending.
    clearTimeout(timer);
  }
}
| 27902ea | | | 59 | |
/**
 * Collects a resource snapshot for every running container by querying the
 * Docker Engine HTTP API on `localhost:2375` (plain TCP, not the unix socket).
 *
 * The container list is fetched first, then one `stats?stream=false` request
 * per container is issued. Those requests run concurrently and each has its
 * own timeout, so the total time is bounded by the slowest container rather
 * than by the sum of all of them.
 *
 * Best-effort by design: if the list request fails the result is `[]`, and a
 * container whose stats cannot be fetched or parsed is still returned with
 * all numeric fields set to 0.
 *
 * NOTE: `uptime` is derived from the container's `Created` timestamp, so it is
 * the container's age, not the time since its last (re)start.
 *
 * @returns One entry per listed container, in the order Docker returned them.
 */
async function getDockerStats(): Promise<ContainerStats[]> {
  const DOCKER_API = "http://localhost:2375";
  const REQUEST_TIMEOUT_MS = 5000;
  const BYTES_PER_MB = 1024 * 1024;

  // Minimal shapes for the parts of the Docker responses that are read here.
  interface DockerContainer {
    Id: string;
    Names?: string[];
    State: string;
    Created?: number;
  }
  interface CpuSample {
    cpu_usage?: { total_usage?: number };
    system_cpu_usage?: number;
    online_cpus?: number;
  }
  interface StatsSample {
    cpu_stats?: CpuSample;
    precpu_stats?: CpuSample;
    memory_stats?: {
      usage?: number;
      limit?: number;
      stats?: { cache?: number; inactive_file?: number };
    };
    networks?: Record<string, { rx_bytes?: number; tx_bytes?: number } | null>;
  }

  /** Coerces anything that is not a finite number to 0. */
  const num = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) ? value : 0;

  /** GETs a Docker API path with a timeout; resolves to `null` on a non-2xx reply. */
  const getJson = async (path: string): Promise<unknown> => {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
    try {
      const res = await fetch(`${DOCKER_API}${path}`, {
        signal: controller.signal,
        cache: "no-store",
      });
      return res.ok ? await res.json() : null;
    } finally {
      clearTimeout(timer);
    }
  };

  /** Entry with identity fields filled in and every metric zeroed. */
  const baseline = (c: DockerContainer): ContainerStats => {
    const ageMs = c.Created ? Math.max(0, Date.now() - c.Created * 1000) : 0;
    return {
      name: (c.Names?.[0] ?? "").replace(/^\//, "").replace(/^grove-/, "").replace(/-1$/, ""),
      status: c.State,
      cpu_percent: 0,
      mem_usage_mb: 0,
      mem_limit_mb: 0,
      mem_percent: 0,
      net_rx_mb: 0,
      net_tx_mb: 0,
      uptime: formatUptime(ageMs),
    };
  };

  /** Entry with metrics computed from one stats sample. */
  const withSample = (c: DockerContainer, s: StatsSample): ContainerStats => {
    const cpuDelta =
      num(s.cpu_stats?.cpu_usage?.total_usage) - num(s.precpu_stats?.cpu_usage?.total_usage);
    const sysDelta = num(s.cpu_stats?.system_cpu_usage) - num(s.precpu_stats?.system_cpu_usage);
    const cpuCount = num(s.cpu_stats?.online_cpus) || 1;
    const cpuPercent = sysDelta > 0 ? (cpuDelta / sysDelta) * cpuCount * 100 : 0;

    // Page cache is excluded from "used" memory. cgroup v1 reports it as
    // `cache`; cgroup v2 hosts have no `cache` key and report `inactive_file`.
    const mem = s.memory_stats;
    const pageCache = num(mem?.stats?.cache) || num(mem?.stats?.inactive_file);
    const memUsage = Math.max(0, num(mem?.usage) - pageCache);
    const memLimit = num(mem?.limit);

    const nets = Object.values(s.networks ?? {});
    const netRx = nets.reduce((sum, n) => sum + num(n?.rx_bytes), 0);
    const netTx = nets.reduce((sum, n) => sum + num(n?.tx_bytes), 0);

    return {
      ...baseline(c),
      cpu_percent: Math.round(cpuPercent * 100) / 100,
      mem_usage_mb: Math.round(memUsage / BYTES_PER_MB),
      mem_limit_mb: Math.round(memLimit / BYTES_PER_MB),
      // Guard the division: a missing/zero limit must not produce NaN or Infinity.
      mem_percent: memLimit > 0 ? Math.round((memUsage / memLimit) * 10000) / 100 : 0,
      net_rx_mb: Math.round((netRx / BYTES_PER_MB) * 100) / 100,
      net_tx_mb: Math.round((netTx / BYTES_PER_MB) * 100) / 100,
    };
  };

  try {
    const listed = await getJson("/containers/json?all=false");
    if (!Array.isArray(listed)) return [];
    const containers = listed as DockerContainer[];

    return await Promise.all(
      containers.map(async (c) => {
        try {
          const sample = (await getJson(
            `/containers/${c.Id}/stats?stream=false`
          )) as StatsSample | null;
          if (sample) return withSample(c, sample);
        } catch {
          // Deliberate best effort: fall through to the zeroed entry below.
        }
        return baseline(c);
      })
    );
  } catch {
    // Docker API unreachable or returned something unexpected.
    return [];
  }
}
| 27902ea | | | 130 | |
/**
 * Builds a host resource snapshot from `/proc/meminfo`, `/proc/loadavg`,
 * `/proc/uptime` and the output of `df` for the root filesystem.
 *
 * Every source is optional: a file that cannot be read, or a value that does
 * not parse, leaves the corresponding fields at their defaults (0, `[]` or
 * `""`) instead of propagating NaN into the JSON response.
 *
 * `cpu_percent` is only an estimate: the 1-minute load average divided by the
 * CPU count, clamped to 0–100.
 *
 * @returns The snapshot, or `null` if an unexpected error escapes.
 */
async function getSystemMetrics(): Promise<SystemMetrics | null> {
  /** Whole-number percentage that is 0 (not NaN/Infinity) for an unusable denominator. */
  const percent = (part: number, whole: number): number =>
    whole > 0 && Number.isFinite(part) ? Math.round((part / whole) * 100) : 0;

  try {
    const [memInfo, loadAvg, uptime] = await Promise.allSettled([
      fetchFile("/proc/meminfo"),
      fetchFile("/proc/loadavg"),
      fetchFile("/proc/uptime"),
    ]);

    let cpuPercent = 0;
    let memTotal = 0;
    let memUsed = 0;
    let memPercent = 0;
    let loads: number[] = [];
    let uptimeStr = "";

    if (memInfo.status === "fulfilled" && memInfo.value) {
      const lines = memInfo.value.split("\n");
      // Value (in kB) of a "/proc/meminfo" key, or null when absent/unparsable.
      const kb = (key: string): number | null => {
        const line = lines.find((l) => l.startsWith(key));
        if (!line) return null;
        const value = Number.parseInt(line.split(/\s+/)[1] ?? "", 10);
        return Number.isFinite(value) ? value : null;
      };
      const totalKb = kb("MemTotal:") ?? 0;
      // Without MemAvailable, approximate it as free + buffers + page cache
      // rather than reporting 100% usage.
      const availableKb =
        kb("MemAvailable:") ??
        ((kb("MemFree:") ?? 0) + (kb("Buffers:") ?? 0) + (kb("Cached:") ?? 0));
      memTotal = Math.round(totalKb / 1024);
      memUsed = Math.round(Math.max(0, totalKb - availableKb) / 1024);
      memPercent = percent(memUsed, memTotal);
    }

    if (loadAvg.status === "fulfilled" && loadAvg.value) {
      loads = loadAvg.value.trim().split(/\s+/).slice(0, 3).map(Number);
      // rough CPU% from 1-min load avg
      const { cpus } = await import("os");
      const numCpus = cpus().length || 1;
      const oneMinute = loads[0];
      if (oneMinute !== undefined && Number.isFinite(oneMinute)) {
        cpuPercent = Math.min(100, Math.max(0, Math.round((oneMinute / numCpus) * 100)));
      }
    }

    if (uptime.status === "fulfilled" && uptime.value) {
      const secs = Number.parseFloat(uptime.value.split(" ")[0] ?? "");
      if (Number.isFinite(secs)) uptimeStr = formatUptime(secs * 1000);
    }

    // Disk usage of "/" via df (GNU flags first, BSD flags as fallback).
    let diskTotal = 0;
    let diskUsed = 0;
    let diskPercent = 0;
    try {
      const [{ exec }, { promisify }] = await Promise.all([
        import("child_process"),
        import("util"),
      ]);
      // Async exec with a timeout: a synchronous call would block the event
      // loop and inflate the latencies of probes running concurrently.
      const { stdout } = await promisify(exec)(
        "df -BG / 2>/dev/null || df -g / 2>/dev/null",
        { encoding: "utf-8", timeout: 5000 }
      );
      const row = String(stdout).trim().split("\n")[1];
      if (row) {
        const cols = row.split(/\s+/);
        // parseInt tolerates the trailing "G" that `df -BG` appends.
        const total = Number.parseInt(cols[1] ?? "", 10);
        const used = Number.parseInt(cols[2] ?? "", 10);
        if (Number.isFinite(total) && Number.isFinite(used)) {
          diskTotal = total;
          diskUsed = used;
          diskPercent = percent(used, total);
        }
      }
    } catch {
      // df missing, timed out or failed: disk fields stay at 0.
    }

    return {
      cpu_percent: cpuPercent,
      mem_total_mb: memTotal,
      mem_used_mb: memUsed,
      mem_percent: memPercent,
      disk_total_gb: diskTotal,
      disk_used_gb: diskUsed,
      disk_percent: diskPercent,
      load_avg: loads,
      uptime: uptimeStr,
    };
  } catch {
    return null;
  }
}
| 27902ea | | | 208 | |
/**
 * Reads a file as UTF-8 text without blocking the event loop.
 *
 * @param path Filesystem path to read.
 * @returns The file contents, or `null` if the file cannot be read for any
 *          reason (missing, not permitted, is a directory, ...).
 */
async function fetchFile(path: string): Promise<string | null> {
  try {
    const { readFile } = await import("fs/promises");
    // `return await` keeps a read failure inside this try/catch.
    return await readFile(path, "utf-8");
  } catch {
    return null;
  }
}
| 27902ea | | | 217 | |
/**
 * Formats a duration as a short human-readable string.
 *
 * The two most significant units are shown: `"<d>d <h>h"` from one day up,
 * `"<h>h <m>m"` from one hour up, otherwise `"<m>m"`.
 *
 * @param ms Duration in milliseconds. Negative or non-finite values (e.g. from
 *           clock skew or a failed parse) are treated as 0.
 * @returns The formatted duration, e.g. `"3d 4h"`, `"2h 15m"` or `"0m"`.
 */
function formatUptime(ms: number): string {
  // Clamp so callers never see "-1m" or "NaNm".
  const secs = Number.isFinite(ms) && ms > 0 ? Math.floor(ms / 1000) : 0;
  const days = Math.floor(secs / 86400);
  const hours = Math.floor((secs % 86400) / 3600);
  const mins = Math.floor((secs % 3600) / 60);
  if (days > 0) return `${days}d ${hours}h`;
  if (hours > 0) return `${hours}h ${mins}m`;
  return `${mins}m`;
}
| 27902ea | | | 227 | |
/**
 * Route handler that reports platform health.
 *
 * Probes every known service, gathers container and host metrics in parallel,
 * and responds with JSON containing the overall status, the per-service
 * results, the container snapshots, the host snapshot and a timestamp.
 *
 * Overall status is `"operational"` when every service is operational,
 * `"major_outage"` when at least one service is down, and `"degraded"`
 * otherwise.
 */
export async function GET() {
  // [display name, probe URL] — order here is the order in the response.
  const endpoints: ReadonlyArray<readonly [string, string]> = [
    ["Web", "http://grove-web:3000/"],
    ["API", "http://grove-api:4000/api/health"],
    ["Hub API", "http://hub-api:4000/api/auth/me"],
    ["EdenAPI", "http://mononoke-slapi:8443/health_check"],
    ["Git", "http://mononoke-git:8080/health_check"],
    ["Bridge", "http://grove-bridge:8443/health_check"],
  ];

  const probes = Promise.all(endpoints.map(([label, url]) => checkService(label, url)));
  const [checks, containers, system] = await Promise.all([
    probes,
    getDockerStats(),
    getSystemMetrics(),
  ]);

  let overall = "degraded";
  if (checks.every((check) => check.status === "operational")) {
    overall = "operational";
  } else if (checks.some((check) => check.status === "down")) {
    overall = "major_outage";
  }

  const checkedAt = new Date().toISOString();
  return NextResponse.json({
    status: overall,
    services: checks,
    containers,
    system,
    checked_at: checkedAt,
  });
}