gowatch
分享一个很久之前学习go做的一个日常巡检工具,如果你也在学习go,作为入门的一个学习案例,还是挺不错的。
源码
package main
import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"math"
	"os"
	"os/signal"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"syscall"
	"time"
	"unicode/utf8"

	"github.com/shirou/gopsutil/v3/cpu"
	"github.com/shirou/gopsutil/v3/disk"
	"github.com/shirou/gopsutil/v3/host"
	"github.com/shirou/gopsutil/v3/load"
	"github.com/shirou/gopsutil/v3/mem"
	gnet "github.com/shirou/gopsutil/v3/net"
	"github.com/shirou/gopsutil/v3/process"
)
// Process exit codes, so external scripts can distinguish a clean run,
// a run that ended with alerts, a runtime failure, and a lock conflict.
const (
	ExitOK    = 0  // finished without any alert
	ExitAlert = 10 // at least one threshold fired during the run
	ExitFail  = 20 // configuration or collection error
	ExitLock  = 30 // another instance already holds the job lock
)
// Config mirrors the whole runtime configuration loaded from the JSON file.
type Config struct {
	JobName       string                   `json:"jobname"`        // logical job name, also used for the default lock path
	Interval      int                      `json:"interval"`       // sampling interval in seconds
	Format        string                   `json:"format"`         // log format: "json" or "text"
	LogFile       string                   `json:"log_file"`       // optional rotated log file path; empty logs to stdout only
	LogLevel      string                   `json:"log_level"`      // "DEBUG" enables debug logging
	LockFile      string                   `json:"lock_file"`      // optional lock file path
	Mountpoints   []string                 `json:"mountpoints"`    // mountpoints to sample; defaults to "/"
	NetIfaces     []string                 `json:"net_ifaces"`     // NIC whitelist; empty means all NICs
	TopN          int                      `json:"topn"`           // number of top processes to report on alert
	IgnoreProcess []string                 `json:"ignore_process"` // process names excluded from top-N listings
	Thresholds    map[string]ThresholdRule `json:"thresholds"`     // alert rules keyed by metric name
	PromTextfile  string                   `json:"prom_textfile"`  // optional Prometheus textfile output path
}

// ThresholdRule is the raw threshold definition as written in the config file.
type ThresholdRule struct {
	GT       *float64 `json:"gt"`         // fire when value > GT
	LT       *float64 `json:"lt"`         // fire when value < LT
	Times    int      `json:"times"`      // consecutive hits required before alerting
	Mount    string   `json:"mount"`      // disk_percent only: mountpoint to check
	GTPerCPU *float64 `json:"gt_per_cpu"` // load1 only: per-core load limit
}

// Threshold is the runtime threshold object with defaults already applied.
type Threshold struct {
	Name     string // metric name: cpu_percent, mem_percent, disk_percent, load1
	GT       *float64
	LT       *float64
	Times    int // always >= 1 after parseThresholds
	Mount    string
	GTPerCPU *float64
}

// Alert describes one rule whose trigger condition has been met.
type Alert struct {
	Type   string         `json:"type"`            // metric name of the firing rule
	Mount  string         `json:"mount,omitempty"` // mountpoint, for disk_percent rules
	Value  float64        `json:"value"`           // sampled value that violated the rule
	Times  int            `json:"times"`           // consecutive hit count at trigger time
	Reason string         `json:"reason"`          // human-readable explanation
	Rule   map[string]any `json:"rule"`            // echo of the configured rule parameters
}
// CPUInfo aggregates CPU count, total utilization, and per-core utilization.
type CPUInfo struct {
	Count   int       `json:"count"`                    // logical CPU count (>= 1)
	Percent float64   `json:"percent"`                  // total utilization percentage
	PerCPU  []float64 `json:"percpu_percent,omitempty"` // per-core utilization percentages
}

// LoadInfo holds the 1/5/15-minute system load averages.
type LoadInfo struct {
	Load1  float64 `json:"load1"`
	Load5  float64 `json:"load5"`
	Load15 float64 `json:"load15"`
}

// MemInfo aggregates physical memory and swap usage.
type MemInfo struct {
	Total       uint64  `json:"total"`
	Available   uint64  `json:"available"`
	Used        uint64  `json:"used"`
	Percent     float64 `json:"percent"`
	SwapTotal   uint64  `json:"swap_total"`
	SwapUsed    uint64  `json:"swap_used"`
	SwapPercent float64 `json:"swap_percent"`
}

// DiskUsage records space usage for one mountpoint. Error is set (and the
// other fields left zero) when that mountpoint could not be statted.
type DiskUsage struct {
	Total   uint64  `json:"total,omitempty"`
	Used    uint64  `json:"used,omitempty"`
	Free    uint64  `json:"free,omitempty"`
	Percent float64 `json:"percent,omitempty"`
	Error   string  `json:"error,omitempty"`
}

// DiskIO sums global disk IO counters across all devices.
type DiskIO struct {
	ReadBytes  uint64 `json:"read_bytes"`
	WriteBytes uint64 `json:"write_bytes"`
	ReadCount  uint64 `json:"read_count"`
	WriteCount uint64 `json:"write_count"`
}

// DiskInfo bundles per-mountpoint space usage with the global IO counters.
type DiskInfo struct {
	Usage map[string]DiskUsage `json:"usage"` // keyed by mountpoint
	IO    DiskIO               `json:"io"`
}

// NetCounter holds a NIC's cumulative sent/received byte counts.
type NetCounter struct {
	BytesSent uint64 `json:"bytes_sent"`
	BytesRecv uint64 `json:"bytes_recv"`
}

// NetRate holds the instantaneous throughput derived from two samples.
type NetRate struct {
	RxBps float64 `json:"rx_Bps"` // receive rate, bytes per second
	TxBps float64 `json:"tx_Bps"` // transmit rate, bytes per second
	Dt    float64 `json:"dt"`     // seconds between the two samples
}

// NetInfo bundles cumulative NIC counters and derived rates, keyed by NIC name.
type NetInfo struct {
	Counters map[string]NetCounter `json:"counters"`
	Rate     map[string]NetRate    `json:"rate"`
}

// Metrics is the complete result of one sampling pass.
type Metrics struct {
	TS        string   `json:"ts"`   // sample timestamp, RFC3339
	Host      string   `json:"host"` // hostname
	CPU       CPUInfo  `json:"cpu"`
	Load      LoadInfo `json:"load"`
	Mem       MemInfo  `json:"mem"`
	Disk      DiskInfo `json:"disk"`
	Net       NetInfo  `json:"net"`
	UptimeSec uint64   `json:"uptime_sec"` // seconds since boot
	BootTime  string   `json:"boot_time"`  // boot time, RFC3339
}

// ProcessInfo describes one entry in the top-N resource consumer listings.
type ProcessInfo struct {
	PID        int32   `json:"pid"`
	Name       string  `json:"name"`
	User       string  `json:"user"`
	CPUPercent float64 `json:"cpu_percent"`
	RSS        uint64  `json:"rss"`     // resident set size in bytes
	Cmdline    string  `json:"cmdline"` // command line, truncated to 500 bytes
}

// NetSnapshot remembers the previous NIC sample so rates can be computed.
type NetSnapshot struct {
	CapturedAt    time.Time
	CountersByNIC map[string]NetCounter
}
// Collector reads system metrics via gopsutil and keeps the previous network
// snapshot needed for rate computation.
type Collector struct {
	mountpoints         []string     // mountpoints to sample (never empty; see newCollector)
	netIfaces           []string     // NIC whitelist; empty means all NICs
	previousNetSnapshot *NetSnapshot // nil until the first Collect call
}

// AlertEngine applies the configured thresholds to each sample and tracks
// consecutive hits per rule.
type AlertEngine struct {
	thresholds []Threshold
	hitCounts  map[string]int // consecutive hit streak, keyed by rule name (plus mountpoint)
}

// RotateWriter is an io.Writer that rotates the underlying file once a write
// would exceed maxBytes, keeping up to backupCount numbered backups.
type RotateWriter struct {
	mu          sync.Mutex // guards all fields and file operations
	path        string
	maxBytes    int64 // rotation threshold in bytes; <= 0 disables rotation
	backupCount int
	currentFile *os.File
}

// AppLogger wraps a standard logger with text and JSON output formats.
type AppLogger struct {
	logger *log.Logger
	format string // "json" selects JSON output; anything else is text
	debug  bool   // when false, DEBUG records are suppressed
}

// FileLock ensures only a single instance runs per job, via flock.
type FileLock struct {
	path     string
	lockFile *os.File // non-nil while the lock is held
}
// isoNow returns the current local time rendered as an RFC3339 timestamp.
func isoNow() string {
	now := time.Now()
	return now.Format(time.RFC3339)
}
// maxInt reports the larger of its two arguments.
func maxInt(left, right int) int {
	if right > left {
		return right
	}
	return left
}
// readJSONFile loads the file at path and decodes its JSON contents into target.
func readJSONFile(path string, target any) error {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return readErr
	}
	return json.Unmarshal(raw, target)
}
// ensureDir creates path (and any missing parents) unless it is empty or the
// current directory, which need no creation.
func ensureDir(path string) error {
	switch path {
	case "", ".":
		return nil
	default:
		return os.MkdirAll(path, 0o755)
	}
}
// atomicWriteText writes content to path atomically: data is staged in a
// temporary file in the same directory, flushed to disk, given conventional
// permissions, and then renamed over the target so readers never observe a
// partially written file.
func atomicWriteText(path, content string) error {
	dirPath := filepath.Dir(path)
	if err := ensureDir(dirPath); err != nil {
		return err
	}
	tempFile, err := os.CreateTemp(dirPath, ".tmp_*")
	if err != nil {
		return err
	}
	tempFilePath := tempFile.Name()
	// Best-effort cleanup; after a successful rename the temp name is gone
	// and Remove fails harmlessly.
	defer os.Remove(tempFilePath)
	if _, err := tempFile.WriteString(content); err != nil {
		tempFile.Close()
		return err
	}
	if err := tempFile.Sync(); err != nil {
		tempFile.Close()
		return err
	}
	if err := tempFile.Close(); err != nil {
		return err
	}
	// os.CreateTemp creates the file with mode 0600; widen to the usual 0644
	// so consumers such as node_exporter's textfile collector (often running
	// as a different user) can read the result.
	if err := os.Chmod(tempFilePath, 0o644); err != nil {
		return err
	}
	return os.Rename(tempFilePath, path)
}
// newRotateWriter constructs a size-rotating file writer and opens its target.
func newRotateWriter(path string, maxBytes int64, backupCount int) (*RotateWriter, error) {
	w := &RotateWriter{
		path:        path,
		maxBytes:    maxBytes,
		backupCount: backupCount,
	}
	if err := w.open(); err != nil {
		return nil, err
	}
	return w, nil
}
// open creates the log directory if needed and (re)opens the log file in
// append mode, storing the handle on the writer.
func (writer *RotateWriter) open() error {
	if err := ensureDir(filepath.Dir(writer.path)); err != nil {
		return err
	}
	handle, openErr := os.OpenFile(writer.path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644)
	if openErr != nil {
		return openErr
	}
	writer.currentFile = handle
	return nil
}
// rotate closes the live file, shifts existing numbered backups one slot up
// (path.N -> path.N+1), moves the live file to path.1, and reopens a fresh
// log file.
func (writer *RotateWriter) rotate() error {
	if writer.currentFile != nil {
		_ = writer.currentFile.Close()
		writer.currentFile = nil
	}
	// Move backups starting from the highest index so that no slot is
	// overwritten before its occupant has been moved out of the way.
	for idx := writer.backupCount - 1; idx >= 1; idx-- {
		src := fmt.Sprintf("%s.%d", writer.path, idx)
		dst := fmt.Sprintf("%s.%d", writer.path, idx+1)
		if _, statErr := os.Stat(src); statErr == nil {
			_ = os.Rename(src, dst)
		}
	}
	if _, statErr := os.Stat(writer.path); statErr == nil {
		_ = os.Rename(writer.path, writer.path+".1")
	}
	return writer.open()
}
// Write appends data to the log file, rotating first when the write would
// push the file past maxBytes. Safe for concurrent use.
func (writer *RotateWriter) Write(data []byte) (int, error) {
	writer.mu.Lock()
	defer writer.mu.Unlock()
	if writer.currentFile == nil {
		if err := writer.open(); err != nil {
			return 0, err
		}
	}
	if writer.maxBytes > 0 {
		// A Stat failure is ignored: better to keep logging than to rotate
		// on unreliable size information.
		if info, statErr := writer.currentFile.Stat(); statErr == nil && info.Size()+int64(len(data)) > writer.maxBytes {
			if err := writer.rotate(); err != nil {
				return 0, err
			}
		}
	}
	return writer.currentFile.Write(data)
}
// newLogger builds an AppLogger that always writes to stdout and, when
// logFile is non-empty, also to a size-rotated file (10 MiB per file,
// 7 backups kept).
func newLogger(logFile, format string, debug bool) (*AppLogger, error) {
	sinks := []io.Writer{os.Stdout}
	if logFile != "" {
		fileSink, err := newRotateWriter(logFile, 10*1024*1024, 7)
		if err != nil {
			return nil, err
		}
		sinks = append(sinks, fileSink)
	}
	return &AppLogger{
		logger: log.New(io.MultiWriter(sinks...), "", 0),
		format: format,
		debug:  debug,
	}, nil
}
// log emits one record in either JSON or text form. DEBUG records are
// dropped unless debug logging is enabled.
func (appLogger *AppLogger) log(level, msg string, fields map[string]any) {
	if level == "DEBUG" && !appLogger.debug {
		return
	}
	if fields == nil {
		fields = map[string]any{}
	}
	if appLogger.format == "json" {
		payload := map[string]any{
			"ts":    isoNow(),
			"level": level,
			"msg":   msg,
		}
		for key, value := range fields {
			payload[key] = value
		}
		encoded, _ := json.Marshal(payload)
		appLogger.logger.Println(string(encoded))
		return
	}
	// Text mode: render "key=value" pairs sorted for deterministic output.
	parts := make([]string, 0, len(fields))
	for key, value := range fields {
		parts = append(parts, fmt.Sprintf("%s=%v", key, value))
	}
	sort.Strings(parts)
	line := fmt.Sprintf("%s %-5s %s", isoNow(), level, msg)
	if len(parts) > 0 {
		line += " " + strings.Join(parts, " ")
	}
	appLogger.logger.Println(line)
}
// Info logs msg with optional structured fields at INFO level.
func (appLogger *AppLogger) Info(msg string, fields map[string]any) {
	appLogger.log("INFO", msg, fields)
}

// Warn logs msg with optional structured fields at WARN level.
func (appLogger *AppLogger) Warn(msg string, fields map[string]any) {
	appLogger.log("WARN", msg, fields)
}

// Error logs msg with optional structured fields at ERROR level.
func (appLogger *AppLogger) Error(msg string, fields map[string]any) {
	appLogger.log("ERROR", msg, fields)
}

// Debug logs msg at DEBUG level; the record is dropped unless debug
// logging was enabled when the logger was constructed.
func (appLogger *AppLogger) Debug(msg string, fields map[string]any) {
	appLogger.log("DEBUG", msg, fields)
}
// Acquire opens (or creates) the lock file and takes a non-blocking exclusive
// flock on it, so that at most one instance per lock path can run. On success
// the current PID is written into the file for operator inspection; on
// contention the flock call fails immediately and its error is returned.
// NOTE: flock is unix-only; this tool does not target Windows.
func (lock *FileLock) Acquire() error {
	if err := ensureDir(filepath.Dir(lock.path)); err != nil {
		return err
	}
	lockHandle, err := os.OpenFile(lock.path, os.O_CREATE|os.O_RDWR, 0o644)
	if err != nil {
		return err
	}
	// LOCK_NB makes Flock fail fast instead of blocking on the other holder.
	if err := syscall.Flock(int(lockHandle.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil {
		_ = lockHandle.Close()
		return err
	}
	// Best effort: record our PID; failing to write it does not invalidate
	// the lock itself, which lives in the flock, not the file contents.
	if err := lockHandle.Truncate(0); err == nil {
		_, _ = lockHandle.WriteString(fmt.Sprintf("%d\n", os.Getpid()))
	}
	lock.lockFile = lockHandle
	return nil
}
// Release drops the flock and closes the lock file. It is a no-op when the
// lock was never acquired (or was already released).
func (lock *FileLock) Release() {
	if lock.lockFile == nil {
		return
	}
	_ = syscall.Flock(int(lock.lockFile.Fd()), syscall.LOCK_UN)
	_ = lock.lockFile.Close()
	lock.lockFile = nil
}
// parseThresholds converts the raw config rules into runtime Threshold
// values, defaulting the required consecutive-hit count to 1.
func parseThresholds(config Config) []Threshold {
	var thresholds []Threshold
	for ruleName, rawRule := range config.Thresholds {
		hits := rawRule.Times
		if hits <= 0 {
			hits = 1
		}
		thresholds = append(thresholds, Threshold{
			Name:     ruleName,
			GT:       rawRule.GT,
			LT:       rawRule.LT,
			Times:    hits,
			Mount:    rawRule.Mount,
			GTPerCPU: rawRule.GTPerCPU,
		})
	}
	return thresholds
}
// Check reports whether value violates this threshold and returns the
// human-readable reason text describing the comparison.
func (threshold Threshold) Check(value float64, cpuCount int) (bool, string) {
	// load1 supports a per-core limit: the effective cap scales with CPU count.
	if threshold.Name == "load1" && threshold.GTPerCPU != nil {
		limit := *threshold.GTPerCPU * float64(maxInt(cpuCount, 1))
		return value > limit, fmt.Sprintf("load1 %.2f > %.2f (gt_per_cpu=%.2f)", value, limit, *threshold.GTPerCPU)
	}
	switch {
	case threshold.GT != nil && value > *threshold.GT:
		return true, fmt.Sprintf("%s %.2f > %.2f", threshold.Name, value, *threshold.GT)
	case threshold.LT != nil && value < *threshold.LT:
		return true, fmt.Sprintf("%s %.2f < %.2f", threshold.Name, value, *threshold.LT)
	default:
		return false, ""
	}
}
// newAlertEngine wires a fresh AlertEngine with zeroed hit counters.
func newAlertEngine(thresholds []Threshold) *AlertEngine {
	return &AlertEngine{
		thresholds: thresholds,
		hitCounts:  make(map[string]int),
	}
}
// Evaluate updates the consecutive-hit counters from one sample and returns
// whether any rule fired, along with the triggered alerts. A rule only
// produces an alert once its metric has violated the threshold for at least
// Times samples in a row; any sample back within limits resets its streak.
func (engine *AlertEngine) Evaluate(metrics Metrics) (bool, []Alert) {
	var alerts []Alert
	hasAlert := false
	// getMetricValue maps a threshold name onto the sampled value; the bool
	// is false when the metric is unknown or unavailable for this sample.
	getMetricValue := func(threshold Threshold) (float64, bool) {
		switch threshold.Name {
		case "cpu_percent":
			return metrics.CPU.Percent, true
		case "mem_percent":
			return metrics.Mem.Percent, true
		case "load1":
			return metrics.Load.Load1, true
		case "disk_percent":
			// Default to the root filesystem when no mountpoint is configured.
			mountpoint := threshold.Mount
			if mountpoint == "" {
				mountpoint = "/"
			}
			diskUsage, ok := metrics.Disk.Usage[mountpoint]
			if !ok || diskUsage.Error != "" {
				return 0, false
			}
			return diskUsage.Percent, true
		default:
			return 0, false
		}
	}
	for _, threshold := range engine.thresholds {
		currentValue, ok := getMetricValue(threshold)
		// Disk rules are keyed per mountpoint so their streaks stay independent.
		alertKey := threshold.Name
		if threshold.Mount != "" {
			alertKey += ":" + threshold.Mount
		}
		if !ok {
			// Metric unavailable this round: treat as a miss and reset the streak.
			engine.hitCounts[alertKey] = 0
			continue
		}
		hit, reason := threshold.Check(currentValue, metrics.CPU.Count)
		if hit {
			engine.hitCounts[alertKey]++
		} else {
			engine.hitCounts[alertKey] = 0
		}
		if hit && engine.hitCounts[alertKey] >= maxInt(threshold.Times, 1) {
			hasAlert = true
			alerts = append(alerts, Alert{
				Type:   threshold.Name,
				Mount:  threshold.Mount,
				Value:  currentValue,
				Times:  engine.hitCounts[alertKey],
				Reason: reason,
				Rule: map[string]any{
					"gt":         threshold.GT,
					"lt":         threshold.LT,
					"times":      threshold.Times,
					"gt_per_cpu": threshold.GTPerCPU,
				},
			})
		}
	}
	return hasAlert, alerts
}
// warmUpCPUAndProcesses primes gopsutil's percentage counters: the first
// cpu.Percent / proc.Percent call with interval 0 only records a baseline,
// so sampling once up front keeps the first real measurement from being
// wildly off.
func warmUpCPUAndProcesses() {
	_, _ = cpu.Percent(0, false)
	_, _ = cpu.Percent(0, true)
	if procs, err := process.Processes(); err == nil {
		for _, proc := range procs {
			_, _ = proc.Percent(0)
		}
	}
}
// newCollector builds a Collector; when no mountpoints are configured the
// root filesystem is always watched.
func newCollector(mountpoints, netIfaces []string) *Collector {
	watched := mountpoints
	if len(watched) == 0 {
		watched = []string{"/"}
	}
	return &Collector{mountpoints: watched, netIfaces: netIfaces}
}
// Collect performs one full sample: host identity, CPU, load averages,
// memory, per-mountpoint disk space, global disk IO, and network counters.
// Per-second network rates are derived from the delta against the previous
// call's snapshot, so the first sample after startup carries no rate entries.
func (collector *Collector) Collect() (Metrics, error) {
	var metrics Metrics
	metrics.TS = isoNow()
	hostInfo, err := host.Info()
	if err != nil {
		return metrics, err
	}
	metrics.Host = hostInfo.Hostname
	metrics.UptimeSec = hostInfo.Uptime
	metrics.BootTime = time.Unix(int64(hostInfo.BootTime), 0).Format(time.RFC3339)
	// Interval 0 means "percentage since the previous cpu.Percent call";
	// warmUpCPUAndProcesses established the baseline at startup.
	totalCPUPercentages, err := cpu.Percent(0, false)
	if err != nil {
		return metrics, err
	}
	perCPUPercentages, err := cpu.Percent(0, true)
	if err != nil {
		return metrics, err
	}
	cpuCount, _ := cpu.Counts(true)
	totalCPUPercent := 0.0
	if len(totalCPUPercentages) > 0 {
		totalCPUPercent = totalCPUPercentages[0]
	}
	metrics.CPU = CPUInfo{Count: maxInt(cpuCount, 1), Percent: totalCPUPercent, PerCPU: perCPUPercentages}
	// Load averages are best-effort: on error the zero LoadInfo is kept.
	loadAverage, err := load.Avg()
	if err == nil {
		metrics.Load = LoadInfo{Load1: loadAverage.Load1, Load5: loadAverage.Load5, Load15: loadAverage.Load15}
	}
	virtualMemoryStat, err := mem.VirtualMemory()
	if err != nil {
		return metrics, err
	}
	// NOTE(review): the SwapMemory error is discarded; if gopsutil ever
	// returns a nil stat together with an error this would panic below —
	// confirm the library's contract on the target platform.
	swapMemoryStat, _ := mem.SwapMemory()
	metrics.Mem = MemInfo{
		Total:       virtualMemoryStat.Total,
		Available:   virtualMemoryStat.Available,
		Used:        virtualMemoryStat.Used,
		Percent:     virtualMemoryStat.UsedPercent,
		SwapTotal:   swapMemoryStat.Total,
		SwapUsed:    swapMemoryStat.Used,
		SwapPercent: swapMemoryStat.UsedPercent,
	}
	// Disk space: a failed mountpoint is reported via its Error field rather
	// than aborting the whole sample.
	diskUsageByMount := map[string]DiskUsage{}
	for _, mountpoint := range collector.mountpoints {
		diskUsageStat, err := disk.Usage(mountpoint)
		if err != nil {
			diskUsageByMount[mountpoint] = DiskUsage{Error: err.Error()}
			continue
		}
		diskUsageByMount[mountpoint] = DiskUsage{
			Total:   diskUsageStat.Total,
			Used:    diskUsageStat.Used,
			Free:    diskUsageStat.Free,
			Percent: diskUsageStat.UsedPercent,
		}
	}
	// Global disk IO: sum the per-device counters (best-effort on error).
	diskIOCounters, _ := disk.IOCounters()
	diskIO := DiskIO{}
	for _, diskCounter := range diskIOCounters {
		diskIO.ReadBytes += diskCounter.ReadBytes
		diskIO.WriteBytes += diskCounter.WriteBytes
		diskIO.ReadCount += diskCounter.ReadCount
		diskIO.WriteCount += diskCounter.WriteCount
	}
	metrics.Disk = DiskInfo{Usage: diskUsageByMount, IO: diskIO}
	nicCounters, err := gnet.IOCounters(true)
	if err != nil {
		return metrics, err
	}
	// An empty whitelist selects every NIC.
	selectedInterfaces := map[string]bool{}
	if len(collector.netIfaces) > 0 {
		for _, iface := range collector.netIfaces {
			selectedInterfaces[iface] = true
		}
	}
	currentNetCounters := map[string]NetCounter{}
	for _, nicCounter := range nicCounters {
		if len(selectedInterfaces) > 0 && !selectedInterfaces[nicCounter.Name] {
			continue
		}
		currentNetCounters[nicCounter.Name] = NetCounter{
			BytesSent: nicCounter.BytesSent,
			BytesRecv: nicCounter.BytesRecv,
		}
	}
	netRates := map[string]NetRate{}
	now := time.Now()
	if collector.previousNetSnapshot != nil {
		elapsedSeconds := now.Sub(collector.previousNetSnapshot.CapturedAt).Seconds()
		// Clamp to avoid division by zero on pathological clock readings.
		if elapsedSeconds < 1e-6 {
			elapsedSeconds = 1e-6
		}
		// The rate is the difference of cumulative counters divided by the
		// sampling interval; NICs absent from the previous snapshot are skipped.
		for iface, currentCounter := range currentNetCounters {
			previousCounter, ok := collector.previousNetSnapshot.CountersByNIC[iface]
			if !ok {
				continue
			}
			rxBps := float64(currentCounter.BytesRecv-previousCounter.BytesRecv) / elapsedSeconds
			txBps := float64(currentCounter.BytesSent-previousCounter.BytesSent) / elapsedSeconds
			netRates[iface] = NetRate{RxBps: rxBps, TxBps: txBps, Dt: elapsedSeconds}
		}
	}
	collector.previousNetSnapshot = &NetSnapshot{CapturedAt: now, CountersByNIC: currentNetCounters}
	metrics.Net = NetInfo{Counters: currentNetCounters, Rate: netRates}
	return metrics, nil
}
// topNProcesses returns the top-N processes by CPU usage and by resident
// memory as two lists keyed "cpu" and "mem". Processes named in ignoreNames,
// and processes whose name or memory info cannot be read, are skipped.
// A negative topN is treated as zero (empty lists).
func topNProcesses(topN int, ignoreNames []string) map[string][]ProcessInfo {
	ignoredProcessNames := map[string]bool{}
	for _, processName := range ignoreNames {
		ignoredProcessNames[processName] = true
	}
	processList, err := process.Processes()
	if err != nil {
		return map[string][]ProcessInfo{"cpu": {}, "mem": {}}
	}
	processInfos := make([]ProcessInfo, 0, len(processList))
	for _, proc := range processList {
		processName, err := proc.Name()
		if err != nil || ignoredProcessNames[processName] {
			continue
		}
		// Username and cmdline failures are tolerated (empty values kept).
		username, _ := proc.Username()
		commandLine, _ := proc.Cmdline()
		memoryInfo, err := proc.MemoryInfo()
		if err != nil {
			continue
		}
		// Percent(0) reports usage since the previous call for this process;
		// warmUpCPUAndProcesses primed the baseline at startup.
		cpuPercent, _ := proc.Percent(0)
		processInfos = append(processInfos, ProcessInfo{
			PID:        proc.Pid,
			Name:       processName,
			User:       username,
			CPUPercent: cpuPercent,
			RSS:        memoryInfo.RSS,
			Cmdline:    truncate(commandLine, 500),
		})
	}
	// Sort two independent copies so one ordering does not disturb the other.
	processesByCPU := append([]ProcessInfo(nil), processInfos...)
	sort.Slice(processesByCPU, func(i, j int) bool { return processesByCPU[i].CPUPercent > processesByCPU[j].CPUPercent })
	processesByMemory := append([]ProcessInfo(nil), processInfos...)
	sort.Slice(processesByMemory, func(i, j int) bool { return processesByMemory[i].RSS > processesByMemory[j].RSS })
	if topN < 0 {
		topN = 0
	}
	if len(processesByCPU) > topN {
		processesByCPU = processesByCPU[:topN]
	}
	if len(processesByMemory) > topN {
		processesByMemory = processesByMemory[:topN]
	}
	return map[string][]ProcessInfo{"cpu": processesByCPU, "mem": processesByMemory}
}
// truncate caps value at maxLength bytes, stepping back so the cut never
// splits a multi-byte UTF-8 sequence (command lines may contain non-ASCII
// text). A negative maxLength is treated as zero.
func truncate(value string, maxLength int) string {
	if maxLength < 0 {
		maxLength = 0
	}
	if len(value) <= maxLength {
		return value
	}
	cut := maxLength
	// Back up over UTF-8 continuation bytes so the result stays valid UTF-8.
	for cut > 0 && !utf8.RuneStart(value[cut]) {
		cut--
	}
	return value[:cut]
}
// promEscape escapes backslashes, newlines, and double quotes so a string is
// safe to embed as a Prometheus label value.
func promEscape(value string) string {
	var escaped strings.Builder
	escaped.Grow(len(value))
	// Process bytes, not runes, so invalid UTF-8 passes through untouched.
	for i := 0; i < len(value); i++ {
		switch value[i] {
		case '\\':
			escaped.WriteString(`\\`)
		case '\n':
			escaped.WriteString(`\n`)
		case '"':
			escaped.WriteString(`\"`)
		default:
			escaped.WriteByte(value[i])
		}
	}
	return escaped.String()
}
// buildPromText renders the sample and current alert state in the Prometheus
// textfile exposition format (HELP/TYPE headers followed by gauge samples).
// Map keys are sorted so output is deterministic across runs.
func buildPromText(metrics Metrics, alerts []Alert) string {
	var promBuilder strings.Builder
	fmt.Fprintf(&promBuilder, "# HELP gowatch_cpu_percent CPU usage percent\n")
	fmt.Fprintf(&promBuilder, "# TYPE gowatch_cpu_percent gauge\n")
	fmt.Fprintf(&promBuilder, "gowatch_cpu_percent %.3f\n", metrics.CPU.Percent)
	fmt.Fprintf(&promBuilder, "# HELP gowatch_mem_percent Memory usage percent\n")
	fmt.Fprintf(&promBuilder, "# TYPE gowatch_mem_percent gauge\n")
	fmt.Fprintf(&promBuilder, "gowatch_mem_percent %.3f\n", metrics.Mem.Percent)
	fmt.Fprintf(&promBuilder, "# HELP gowatch_load1 System load1\n")
	fmt.Fprintf(&promBuilder, "# TYPE gowatch_load1 gauge\n")
	fmt.Fprintf(&promBuilder, "gowatch_load1 %.3f\n", metrics.Load.Load1)
	fmt.Fprintf(&promBuilder, "# HELP gowatch_disk_percent Disk usage percent by mount\n")
	fmt.Fprintf(&promBuilder, "# TYPE gowatch_disk_percent gauge\n")
	// Sort mountpoints for stable ordering; failed mounts are omitted.
	mountpoints := make([]string, 0, len(metrics.Disk.Usage))
	for mountpoint := range metrics.Disk.Usage {
		mountpoints = append(mountpoints, mountpoint)
	}
	sort.Strings(mountpoints)
	for _, mountpoint := range mountpoints {
		diskUsage := metrics.Disk.Usage[mountpoint]
		if diskUsage.Error != "" {
			continue
		}
		fmt.Fprintf(&promBuilder, "gowatch_disk_percent{mount=\"%s\"} %.3f\n", promEscape(mountpoint), diskUsage.Percent)
	}
	fmt.Fprintf(&promBuilder, "# HELP gowatch_net_rx_bytes_per_sec Network RX rate bytes/s\n")
	fmt.Fprintf(&promBuilder, "# TYPE gowatch_net_rx_bytes_per_sec gauge\n")
	fmt.Fprintf(&promBuilder, "# HELP gowatch_net_tx_bytes_per_sec Network TX rate bytes/s\n")
	fmt.Fprintf(&promBuilder, "# TYPE gowatch_net_tx_bytes_per_sec gauge\n")
	// Rates exist only from the second sample onward (see Collect).
	interfaceNames := make([]string, 0, len(metrics.Net.Rate))
	for interfaceName := range metrics.Net.Rate {
		interfaceNames = append(interfaceNames, interfaceName)
	}
	sort.Strings(interfaceNames)
	for _, interfaceName := range interfaceNames {
		netRate := metrics.Net.Rate[interfaceName]
		fmt.Fprintf(&promBuilder, "gowatch_net_rx_bytes_per_sec{iface=\"%s\"} %.3f\n", promEscape(interfaceName), netRate.RxBps)
		fmt.Fprintf(&promBuilder, "gowatch_net_tx_bytes_per_sec{iface=\"%s\"} %.3f\n", promEscape(interfaceName), netRate.TxBps)
	}
	// Emit a 0/1 flag for every known alert type so dashboards always see
	// the series, even when no alert of that type is active.
	activeAlerts := map[string]bool{}
	for _, alert := range alerts {
		activeAlerts[alert.Type] = true
	}
	knownAlertTypes := []string{"cpu_percent", "mem_percent", "disk_percent", "load1"}
	fmt.Fprintf(&promBuilder, "# HELP gowatch_alert_active Alert active (0/1) by type\n")
	fmt.Fprintf(&promBuilder, "# TYPE gowatch_alert_active gauge\n")
	for _, alertType := range knownAlertTypes {
		activeValue := 0
		if activeAlerts[alertType] {
			activeValue = 1
		}
		fmt.Fprintf(&promBuilder, "gowatch_alert_active{type=\"%s\"} %d\n", promEscape(alertType), activeValue)
	}
	return promBuilder.String()
}
// sampleSummary renders a concise one-line human-readable digest of a sample.
func sampleSummary(jobName string, metrics Metrics, hasAlert bool) string {
	rootUsage := "n/a"
	// Only the root filesystem is summarized; unreadable mounts show "n/a".
	if usage, ok := metrics.Disk.Usage["/"]; ok && usage.Error == "" {
		rootUsage = fmt.Sprintf("%.1f", usage.Percent)
	}
	return fmt.Sprintf("job=%s cpu=%.1f%% mem=%.1f%% load1=%.2f disk/=%s alert=%v",
		jobName, metrics.CPU.Percent, metrics.Mem.Percent, metrics.Load.Load1, rootUsage, hasAlert)
}
// configValueInt resolves an int setting with precedence: command-line flag
// (any non-negative value wins), then config file (positive values only),
// then the built-in default.
func configValueInt(flagValue, configValue, defaultValue int) int {
	switch {
	case flagValue >= 0:
		return flagValue
	case configValue > 0:
		return configValue
	default:
		return defaultValue
	}
}
// configValueString resolves a string setting with precedence: command-line
// flag, then config file, then the built-in default (empty strings are
// treated as "not set").
func configValueString(flagValue, configValue, defaultValue string) string {
	switch {
	case flagValue != "":
		return flagValue
	case configValue != "":
		return configValue
	default:
		return defaultValue
	}
}
// run is the main execution loop: acquire the job lock, sample, evaluate
// alerts, log, optionally export a Prometheus textfile, and honor the
// once/duration/signal exit conditions. It returns one of the Exit* codes:
// ExitLock when another instance holds the lock, ExitFail on logger or
// collection errors, ExitAlert when any alert fired during the run, and
// ExitOK otherwise.
func run(ctx context.Context, config Config, once bool, intervalSec, durationSec int, format, promTextfilePath string, debug bool) int {
	jobName := config.JobName
	if jobName == "" {
		jobName = "gowatch"
	}
	intervalSec = maxInt(intervalSec, 1)
	if config.TopN <= 0 {
		config.TopN = 5
	}
	logger, err := newLogger(config.LogFile, format, debug || strings.EqualFold(config.LogLevel, "DEBUG"))
	if err != nil {
		fmt.Fprintf(os.Stderr, "init logger failed: %v\n", err)
		return ExitFail
	}
	lockPath := config.LockFile
	if lockPath == "" {
		lockPath = fmt.Sprintf("/tmp/gowatch_%s.lock", jobName)
	}
	fileLock := &FileLock{path: lockPath}
	if err := fileLock.Acquire(); err != nil {
		logger.Warn("lock not acquired, another instance may be running", map[string]any{"job": jobName, "lock": lockPath, "err": err.Error()})
		return ExitLock
	}
	defer fileLock.Release()
	// Prime CPU/process percentage baselines before the first real sample.
	warmUpCPUAndProcesses()
	collector := newCollector(config.Mountpoints, config.NetIfaces)
	engine := newAlertEngine(parseThresholds(config))
	startedAt := time.Now()
	anyAlert := false
	for {
		loopStartedAt := time.Now()
		metrics, err := collector.Collect()
		if err != nil {
			logger.Error("collect failed", map[string]any{"job": jobName, "err": err.Error()})
			return ExitFail
		}
		hasAlert, alerts := engine.Evaluate(metrics)
		anyAlert = anyAlert || hasAlert
		if format == "json" {
			logger.Info("sample", map[string]any{"job": jobName, "metrics": metrics, "alert": hasAlert, "alerts": alerts})
		} else {
			logger.Info(sampleSummary(jobName, metrics, hasAlert), map[string]any{"job": jobName})
		}
		// On alert, also report the heaviest processes to aid diagnosis.
		if hasAlert {
			topProcesses := topNProcesses(config.TopN, config.IgnoreProcess)
			if format == "json" {
				logger.Warn("alert_detail", map[string]any{"job": jobName, "alerts": alerts, "topn": topProcesses})
			} else {
				logger.Warn("ALERT", map[string]any{"job": jobName, "alerts": alerts})
				logger.Warn("TopCPU", map[string]any{"job": jobName, "procs": topProcesses["cpu"]})
				logger.Warn("TopMEM", map[string]any{"job": jobName, "procs": topProcesses["mem"]})
			}
		}
		// Textfile export failures are logged but never abort the loop.
		if promTextfilePath != "" {
			if err := atomicWriteText(promTextfilePath, buildPromText(metrics, alerts)); err != nil {
				logger.Error("prom textfile write failed", map[string]any{"job": jobName, "err": err.Error(), "path": promTextfilePath})
			}
		}
		// One-shot mode: exit immediately after the first sample.
		if once {
			if hasAlert {
				return ExitAlert
			}
			return ExitOK
		}
		if durationSec > 0 && time.Since(startedAt) >= time.Duration(durationSec)*time.Second {
			logger.Info("duration reached, exiting", map[string]any{"job": jobName, "duration": durationSec})
			break
		}
		collectDuration := time.Since(loopStartedAt)
		if collectDuration > time.Duration(intervalSec)*time.Second {
			// Collection took longer than the interval: warn and start the
			// next sample immediately instead of sleeping.
			logger.Warn("collector_overrun", map[string]any{
				"job":          jobName,
				"spent_sec":    math.Round(collectDuration.Seconds()*1000) / 1000,
				"interval_sec": intervalSec,
			})
		} else {
			// Otherwise wait out the rest of the sampling period while still
			// listening for a stop signal.
			select {
			case <-ctx.Done():
				logger.Info("received stop signal, exiting", map[string]any{"job": jobName})
				if anyAlert {
					return ExitAlert
				}
				return ExitOK
			case <-time.After(time.Duration(intervalSec)*time.Second - collectDuration):
			}
		}
		// Non-blocking re-check so an overrun iteration still notices a
		// cancellation before starting the next sample.
		select {
		case <-ctx.Done():
			logger.Info("received stop signal, exiting", map[string]any{"job": jobName})
			if anyAlert {
				return ExitAlert
			}
			return ExitOK
		default:
		}
	}
	if anyAlert {
		return ExitAlert
	}
	return ExitOK
}
// main parses flags, loads the JSON config, wires SIGINT/SIGTERM into a
// cancellable context, and exits with the code produced by run.
func main() {
	var (
		configPath   = flag.String("config", "", "config.json path")
		once         = flag.Bool("once", false, "collect once and exit")
		interval     = flag.Int("interval", -1, "override interval seconds")
		duration     = flag.Int("duration", -1, "run duration seconds")
		format       = flag.String("format", "", "log format: json|text")
		promTextfile = flag.String("prom-textfile", "", "prometheus textfile output path")
		debug        = flag.Bool("debug", false, "enable debug logging")
	)
	flag.Parse()
	if *configPath == "" {
		fmt.Fprintln(os.Stderr, "--config is required")
		os.Exit(ExitFail)
	}
	var config Config
	if err := readJSONFile(*configPath, &config); err != nil {
		fmt.Fprintf(os.Stderr, "read config failed: %v\n", err)
		os.Exit(ExitFail)
	}
	// Flags take precedence over the config file, which beats built-in defaults.
	finalFormat := configValueString(*format, config.Format, "json")
	finalPromPath := configValueString(*promTextfile, config.PromTextfile, "")
	finalInterval := configValueInt(*interval, config.Interval, 5)
	finalDuration := *duration
	if finalDuration < 0 {
		finalDuration = 0 // 0 means "run until signalled"
	}
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// The first termination signal cancels the context; run observes
	// ctx.Done() between samples and exits cleanly.
	signalCh := make(chan os.Signal, 1)
	signalNotify(signalCh)
	go func() {
		<-signalCh
		cancel()
	}()
	exitCode := run(ctx, config, *once, finalInterval, finalDuration, finalFormat, finalPromPath, *debug)
	os.Exit(exitCode)
}
// signalNotify registers the common termination signals (SIGINT, SIGTERM)
// on the given channel so the main loop can be asked to stop gracefully.
func signalNotify(signalCh chan<- os.Signal) {
	signal.Notify(signalCh, syscall.SIGINT, syscall.SIGTERM)
}
1. 这个工具能做什么
周期性采集主机运行状态,包括 CPU、Load、内存、Swap、磁盘、网络、开机时间和运行时长。
支持一次性采样,也支持按固定间隔持续运行。
支持配置运行时长,到达指定时间后自动退出。
支持按挂载点采集磁盘使用率,默认至少会关注根分区 /。
支持按网卡白名单采集网络流量,并计算每秒收发速率。
支持基于阈值做告警判断,当前支持 cpu_percent、mem_percent、disk_percent、load1。
支持连续命中次数控制,避免单次抖动直接触发告警。
支持按 CPU 核数计算 load1 阈值,适合多核机器。
告警触发时会额外输出 CPU 和内存占用最高的进程列表,方便定位问题。
支持忽略指定进程名,避免监控结果被无关进程干扰。
支持 json 和 text 两种日志格式。
支持同时输出到标准输出和日志文件。
如果配置了日志文件,内置日志轮转,单文件达到 10 MB 后自动切分,默认保留 7 份备份。
支持导出 Prometheus textfile,可直接配合 Node Exporter 的 textfile collector 使用。
通过文件锁保证同一个 job 只运行一个实例,避免重复采样。
支持响应 SIGINT 和 SIGTERM,适合被 systemd、crontab 或脚本调度。
2. 适合的使用场景
给一台机器做轻量的本地巡检
被 crontab 或 systemd 定时拉起
作为批处理任务前后的健康检查
把系统关键指标以 JSON 打到日志平台
把告警状态和资源指标暴露给 Prometheus
3. 采集内容
CPU: 总使用率、每核使用率、CPU 核数
Load: load1、load5、load15
内存: 总量、可用量、已用量、使用率
Swap: 总量、已用量、使用率
磁盘: 每个挂载点的总量、已用量、剩余量、使用率
磁盘 IO: 全局读写字节数、读写次数
网络: 每块网卡的累计收发字节数、每秒收发速率
主机信息: 主机名、开机时间、运行时长、采样时间
4. 告警能力
支持的阈值类型如下:
cpu_percent: CPU 总使用率
mem_percent: 内存使用率
disk_percent: 指定挂载点的磁盘使用率
load1: 1 分钟平均负载
每条阈值规则支持这些能力:
gt: 大于该值时触发
lt: 小于该值时触发
times: 连续命中多少次后才真正告警
mount: 仅用于 disk_percent,指定要检查的挂载点
gt_per_cpu: 仅用于 load1,按 CPU 核数 x 阈值计算上限
告警触发后:
单次采样会返回退出码 10
持续运行模式下会记录告警详情
会输出 Top N CPU 进程和 Top N 内存进程
5. 输出形式
5.1 文本日志
适合直接在终端查看,输出类似:
2026-03-21T12:00:00+08:00 INFO job=gowatch cpu=13.7% mem=61.2% load1=1.25 disk/=72.4 alert=false
5.2 JSON 日志
适合被日志系统或脚本消费,采样结果和告警详情都会以 JSON 输出。
5.3 Prometheus textfile
如果指定 --prom-textfile 或配置项 prom_textfile,会输出这些指标:
gowatch_cpu_percent
gowatch_mem_percent
gowatch_load1
gowatch_disk_percent
gowatch_net_rx_bytes_per_sec
gowatch_net_tx_bytes_per_sec
gowatch_alert_active
6. 命令行参数
7. 配置项说明
8. 退出码
9. 快速开始
9.1 构建
go build -o gowatch .
config.json 示例配置
{
"jobname": "gowatch",
"interval": 5,
"format": "json",
"log_file": "/tmp/gowatch/gowatch.log",
"log_level": "INFO",
"lock_file": "/tmp/gowatch/gowatch.lock",
"mountpoints": ["/"],
"net_ifaces": ["eth0"],
"topn": 5,
"ignore_process": ["systemd", "kthreadd"],
"thresholds": {
"cpu_percent": { "gt": 85, "times": 3 },
"mem_percent": { "gt": 80, "times": 3 },
"disk_percent": { "gt": 80, "times": 1, "mount": "/" },
"load1": { "gt_per_cpu": 1.5, "times": 2 }
},
"prom_textfile": "/tmp/gowatch/gowatch.prom"
}
9.2 运行
9.2.1 持续监控
./gowatch --config ./config.json
root@hosthatchhk:~/gowatch# ./gowatch --config ./config.json
{"alert":false,"alerts":null,"job":"gowatch","level":"INFO","metrics":{"ts":"2026-03-21T23:55:27+08:00","host":"hosthatchhk","cpu":{"count":3,"percent":50,"percpu_percent":[80.00000074505806,0,33.33333022892468]},"load":{"load1":0.14,"load5":0.14,"load15":0.19},"mem":{"total":6225928192,"available":1836888064,"used":4060340224,"percent":65.21662471496748,"swap_total":0,"swap_used":0,"swap_percent":0},"disk":{"usage":{"/":{"total":58760200192,"used":39364460544,"free":16377626624,"percent":70.61892107728262}},"io":{"read_bytes":152716232704,"write_bytes":9487571705856,"read_count":3084159,"write_count":935100404}},"net":{"counters":{"eth0":{"bytes_sent":273623595999,"bytes_recv":546038534609}},"rate":{}},"uptime_sec":8563532,"boot_time":"2025-12-12T21:09:56+08:00"},"msg":"sample","ts":"2026-03-21T23:55:27+08:00"}
{"alert":false,"alerts":null,"job":"gowatch","level":"INFO","metrics":{"ts":"2026-03-21T23:55:32+08:00","host":"hosthatchhk","cpu":{"count":3,"percent":5.827193577096099,"percpu_percent":[6.43863177954638,4.780876480349941,6.477732835501928]},"load":{"load1":0.13,"load5":0.14,"load15":0.19},"mem":{"total":6225928192,"available":1836658688,"used":4060557312,"percent":65.2201115524848,"swap_total":0,"swap_used":0,"swap_percent":0},"disk":{"usage":{"/":{"total":58760200192,"used":39364501504,"free":16377585664,"percent":70.61899455856414}},"io":{"read_bytes":152716232704,"write_bytes":9487571894272,"read_count":3084159,"write_count":935100418}},"net":{"counters":{"eth0":{"bytes_sent":273623758861,"bytes_recv":546038714259}},"rate":{"eth0":{"rx_Bps":35938.82178039044,"tx_Bps":32580.397399376274,"dt":4.998772667}}},"uptime_sec":8563537,"boot_time":"2025-12-12T21:09:56+08:00"},"msg":"sample","ts":"2026-03-21T23:55:32+08:00"}
{"alert":false,"alerts":null,"job":"gowatch","level":"INFO","metrics":{"ts":"2026-03-21T23:55:37+08:00","host":"hosthatchhk","cpu":{"count":3,"percent":7.262945515373922,"percpu_percent":[7.272727242966,7.085020236121907,6.666666650360682]},"load":{"load1":0.12,"load5":0.14,"load15":0.18},"mem":{"total":6225928192,"available":1828683776,"used":4068552704,"percent":65.34853243614153,"swap_total":0,"swap_used":0,"swap_percent":0},"disk":{"usage":{"/":{"total":58760200192,"used":39364603904,"free":16377483264,"percent":70.61917826176796}},"io":{"read_bytes":152716232704,"write_bytes":9487572082688,"read_count":3084159,"write_count":935100438}},"net":{"counters":{"eth0":{"bytes_sent":273623897867,"bytes_recv":546039385846}},"rate":{"eth0":{"rx_Bps":134264.89941679427,"tx_Bps":27790.333357153886,"dt":5.001955112}}},"uptime_sec":8563542,"boot_time":"2025-12-12T21:09:56+08:00"},"msg":"sample","ts":"2026-03-21T23:55:37+08:00"}
^C{"job":"gowatch","level":"INFO","msg":"received stop signal, exiting","ts":"2026-03-21T23:55:38+08:00"}
root@hosthatchhk:~/gowatch#
9.2.2 只采样一次
./gowatch --config ./config.json --once
root@hosthatchhk:~/gowatch# ./gowatch --config ./config.json --once
{"alert":false,"alerts":null,"job":"gowatch","level":"INFO","metrics":{"ts":"2026-03-21T23:54:56+08:00","host":"hosthatchhk","cpu":{"count":3,"percent":50.000001552204274,"percpu_percent":[80,40,0]},"load":{"load1":0.02,"load5":0.12,"load15":0.18},"mem":{"total":6225928192,"available":1822437376,"used":4074803200,"percent":65.44892704088548,"swap_total":0,"swap_used":0,"swap_percent":0},"disk":{"usage":{"/":{"total":58760200192,"used":39363870720,"free":16378216448,"percent":70.61786294682865}},"io":{"read_bytes":152716232704,"write_bytes":9487528665088,"read_count":3084159,"write_count":935096172}},"net":{"counters":{"eth0":{"bytes_sent":273622580613,"bytes_recv":546036263097}},"rate":{}},"uptime_sec":8563501,"boot_time":"2025-12-12T21:09:56+08:00"},"msg":"sample","ts":"2026-03-21T23:54:57+08:00"}
root@hosthatchhk:~/gowatch#
9.2.3 覆盖采样间隔并输出为文本日志
./gowatch --config ./config.json --interval 10 --format text
root@hosthatchhk:~/gowatch# ./gowatch --config ./config.json --interval 10 --format text
2026-03-21T23:53:58+08:00 INFO job=gowatch cpu=50.0% mem=65.2% load1=0.07 disk/=70.6 alert=false job=gowatch
2026-03-21T23:54:08+08:00 INFO job=gowatch cpu=6.4% mem=65.3% load1=0.06 disk/=70.6 alert=false job=gowatch
2026-03-21T23:54:18+08:00 INFO job=gowatch cpu=6.0% mem=65.4% load1=0.05 disk/=70.6 alert=false job=gowatch
2026-03-21T23:54:28+08:00 INFO job=gowatch cpu=11.4% mem=65.5% load1=0.04 disk/=70.6 alert=false job=gowatch
^C2026-03-21T23:54:29+08:00 INFO received stop signal, exiting job=gowatch
root@hosthatchhk:~/gowatch#
9.2.4 运行 15 秒后退出
./gowatch --config ./config.json --format text --duration 15
root@hosthatchhk:~/gowatch# ./gowatch --config ./config.json --format text --duration 15
2026-03-21T23:57:18+08:00 INFO job=gowatch cpu=45.5% mem=65.0% load1=0.04 disk/=70.6 alert=false job=gowatch
2026-03-21T23:57:23+08:00 INFO job=gowatch cpu=18.3% mem=65.1% load1=0.04 disk/=70.6 alert=false job=gowatch
2026-03-21T23:57:28+08:00 INFO job=gowatch cpu=5.9% mem=65.3% load1=0.03 disk/=70.6 alert=false job=gowatch
2026-03-21T23:57:33+08:00 INFO job=gowatch cpu=4.9% mem=65.3% load1=0.03 disk/=70.6 alert=false job=gowatch
2026-03-21T23:57:33+08:00 INFO duration reached, exiting duration=15 job=gowatch
root@hosthatchhk:~/gowatch#
9.2.5 导出 Prometheus textfile
./gowatch --config ./config.json --prom-textfile /var/lib/node_exporter/textfile/gowatch.prom
root@hosthatchhk:~/gowatch# ./gowatch --config ./config.json --prom-textfile /var/lib/node_exporter/textfile/gowatch.prom
{"alert":false,"alerts":null,"job":"gowatch","level":"INFO","metrics":{"ts":"2026-03-21T23:59:36+08:00","host":"hosthatchhk","cpu":{"count":3,"percent":36.363637903012474,"percpu_percent":[0,0.0000023283063801601846,100]},"load":{"load1":0.27,"load5":0.14,"load15":0.17},"mem":{"total":6225928192,"available":1828020224,"used":4069199872,"percent":65.35892715930636,"swap_total":0,"swap_used":0,"swap_percent":0},"disk":{"usage":{"/":{"total":58760200192,"used":39369326592,"free":16372760576,"percent":70.62765065352782}},"io":{"read_bytes":152716445696,"write_bytes":9487915646976,"read_count":3084169,"write_count":935135058}},"net":{"counters":{"eth0":{"bytes_sent":273631942144,"bytes_recv":546057293228}},"rate":{}},"uptime_sec":8563781,"boot_time":"2025-12-12T21:09:56+08:00"},"msg":"sample","ts":"2026-03-21T23:59:36+08:00"}
{"alert":false,"alerts":null,"job":"gowatch","level":"INFO","metrics":{"ts":"2026-03-21T23:59:41+08:00","host":"hosthatchhk","cpu":{"count":3,"percent":8.19341841829658,"percpu_percent":[7.4898785615917856,7.085020271155843,9.456740420938434]},"load":{"load1":0.25,"load5":0.14,"load15":0.17},"mem":{"total":6225928192,"available":1822982144,"used":4074246144,"percent":65.43997968423726,"swap_total":0,"swap_used":0,"swap_percent":0},"disk":{"usage":{"/":{"total":58760200192,"used":39369478144,"free":16372609024,"percent":70.62792253426946}},"io":{"read_bytes":152716445696,"write_bytes":9487916007424,"read_count":3084169,"write_count":935135086}},"net":{"counters":{"eth0":{"bytes_sent":273632105941,"bytes_recv":546057754952}},"rate":{"eth0":{"rx_Bps":92323.40111590724,"tx_Bps":32751.808726820043,"dt":5.001158909}}},"uptime_sec":8563786,"boot_time":"2025-12-12T21:09:56+08:00"},"msg":"sample","ts":"2026-03-21T23:59:41+08:00"}
{"alert":false,"alerts":null,"job":"gowatch","level":"INFO","metrics":{"ts":"2026-03-21T23:59:46+08:00","host":"hosthatchhk","cpu":{"count":3,"percent":7.4747474715515265,"percpu_percent":[8.216432838755008,7.04225349189674,7.723577255538492]},"load":{"load1":0.23,"load5":0.13,"load15":0.17},"mem":{"total":6225928192,"available":1823047680,"used":4074180608,"percent":65.43892705404335,"swap_total":0,"swap_used":0,"swap_percent":0},"disk":{"usage":{"/":{"total":58760200192,"used":39369527296,"free":16372559872,"percent":70.62801071180729}},"io":{"read_bytes":152716445696,"write_bytes":9487953575936,"read_count":3084169,"write_count":935138866}},"net":{"counters":{"eth0":{"bytes_sent":273632350259,"bytes_recv":546057999243}},"rate":{"eth0":{"rx_Bps":48858.644203249634,"tx_Bps":48864.04425234472,"dt":4.999954542}}},"uptime_sec":8563791,"boot_time":"2025-12-12T21:09:56+08:00"},"msg":"sample","ts":"2026-03-21T23:59:46+08:00"}
{"alert":false,"alerts":null,"job":"gowatch","level":"INFO","metrics":{"ts":"2026-03-21T23:59:51+08:00","host":"hosthatchhk","cpu":{"count":3,"percent":6.908115366745801,"percpu_percent":[6.2248996036321795,8.016032071234704,6.464646452825576]},"load":{"load1":0.21,"load5":0.13,"load15":0.17},"mem":{"total":6225928192,"available":1820225536,"used":4077002752,"percent":65.4842559417685,"swap_total":0,"swap_used":0,"swap_percent":0},"disk":{"usage":{"/":{"total":58760200192,"used":39369633792,"free":16372453376,"percent":70.62820176313926}},"io":{"read_bytes":152716445696,"write_bytes":9487954894848,"read_count":3084169,"write_count":935139038}},"net":{"counters":{"eth0":{"bytes_sent":273632486502,"bytes_recv":546058457714}},"rate":{"eth0":{"rx_Bps":91584.09062753986,"tx_Bps":27215.87899642052,"dt":5.006011381}}},"uptime_sec":8563796,"boot_time":"2025-12-12T21:09:56+08:00"},"msg":"sample","ts":"2026-03-21T23:59:51+08:00"}
^C{"job":"gowatch","level":"INFO","msg":"received stop signal, exiting","ts":"2026-03-21T23:59:52+08:00"}
root@hosthatchhk:~/gowatch# ls -alh /var/lib/node_exporter/textfile/gowatch.prom
-rw------- 1 root root 988 Mar 21 23:59 /var/lib/node_exporter/textfile/gowatch.prom
root@hosthatchhk:~/gowatch# cat /var/lib/node_exporter/textfile/gowatch.prom
# HELP gowatch_cpu_percent CPU usage percent
# TYPE gowatch_cpu_percent gauge
gowatch_cpu_percent 6.908
# HELP gowatch_mem_percent Memory usage percent
# TYPE gowatch_mem_percent gauge
gowatch_mem_percent 65.484
# HELP gowatch_load1 System load1
# TYPE gowatch_load1 gauge
gowatch_load1 0.210
# HELP gowatch_disk_percent Disk usage percent by mount
# TYPE gowatch_disk_percent gauge
gowatch_disk_percent{mount="/"} 70.628
# HELP gowatch_net_rx_bytes_per_sec Network RX rate bytes/s
# TYPE gowatch_net_rx_bytes_per_sec gauge
# HELP gowatch_net_tx_bytes_per_sec Network TX rate bytes/s
# TYPE gowatch_net_tx_bytes_per_sec gauge
gowatch_net_rx_bytes_per_sec{iface="eth0"} 91584.091
gowatch_net_tx_bytes_per_sec{iface="eth0"} 27215.879
# HELP gowatch_alert_active Alert active (0/1) by type
# TYPE gowatch_alert_active gauge
gowatch_alert_active{type="cpu_percent"} 0
gowatch_alert_active{type="mem_percent"} 0
gowatch_alert_active{type="disk_percent"} 0
gowatch_alert_active{type="load1"} 0
root@hosthatchhk:~/gowatch#