Files
monitor/agent/main.go
2025-12-05 00:03:44 +08:00

1179 lines
31 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package main
import (
"bytes"
"encoding/json"
"fmt"
"log"
stdnet "net"
"net/http"
"os"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/shirou/gopsutil/cpu"
"github.com/shirou/gopsutil/disk"
"github.com/shirou/gopsutil/mem"
"github.com/shirou/gopsutil/net"
"github.com/shirou/gopsutil/process"
)
// Config Agent配置
type Config struct {
ServerURL string `json:"server_url"`
ID string `json:"id"` // Agent唯一标识自动生成
Name string `json:"name"` // Agent显示名称
Token string `json:"token"` // 设备认证令牌
Interval string `json:"interval"` // 采集间隔
Debug bool `json:"debug"` // 调试模式
APIPort int `json:"api_port"` // API端口
}
// NetworkInterfaceMetrics 网卡监控指标
type NetworkInterfaceMetrics struct {
BytesSent uint64 `json:"bytes_sent"` // 发送速率 (bytes/s)
BytesReceived uint64 `json:"bytes_received"` // 接收速率 (bytes/s)
TxBytes uint64 `json:"tx_bytes"` // 累计发送字节数
RxBytes uint64 `json:"rx_bytes"` // 累计接收字节数
}
// DiskMetrics 磁盘监控指标
type DiskMetrics struct {
UsedPercent float64 `json:"used_percent"` // 使用率百分比
Total uint64 `json:"total"` // 总容量 (bytes)
}
// ProcessMetrics 进程监控指标
type ProcessMetrics struct {
Name string `json:"name"` // 进程名
Username string `json:"username"` // 用户名
PID int32 `json:"pid"` // 进程ID
CPU float64 `json:"cpu"` // CPU使用率
Memory float64 `json:"memory"` // 内存使用率
Path string `json:"path"` // 路径
Cmdline string `json:"cmdline"` // 命令行
Ports []int `json:"ports"` // 占用端口
}
// DiskDetailMetrics 磁盘详细信息
type DiskDetailMetrics struct {
DeviceID string `json:"device_id"` // 设备ID
Status string `json:"status"` // 设备状态
Type string `json:"type"` // 设备类型
SizeGB float64 `json:"size_gb"` // 设备大小(GB)
Model string `json:"model"` // 设备型号
InterfaceType string `json:"interface_type"` // 接口类型
Description string `json:"description"` // 设备描述
}
// LogEntry 系统日志条目
type LogEntry struct {
Sequence int `json:"sequence"` // 日志序号
Source string `json:"source"` // 来源
Time time.Time `json:"time"` // 发生时间
Message string `json:"message"` // 内容
}
// Metrics 监控指标
type Metrics struct {
CPU float64 `json:"cpu"`
CPUHz float64 `json:"cpu_hz"` // CPU频率 (MHz)
Memory float64 `json:"memory"`
Disk map[string]DiskMetrics `json:"disk"`
DiskDetails []DiskDetailMetrics `json:"disk_details"` // 磁盘详细信息
Network map[string]NetworkInterfaceMetrics `json:"network"`
Processes []ProcessMetrics `json:"processes"` // 进程信息
Logs []LogEntry `json:"logs"` // 系统日志
RxTotal uint64 `json:"rx_total"` // 所有网卡累计接收字节数总和
TxTotal uint64 `json:"tx_total"` // 所有网卡累计发送字节数总和
RxRate uint64 `json:"rx_rate"` // 所有网卡实时接收速率总和 (bytes/s)
TxRate uint64 `json:"tx_rate"` // 所有网卡实时发送速率总和 (bytes/s)
// 设备信息字段
AgentID string `json:"agent_id"` // Agent唯一标识
Name string `json:"name"` // 设备名称
IP string `json:"ip"` // 设备IP地址
}
// 全局配置
var config Config
// 保存解析后的时间间隔
var parsedInterval time.Duration
// 保存上一次网络流量采集的数据
type NetworkStats struct {
BytesSent uint64
BytesReceived uint64
}
var (
lastNetworkStats = make(map[string]NetworkStats)
lastCollectTime time.Time
)
// 保存采集到的数据点
var metricsBuffer []*Metrics
var metricsBufferMutex sync.Mutex
// 初始化互斥锁
func init() {
metricsBuffer = make([]*Metrics, 0)
}
// getLocalIP 获取本机IP地址
func getLocalIP() string {
// 获取所有网络接口
interfaces, err := stdnet.Interfaces()
if err != nil {
log.Printf("Failed to get network interfaces: %v", err)
return ""
}
// 遍历网络接口查找非回环、UP状态的IP
for _, iface := range interfaces {
// 跳过回环接口和非UP状态的接口
if iface.Flags&stdnet.FlagLoopback != 0 || iface.Flags&stdnet.FlagUp == 0 {
continue
}
// 获取接口的IP地址
addresses, err := iface.Addrs()
if err != nil {
log.Printf("Failed to get addresses for interface %s: %v", iface.Name, err)
continue
}
// 遍历地址并返回IPv4地址
for _, addr := range addresses {
var ip stdnet.IP
switch v := addr.(type) {
case *stdnet.IPNet:
ip = v.IP
case *stdnet.IPAddr:
ip = v.IP
}
// 跳过IPv6地址和回环地址
if ip == nil || ip.IsLoopback() || ip.To4() == nil {
continue
}
return ip.String()
}
}
// 如果找不到合适的IP尝试另一种方法
conn, err := stdnet.Dial("udp", "8.8.8.8:80")
if err != nil {
log.Printf("Failed to dial UDP: %v", err)
return ""
}
defer conn.Close()
localAddr := conn.LocalAddr().(*stdnet.UDPAddr)
return localAddr.IP.String()
}
// 初始化配置
func initConfig() {
// 默认配置
config = Config{
ServerURL: "http://localhost:8080/api",
ID: "", // 自动生成
Name: "", // 自动生成或从配置读取
Token: "", // 设备认证令牌,从配置或环境变量读取
Interval: "10s",
Debug: false, // 默认非调试模式
APIPort: 8081, // 默认API端口8081
}
// 读取配置文件
readConfigFile()
// 从环境变量读取配置(优先级高于配置文件)
loadFromEnv()
// 生成或确保ID存在
ensureAgentID()
// 确保名称存在
ensureAgentName()
// 保存配置到文件(如果有修改)
saveConfigFile()
// 解析时间间隔
var err error
parsedInterval, err = time.ParseDuration(config.Interval)
if err != nil {
log.Printf("Failed to parse interval: %v, using default 10s", err)
parsedInterval = 10 * time.Second
}
// 打印配置信息
if config.Debug {
log.Printf("Agent ID: %s, Name: %s, Debug: %v, API Port: %d", config.ID, config.Name, config.Debug, config.APIPort)
} else {
log.Printf("Agent ID: %s, Name: %s", config.ID, config.Name)
}
}
// 从环境变量读取配置
func loadFromEnv() {
if serverURL := os.Getenv("AGENT_SERVER_URL"); serverURL != "" {
config.ServerURL = serverURL
}
if id := os.Getenv("AGENT_ID"); id != "" {
config.ID = id
}
if name := os.Getenv("AGENT_NAME"); name != "" {
config.Name = name
}
if token := os.Getenv("AGENT_TOKEN"); token != "" {
config.Token = token
}
if intervalStr := os.Getenv("AGENT_INTERVAL"); intervalStr != "" {
config.Interval = intervalStr
}
if debugStr := os.Getenv("AGENT_DEBUG"); debugStr != "" {
debug, err := strconv.ParseBool(debugStr)
if err == nil {
config.Debug = debug
}
}
if apiPortStr := os.Getenv("AGENT_API_PORT"); apiPortStr != "" {
apiPort, err := strconv.Atoi(apiPortStr)
if err == nil {
config.APIPort = apiPort
}
}
}
// 确保Agent ID存在不存在则生成
func ensureAgentID() {
if config.ID != "" {
return
}
// 使用时间戳和随机数生成唯一ID
config.ID = fmt.Sprintf("agent-%d-%d", time.Now().UnixNano(), os.Getpid())
log.Printf("Generated new Agent ID: %s", config.ID)
}
// 确保Agent名称存在不存在则生成
func ensureAgentName() {
if config.Name != "" {
return
}
// 尝试获取主机名作为默认名称
hostname, err := os.Hostname()
if err != nil {
// 如果获取主机名失败使用ID的前8位作为默认名称
hostname = fmt.Sprintf("Agent-%s", config.ID[:8])
}
config.Name = hostname
log.Printf("Generated new Agent Name: %s", config.Name)
}
// 保存配置到文件
func saveConfigFile() {
// 获取配置文件路径,默认./agent.json可通过环境变量指定
configFile := os.Getenv("AGENT_CONFIG_FILE")
if configFile == "" {
configFile = "./agent.json"
}
// 将配置转换为JSON
jsonData, err := json.MarshalIndent(config, "", " ")
if err != nil {
log.Printf("Failed to marshal config: %v", err)
return
}
// 保存配置到文件
if err := os.WriteFile(configFile, jsonData, 0644); err != nil {
log.Printf("Failed to save config to file: %v", err)
return
}
log.Printf("Config saved to %s", configFile)
}
// 读取配置文件
func readConfigFile() {
// 获取配置文件路径,默认./agent.json可通过环境变量指定
configFile := os.Getenv("AGENT_CONFIG_FILE")
if configFile == "" {
configFile = "./agent.json"
}
// 检查配置文件是否存在
if _, err := os.Stat(configFile); os.IsNotExist(err) {
log.Printf("Config file %s not found, using default config", configFile)
return
}
// 读取配置文件
content, err := os.ReadFile(configFile)
if err != nil {
log.Printf("Failed to read config file %s: %v, using default config", configFile, err)
return
}
// 解析配置文件
var fileConfig Config
if err := json.Unmarshal(content, &fileConfig); err != nil {
log.Printf("Failed to parse config file %s: %v, using default config", configFile, err)
return
}
// 合并配置:所有字段都从配置文件覆盖,除了空字符串
if fileConfig.ServerURL != "" {
config.ServerURL = fileConfig.ServerURL
}
if fileConfig.ID != "" {
config.ID = fileConfig.ID
}
if fileConfig.Name != "" {
config.Name = fileConfig.Name
}
if fileConfig.Token != "" {
config.Token = fileConfig.Token
}
if fileConfig.Interval != "" {
config.Interval = fileConfig.Interval
}
// 对于bool类型总是从配置文件覆盖
config.Debug = fileConfig.Debug
// 对于int类型如果配置文件中的值大于0则使用配置文件中的值否则使用默认值
if fileConfig.APIPort > 0 {
config.APIPort = fileConfig.APIPort
} else {
config.APIPort = 8081 // 默认API端口
}
log.Printf("Config loaded from %s", configFile)
}
// 采集CPU使用率和频率
func collectCPU() (float64, float64, error) {
// 采集CPU使用率
percentages, err := cpu.Percent(0, false)
if err != nil {
return 0, 0, err
}
// 采集CPU频率使用CPUInfo获取频率信息
cpus, err := cpu.Info()
if err != nil {
return percentages[0], 0, nil // 频率采集失败返回使用率和0频率
}
// 计算平均频率 (转换为MHz)
var totalFreq float64
for _, cpuInfo := range cpus {
// CPUInfo返回的MHz直接累加
totalFreq += cpuInfo.Mhz
}
avgFreq := totalFreq / float64(len(cpus))
return percentages[0], avgFreq, nil
}
// 采集内存使用率
func collectMemory() (float64, error) {
vm, err := mem.VirtualMemory()
if err != nil {
return 0, err
}
return vm.UsedPercent, nil
}
// 采集磁盘使用率和总容量
func collectDisk() (map[string]DiskMetrics, error) {
// 获取系统所有挂载点
partitions, err := disk.Partitions(false)
if err != nil {
return nil, err
}
// 初始化返回值
diskMetricsMap := make(map[string]DiskMetrics)
// 遍历所有挂载点,采集磁盘使用率和总容量
for _, partition := range partitions {
// 只处理本地文件系统,跳过网络文件系统
if partition.Fstype == "" {
continue
}
// 采集磁盘使用率和总容量
usage, err := disk.Usage(partition.Mountpoint)
if err != nil {
continue
}
// 创建DiskMetrics结构体
diskMetrics := DiskMetrics{
UsedPercent: usage.UsedPercent,
Total: usage.Total,
}
// 保存磁盘指标
diskMetricsMap[partition.Mountpoint] = diskMetrics
}
return diskMetricsMap, nil
}
// 采集网络流量
func collectNetwork() (map[string]NetworkInterfaceMetrics, uint64, uint64, uint64, uint64, error) {
// 获取所有网卡的统计数据
ioCounters, err := net.IOCounters(true)
if err != nil {
// 当获取网卡数据失败时返回空map和0值
return make(map[string]NetworkInterfaceMetrics), 0, 0, 0, 0, nil
}
// 初始化返回值
networkMetrics := make(map[string]NetworkInterfaceMetrics)
var totalRxBytes, totalTxBytes, totalRxRate, totalTxRate uint64
// 获取当前时间
currentTime := time.Now()
// 遍历所有网卡
for _, counter := range ioCounters {
// 跳过空名称的网卡
if counter.Name == "" {
continue
}
// 获取当前网卡的累计流量
currentBytesSent := counter.BytesSent
currentBytesReceived := counter.BytesRecv
// 计算速率
var bytesSentRate, bytesReceivedRate uint64
if !lastCollectTime.IsZero() {
// 计算时间差(秒)
timeDiff := currentTime.Sub(lastCollectTime).Seconds()
if timeDiff > 0 {
// 获取上一次该网卡的流量
lastStats, exists := lastNetworkStats[counter.Name]
if exists {
// 计算流量差
sentDiff := currentBytesSent - lastStats.BytesSent
receivedDiff := currentBytesReceived - lastStats.BytesReceived
// 计算速率bytes/s
bytesSentRate = uint64(float64(sentDiff) / timeDiff)
bytesReceivedRate = uint64(float64(receivedDiff) / timeDiff)
}
}
}
// 更新上一次采集的值
lastNetworkStats[counter.Name] = NetworkStats{
BytesSent: currentBytesSent,
BytesReceived: currentBytesReceived,
}
// 保存当前网卡的速率和累计流量
networkMetrics[counter.Name] = NetworkInterfaceMetrics{
BytesSent: bytesSentRate,
BytesReceived: bytesReceivedRate,
TxBytes: currentBytesSent,
RxBytes: currentBytesReceived,
}
// 累加总流量
totalRxBytes += currentBytesReceived
totalTxBytes += currentBytesSent
totalRxRate += bytesReceivedRate
totalTxRate += bytesSentRate
}
// 更新上一次采集时间
lastCollectTime = currentTime
// 返回所有网卡的速率和累计流量,以及总和
return networkMetrics, totalRxBytes, totalTxBytes, totalRxRate, totalTxRate, nil
}
// 采集所有监控指标
// 采集进程信息返回CPU使用率较高的前N个进程
func collectProcessMetrics() ([]ProcessMetrics, error) {
// 只采集CPU使用率较高的前20个进程避免性能问题
const maxProcesses = 20
// 获取所有进程ID
pids, err := process.Pids()
if err != nil {
return nil, fmt.Errorf("failed to get process IDs: %w", err)
}
// 创建进程信息切片
processes := make([]ProcessMetrics, 0, maxProcesses)
// 用于并发采集进程信息
var wg sync.WaitGroup
var mu sync.Mutex
errCh := make(chan error, len(pids))
// 限制并发数量
concurrencyLimit := 10
semaphore := make(chan struct{}, concurrencyLimit)
for _, pid := range pids {
wg.Add(1)
// 控制并发数量
semaphore <- struct{}{}
go func(pid int32) {
defer wg.Done()
defer func() { <-semaphore }()
// 获取进程信息
p, err := process.NewProcess(pid)
if err != nil {
errCh <- nil // 忽略无法访问的进程
return
}
// 获取进程名
name, err := p.Name()
if err != nil {
errCh <- nil
return
}
// 获取用户名
username := ""
if uids, err := p.Uids(); err == nil && len(uids) > 0 {
// 简单实现实际需要映射UID到用户名
username = strconv.Itoa(int(uids[0]))
}
// 获取CPU使用率
cpuPercent, err := p.CPUPercent()
if err != nil {
errCh <- nil
return
}
// 获取内存使用率
memInfo, err := p.MemoryInfo()
if err != nil {
errCh <- nil
return
}
// 获取系统总内存
vmStat, err := mem.VirtualMemory()
if err != nil {
errCh <- nil
return
}
// 计算内存使用率百分比
memPercent := float64(memInfo.RSS) / float64(vmStat.Total) * 100
// 获取进程路径
path, err := p.Exe()
if err != nil {
path = ""
}
// 获取命令行
cmdline, err := p.Cmdline()
if err != nil {
cmdline = ""
}
// 获取占用端口
ports := []int{}
if connections, err := p.Connections(); err == nil {
for _, conn := range connections {
// 只添加监听或已建立连接的端口
if conn.Status == "LISTEN" || conn.Status == "ESTABLISHED" {
ports = append(ports, int(conn.Laddr.Port))
}
}
}
// 创建进程信息
procMetric := ProcessMetrics{
Name: name,
Username: username,
PID: pid,
CPU: cpuPercent,
Memory: memPercent,
Path: path,
Cmdline: cmdline,
Ports: ports,
}
// 添加到切片
mu.Lock()
processes = append(processes, procMetric)
mu.Unlock()
errCh <- nil
}(pid)
}
// 等待所有goroutine完成
wg.Wait()
close(errCh)
// 检查是否有错误
for err := range errCh {
if err != nil {
log.Printf("Warning: failed to collect process info: %v", err)
}
}
// 根据CPU使用率排序取前N个
sort.Slice(processes, func(i, j int) bool {
return processes[i].CPU > processes[j].CPU
})
// 限制返回的进程数量
if len(processes) > maxProcesses {
processes = processes[:maxProcesses]
}
return processes, nil
}
// 采集磁盘详细信息
func collectDiskDetails() ([]DiskDetailMetrics, error) {
// 获取所有挂载点信息
partitions, err := disk.Partitions(false)
if err != nil {
return nil, fmt.Errorf("failed to get disk partitions: %w", err)
}
// 创建磁盘详细信息切片
diskDetails := make([]DiskDetailMetrics, 0, len(partitions))
for _, partition := range partitions {
// 获取磁盘使用情况
usage, err := disk.Usage(partition.Mountpoint)
if err != nil {
continue // 忽略无法访问的分区
}
// 简单实现获取设备ID
deviceID := partition.Device
if len(deviceID) > 0 && deviceID[0] == '/' {
deviceID = deviceID[1:]
}
// 设备状态
status := "online"
// 设备类型
diskType := "unknown"
if partition.Fstype != "" {
diskType = partition.Fstype
}
// 设备大小(GB)
sizeGB := float64(usage.Total) / (1024 * 1024 * 1024)
// 设备型号 - 简化实现,实际需要更复杂的逻辑
model := partition.Device
// 接口类型 - 简化实现
interfaceType := "unknown"
if len(partition.Device) > 0 {
if partition.Device[:3] == "sda" || partition.Device[:3] == "sdb" {
interfaceType = "SATA"
} else if partition.Device[:3] == "nvme" {
interfaceType = "NVMe"
} else if partition.Device[:3] == "mmc" {
interfaceType = "MMC"
} else if partition.Device[:3] == "vda" || partition.Device[:3] == "vdb" {
interfaceType = "Virtual"
}
}
// 设备描述
description := fmt.Sprintf("%s (%s)", partition.Device, partition.Fstype)
// 创建磁盘详细信息
diskDetail := DiskDetailMetrics{
DeviceID: deviceID,
Status: status,
Type: diskType,
SizeGB: sizeGB,
Model: model,
InterfaceType: interfaceType,
Description: description,
}
diskDetails = append(diskDetails, diskDetail)
}
return diskDetails, nil
}
// 采集系统日志
func collectLogs() ([]LogEntry, error) {
// 日志文件路径
logFile := "/var/log/messages"
log.Printf("Attempting to collect logs from %s", logFile)
// 检查文件是否存在和权限
fileInfo, err := os.Stat(logFile)
if err != nil {
log.Printf("Failed to stat log file %s: %v", logFile, err)
return nil, fmt.Errorf("failed to stat log file %s: %w", logFile, err)
}
log.Printf("Log file %s exists, size: %d bytes", logFile, fileInfo.Size())
// 打开日志文件
file, err := os.Open(logFile)
if err != nil {
log.Printf("Failed to open log file %s: %v", logFile, err)
return nil, fmt.Errorf("failed to open log file %s: %w", logFile, err)
}
defer file.Close()
// 读取文件末尾内容
const maxReadSize = 1024 * 1024 // 最多读取1MB
readSize := maxReadSize
if fileInfo.Size() < int64(maxReadSize) {
readSize = int(fileInfo.Size())
}
log.Printf("Reading %d bytes from log file", readSize)
buf := make([]byte, readSize)
bytesRead, err := file.ReadAt(buf, fileInfo.Size()-int64(readSize))
if err != nil {
log.Printf("Failed to read log file: %v", err)
return nil, fmt.Errorf("failed to read log file: %w", err)
}
log.Printf("Successfully read %d bytes from log file", bytesRead)
// 分割日志行
lines := bytes.Split(buf[:bytesRead], []byte("\n"))
log.Printf("Found %d lines in log file", len(lines))
// 创建日志条目切片
logs := make([]LogEntry, 0, 50) // 最多保存50条日志
// 从后往前解析日志行
for i := len(lines) - 1; i >= 0 && len(logs) < 50; i-- {
line := bytes.TrimSpace(lines[i])
if len(line) == 0 {
continue
}
// 打印前5行日志行用于调试
if i >= len(lines)-5 {
log.Printf("Processing log line %d: %s", i, string(line))
}
// 使用字符串处理,更方便处理空格
lineStr := string(line)
// 使用strings.Fields分割日志行自动处理连续空格
fields := strings.Fields(lineStr)
log.Printf("Line %d fields: %v, length: %d", i, fields, len(fields))
if len(fields) < 6 {
log.Printf("Skipping line %d: not enough fields (%d) after splitting", i, len(fields))
continue
}
// 构建时间字符串:月份 日期 时间
timeStr := fmt.Sprintf("%s %s %s", fields[0], fields[1], fields[2])
log.Printf("Line %d timeStr: '%s'", i, timeStr)
// 解析时间
t, err := time.Parse("Jan 2 15:04:05", timeStr)
if err != nil {
log.Printf("Skipping line %d: failed to parse time '%s': %v", i, timeStr, err)
continue
}
// 设置当前年份
year, _, _ := time.Now().Date()
t = time.Date(year, t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), 0, time.Local)
// 寻找第一个冒号用于分割source和message
colonIndex := strings.Index(lineStr, ": ")
if colonIndex == -1 {
log.Printf("Skipping line %d: no colon found to split source and message", i)
continue
}
// 解析source和message
source := lineStr[:colonIndex]
message := lineStr[colonIndex+2:]
// 创建日志条目
logEntry := LogEntry{
Sequence: len(logs) + 1,
Source: source,
Time: t,
Message: message,
}
// 添加到日志切片(注意顺序,后面的日志先解析,所以需要插入到前面)
logs = append([]LogEntry{logEntry}, logs...)
}
log.Printf("Successfully collected %d logs", len(logs))
return logs, nil
}
func collectMetrics() (*Metrics, error) {
metrics := &Metrics{}
// 初始化Network字段为非nil避免空指针问题
metrics.Network = make(map[string]NetworkInterfaceMetrics)
// 设置设备信息
metrics.AgentID = config.ID
metrics.Name = config.Name
// 尝试获取本机IP地址
metrics.IP = getLocalIP()
// 采集CPU使用率和频率
cpuUsage, cpuHz, err := collectCPU()
if err != nil {
// CPU采集失败时使用0值
log.Printf("Failed to collect CPU metrics: %v, using 0 values", err)
cpuUsage = 0
cpuHz = 0
}
metrics.CPU = cpuUsage
metrics.CPUHz = cpuHz
// 采集内存使用率
memoryUsage, err := collectMemory()
if err != nil {
// 内存采集失败时使用0值
log.Printf("Failed to collect memory metrics: %v, using 0 value", err)
memoryUsage = 0
}
metrics.Memory = memoryUsage
// 采集磁盘使用率和总容量
diskMetricsMap, err := collectDisk()
if err != nil {
// 磁盘采集失败时使用空map
log.Printf("Failed to collect disk metrics: %v, using empty map", err)
diskMetricsMap = make(map[string]DiskMetrics)
}
metrics.Disk = diskMetricsMap
// 采集磁盘详细信息
diskDetails, err := collectDiskDetails()
if err != nil {
// 磁盘详细信息采集失败时使用空切片
log.Printf("Failed to collect disk details: %v, using empty slice", err)
diskDetails = make([]DiskDetailMetrics, 0)
}
metrics.DiskDetails = diskDetails
// 采集进程信息
processes, err := collectProcessMetrics()
if err != nil {
// 进程信息采集失败时使用空切片
log.Printf("Failed to collect process metrics: %v, using empty slice", err)
processes = make([]ProcessMetrics, 0)
}
metrics.Processes = processes
// 采集网络流量
networkMetrics, rxTotal, txTotal, rxRate, txRate, err := collectNetwork()
if err != nil {
// 网络采集失败时使用0值实际上collectNetwork已经处理了错误情况
log.Printf("Failed to collect network metrics: %v, using 0 values", err)
networkMetrics = make(map[string]NetworkInterfaceMetrics)
rxTotal, txTotal, rxRate, txRate = 0, 0, 0, 0
}
// 直接使用采集到的网卡流量
metrics.Network = networkMetrics
metrics.RxTotal = rxTotal
metrics.TxTotal = txTotal
metrics.RxRate = rxRate
metrics.TxRate = txRate
// 采集系统日志
logs, err := collectLogs()
if err != nil {
// 日志采集失败时使用空切片
log.Printf("Failed to collect logs: %v, using empty slice", err)
logs = make([]LogEntry, 0)
}
metrics.Logs = logs
return metrics, nil
}
// 发送指标到服务器
func sendMetrics(metricsList []*Metrics) error {
// 创建HTTP客户端
client := &http.Client{
Timeout: 10 * time.Second,
}
// 将指标转换为JSON
jsonData, err := json.Marshal(metricsList)
if err != nil {
return err
}
// 创建请求
req, err := http.NewRequest("POST", fmt.Sprintf("%s/metrics/", config.ServerURL), bytes.NewBuffer(jsonData))
if err != nil {
return err
}
// 设置请求头
req.Header.Set("Content-Type", "application/json")
// 设置Agent名称
req.Header.Set("X-Agent-Name", config.Name)
// 设置设备认证令牌
req.Header.Set("X-Device-Token", config.Token)
// 发送请求
resp, err := client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
// 检查响应状态码
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("server returned status code %d", resp.StatusCode)
}
return nil
}
// 启动HTTP服务器提供本地指标查询
// 启动HTTP服务器
func startHTTPServer() {
// 指标查询端点
http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
if config.Debug {
log.Printf("API Request: %s %s", r.Method, r.URL.Path)
}
metrics, err := collectMetrics()
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
// 设置响应头
w.Header().Set("Content-Type", "application/json")
// 返回JSON响应
json.NewEncoder(w).Encode(metrics)
})
// 健康检查端点
http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
if config.Debug {
log.Printf("API Request: %s %s", r.Method, r.URL.Path)
}
w.WriteHeader(http.StatusOK)
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]string{
"status": "ok",
"agent_id": config.ID,
})
})
// 获取配置端点
http.HandleFunc("/config", func(w http.ResponseWriter, r *http.Request) {
if config.Debug {
log.Printf("API Request: %s %s", r.Method, r.URL.Path)
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(config)
})
// 获取状态端点
http.HandleFunc("/status", func(w http.ResponseWriter, r *http.Request) {
if config.Debug {
log.Printf("API Request: %s %s", r.Method, r.URL.Path)
}
// 采集当前状态
cpu, cpuHz, _ := collectCPU()
memory, _ := collectMemory()
disk, _ := collectDisk()
status := map[string]interface{}{
"status": "running",
"agent_id": config.ID,
"name": config.Name,
"debug": config.Debug,
"interval": config.Interval,
"cpu": cpu,
"cpu_hz": cpuHz,
"memory": memory,
"disk": disk,
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(status)
})
// 立即采集和发送指标端点
http.HandleFunc("/collect", func(w http.ResponseWriter, r *http.Request) {
if config.Debug {
log.Printf("API Request: %s %s", r.Method, r.URL.Path)
}
go collectAndSendMetrics()
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]string{
"status": "ok",
"message": "Metrics collection triggered",
})
})
// 启动服务器
addr := fmt.Sprintf(":%d", config.APIPort)
log.Printf("Starting HTTP server on %s", addr)
if err := http.ListenAndServe(addr, nil); err != nil {
log.Fatalf("Failed to start HTTP server: %v", err)
}
}
func main() {
// 初始化配置
initConfig()
// 启动HTTP服务器异步
go startHTTPServer()
log.Printf("Agent started, reporting to %s every %v, collecting data every 1s", config.ServerURL, config.Interval)
// 启动时立即采集一次数据
collectMetricsToBuffer()
// 创建两个ticker一个用于每秒采集数据一个用于定期发送数据
collectTicker := time.NewTicker(1 * time.Second)
sendTicker := time.NewTicker(parsedInterval)
defer collectTicker.Stop()
defer sendTicker.Stop()
for {
select {
case <-collectTicker.C:
// 每秒采集一次数据
collectMetricsToBuffer()
case <-sendTicker.C:
// 定期发送采集到的数据
collectAndSendMetrics()
}
}
}
// 每秒采集数据并添加到缓冲区
func collectMetricsToBuffer() {
// 采集指标
metrics, err := collectMetrics()
if err != nil {
log.Printf("Failed to collect metrics: %v", err)
return
}
// 将指标添加到缓冲区
metricsBufferMutex.Lock()
metricsBuffer = append(metricsBuffer, metrics)
metricsBufferMutex.Unlock()
if config.Debug {
// 计算平均磁盘使用率
totalDiskUsage := 0.0
diskCount := 0
for _, usage := range metrics.Disk {
totalDiskUsage += usage.UsedPercent
diskCount++
}
averageDiskUsage := 0.0
if diskCount > 0 {
averageDiskUsage = totalDiskUsage / float64(diskCount)
}
log.Printf("Metrics collected: Agent=%s, CPU=%.2f%%, Memory=%.2f%%, Disk=%.2f%%",
config.Name, metrics.CPU, metrics.Memory, averageDiskUsage)
}
}
// 发送缓冲区中的所有指标
func collectAndSendMetrics() {
// 从缓冲区获取所有指标
metricsBufferMutex.Lock()
if len(metricsBuffer) == 0 {
metricsBufferMutex.Unlock()
return
}
// 创建一个副本并清空缓冲区
metricsToSend := make([]*Metrics, len(metricsBuffer))
copy(metricsToSend, metricsBuffer)
metricsBuffer = make([]*Metrics, 0)
metricsBufferMutex.Unlock()
// 发送指标
if err := sendMetrics(metricsToSend); err != nil {
log.Printf("Failed to send metrics: %v", err)
return
}
// 计算平均指标值
var totalCPU, totalMemory, totalDiskUsage float64
var diskCount int
pointCount := len(metricsToSend)
for _, metrics := range metricsToSend {
totalCPU += metrics.CPU
totalMemory += metrics.Memory
// 计算磁盘使用率
for _, usage := range metrics.Disk {
totalDiskUsage += usage.UsedPercent
diskCount++
}
}
// 计算平均值
averageCPU := 0.0
averageMemory := 0.0
averageDiskUsage := 0.0
if pointCount > 0 {
averageCPU = totalCPU / float64(pointCount)
averageMemory = totalMemory / float64(pointCount)
}
if diskCount > 0 {
averageDiskUsage = totalDiskUsage / float64(diskCount)
}
// 只在production模式下显示基本指标信息
log.Printf("Metrics sent successfully: Agent=%s, CPU=%.2f%%, Memory=%.2f%%, Disk=%.2f%%",
config.Name, averageCPU, averageMemory, averageDiskUsage)
}