增加日志进程等信息采集
This commit is contained in:
415
agent/main.go
415
agent/main.go
@@ -5,8 +5,10 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
stdnet "net"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -15,6 +17,7 @@ import (
|
||||
"github.com/shirou/gopsutil/disk"
|
||||
"github.com/shirou/gopsutil/mem"
|
||||
"github.com/shirou/gopsutil/net"
|
||||
"github.com/shirou/gopsutil/process"
|
||||
)
|
||||
|
||||
// Config Agent配置
|
||||
@@ -43,13 +46,56 @@ type DiskMetrics struct {
|
||||
Total uint64 `json:"total"` // 总容量 (bytes)
|
||||
}
|
||||
|
||||
// ProcessMetrics is the per-process record the agent reports. Field order
// and JSON keys define the wire format.
type ProcessMetrics struct {
	// Name is the process name.
	Name string `json:"name"`
	// Username identifies the owning user.
	Username string `json:"username"`
	// PID is the process ID.
	PID int32 `json:"pid"`
	// CPU is the CPU usage of the process.
	CPU float64 `json:"cpu"`
	// Memory is the memory usage of the process.
	Memory float64 `json:"memory"`
	// Path is the process path.
	Path string `json:"path"`
	// Cmdline is the command line of the process.
	Cmdline string `json:"cmdline"`
	// Ports lists the ports occupied by the process.
	Ports []int `json:"ports"`
}
|
||||
|
||||
// DiskDetailMetrics describes one disk device in the agent's report. Field
// order and JSON keys define the wire format.
type DiskDetailMetrics struct {
	// DeviceID is the device identifier.
	DeviceID string `json:"device_id"`
	// Status is the device status.
	Status string `json:"status"`
	// Type is the device type.
	Type string `json:"type"`
	// SizeGB is the device size in GB.
	SizeGB float64 `json:"size_gb"`
	// Model is the device model.
	Model string `json:"model"`
	// InterfaceType is the interface type of the device.
	InterfaceType string `json:"interface_type"`
	// Description is a free-form description of the device.
	Description string `json:"description"`
}
|
||||
|
||||
// LogEntry is a single system log record in the agent's report. Field order
// and JSON keys define the wire format.
type LogEntry struct {
	// Sequence is the log sequence number.
	Sequence int `json:"sequence"`
	// Source is where the log entry came from.
	Source string `json:"source"`
	// Time is when the logged event occurred.
	Time time.Time `json:"time"`
	// Message is the log content.
	Message string `json:"message"`
}
|
||||
|
||||
// Metrics 监控指标
|
||||
type Metrics struct {
|
||||
CPU float64 `json:"cpu"`
|
||||
CPUHz float64 `json:"cpu_hz"` // CPU频率 (MHz)
|
||||
Memory float64 `json:"memory"`
|
||||
Disk map[string]DiskMetrics `json:"disk"`
|
||||
Network map[string]NetworkInterfaceMetrics `json:"network"`
|
||||
CPU float64 `json:"cpu"`
|
||||
CPUHz float64 `json:"cpu_hz"` // CPU频率 (MHz)
|
||||
Memory float64 `json:"memory"`
|
||||
Disk map[string]DiskMetrics `json:"disk"`
|
||||
DiskDetails []DiskDetailMetrics `json:"disk_details"` // 磁盘详细信息
|
||||
Network map[string]NetworkInterfaceMetrics `json:"network"`
|
||||
Processes []ProcessMetrics `json:"processes"` // 进程信息
|
||||
Logs []LogEntry `json:"logs"` // 系统日志
|
||||
RxTotal uint64 `json:"rx_total"` // 所有网卡累计接收字节数总和
|
||||
TxTotal uint64 `json:"tx_total"` // 所有网卡累计发送字节数总和
|
||||
RxRate uint64 `json:"rx_rate"` // 所有网卡实时接收速率总和 (bytes/s)
|
||||
TxRate uint64 `json:"tx_rate"` // 所有网卡实时发送速率总和 (bytes/s)
|
||||
// 设备信息字段
|
||||
DeviceID string `json:"device_id"` // 设备ID
|
||||
AgentID string `json:"agent_id"` // Agent唯一标识
|
||||
Name string `json:"name"` // 设备名称
|
||||
IP string `json:"ip"` // 设备IP地址
|
||||
}
|
||||
|
||||
// 全局配置
|
||||
@@ -78,6 +124,60 @@ func init() {
|
||||
metricsBuffer = make([]*Metrics, 0)
|
||||
}
|
||||
|
||||
// getLocalIP returns an IPv4 address of this host as a string, or "" when
// none can be determined.
//
// It first walks the network interfaces and returns the first non-loopback
// IPv4 address found on an interface that is up and is not a loopback
// interface. If no such address exists, it falls back to opening a UDP
// socket toward 8.8.8.8:80 and reporting that socket's local address.
// Failures are logged rather than returned.
func getLocalIP() string {
	ifaces, err := stdnet.Interfaces()
	if err != nil {
		log.Printf("Failed to get network interfaces: %v", err)
		return ""
	}

	for _, nic := range ifaces {
		// Only consider interfaces that are up and are not loopback.
		isUp := nic.Flags&stdnet.FlagUp != 0
		isLoopback := nic.Flags&stdnet.FlagLoopback != 0
		if !isUp || isLoopback {
			continue
		}

		addrs, err := nic.Addrs()
		if err != nil {
			log.Printf("Failed to get addresses for interface %s: %v", nic.Name, err)
			continue
		}

		for _, a := range addrs {
			// Address types other than IPNet/IPAddr leave candidate nil.
			var candidate stdnet.IP
			if ipNet, ok := a.(*stdnet.IPNet); ok {
				candidate = ipNet.IP
			} else if ipAddr, ok := a.(*stdnet.IPAddr); ok {
				candidate = ipAddr.IP
			}

			// Accept the first address that is set, non-loopback and IPv4.
			if candidate != nil && !candidate.IsLoopback() && candidate.To4() != nil {
				return candidate.String()
			}
		}
	}

	// No interface yielded a usable address; try the UDP socket fallback.
	conn, err := stdnet.Dial("udp", "8.8.8.8:80")
	if err != nil {
		log.Printf("Failed to dial UDP: %v", err)
		return ""
	}
	defer conn.Close()

	return conn.LocalAddr().(*stdnet.UDPAddr).IP.String()
}
|
||||
|
||||
// 初始化配置
|
||||
func initConfig() {
|
||||
// 默认配置
|
||||
@@ -353,25 +453,27 @@ func collectDisk() (map[string]DiskMetrics, error) {
|
||||
}
|
||||
|
||||
// 采集网络流量
|
||||
func collectNetwork() (map[string]NetworkInterfaceMetrics, error) {
|
||||
func collectNetwork() (map[string]NetworkInterfaceMetrics, uint64, uint64, uint64, uint64, error) {
|
||||
// 获取所有网卡的统计数据
|
||||
ioCounters, err := net.IOCounters(true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// 当获取网卡数据失败时,返回空map和0值
|
||||
return make(map[string]NetworkInterfaceMetrics), 0, 0, 0, 0, nil
|
||||
}
|
||||
|
||||
if len(ioCounters) == 0 {
|
||||
return make(map[string]NetworkInterfaceMetrics), nil
|
||||
}
|
||||
// 初始化返回值
|
||||
networkMetrics := make(map[string]NetworkInterfaceMetrics)
|
||||
var totalRxBytes, totalTxBytes, totalRxRate, totalTxRate uint64
|
||||
|
||||
// 获取当前时间
|
||||
currentTime := time.Now()
|
||||
|
||||
// 初始化返回值
|
||||
networkMetrics := make(map[string]NetworkInterfaceMetrics)
|
||||
|
||||
// 遍历所有网卡
|
||||
for _, counter := range ioCounters {
|
||||
// 跳过空名称的网卡
|
||||
if counter.Name == "" {
|
||||
continue
|
||||
}
|
||||
// 获取当前网卡的累计流量
|
||||
currentBytesSent := counter.BytesSent
|
||||
currentBytesReceived := counter.BytesRecv
|
||||
@@ -409,23 +511,265 @@ func collectNetwork() (map[string]NetworkInterfaceMetrics, error) {
|
||||
TxBytes: currentBytesSent,
|
||||
RxBytes: currentBytesReceived,
|
||||
}
|
||||
|
||||
// 累加总流量
|
||||
totalRxBytes += currentBytesReceived
|
||||
totalTxBytes += currentBytesSent
|
||||
totalRxRate += bytesReceivedRate
|
||||
totalTxRate += bytesSentRate
|
||||
}
|
||||
|
||||
// 更新上一次采集时间
|
||||
lastCollectTime = currentTime
|
||||
|
||||
// 返回所有网卡的速率和累计流量
|
||||
return networkMetrics, nil
|
||||
// 返回所有网卡的速率和累计流量,以及总和
|
||||
return networkMetrics, totalRxBytes, totalTxBytes, totalRxRate, totalTxRate, nil
|
||||
}
|
||||
|
||||
// 采集所有监控指标
|
||||
// 采集进程信息,返回CPU使用率较高的前N个进程
|
||||
func collectProcessMetrics() ([]ProcessMetrics, error) {
|
||||
// 只采集CPU使用率较高的前20个进程,避免性能问题
|
||||
const maxProcesses = 20
|
||||
|
||||
// 获取所有进程ID
|
||||
pids, err := process.Pids()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get process IDs: %w", err)
|
||||
}
|
||||
|
||||
// 创建进程信息切片
|
||||
processes := make([]ProcessMetrics, 0, maxProcesses)
|
||||
|
||||
// 用于并发采集进程信息
|
||||
var wg sync.WaitGroup
|
||||
var mu sync.Mutex
|
||||
errCh := make(chan error, len(pids))
|
||||
|
||||
// 限制并发数量
|
||||
concurrencyLimit := 10
|
||||
semaphore := make(chan struct{}, concurrencyLimit)
|
||||
|
||||
for _, pid := range pids {
|
||||
wg.Add(1)
|
||||
|
||||
// 控制并发数量
|
||||
semaphore <- struct{}{}
|
||||
|
||||
go func(pid int32) {
|
||||
defer wg.Done()
|
||||
defer func() { <-semaphore }()
|
||||
|
||||
// 获取进程信息
|
||||
p, err := process.NewProcess(pid)
|
||||
if err != nil {
|
||||
errCh <- nil // 忽略无法访问的进程
|
||||
return
|
||||
}
|
||||
|
||||
// 获取进程名
|
||||
name, err := p.Name()
|
||||
if err != nil {
|
||||
errCh <- nil
|
||||
return
|
||||
}
|
||||
|
||||
// 获取用户名
|
||||
username := ""
|
||||
if uids, err := p.Uids(); err == nil && len(uids) > 0 {
|
||||
// 简单实现,实际需要映射UID到用户名
|
||||
username = strconv.Itoa(int(uids[0]))
|
||||
}
|
||||
|
||||
// 获取CPU使用率
|
||||
cpuPercent, err := p.CPUPercent()
|
||||
if err != nil {
|
||||
errCh <- nil
|
||||
return
|
||||
}
|
||||
|
||||
// 获取内存使用率
|
||||
memInfo, err := p.MemoryInfo()
|
||||
if err != nil {
|
||||
errCh <- nil
|
||||
return
|
||||
}
|
||||
|
||||
// 获取系统总内存
|
||||
vmStat, err := mem.VirtualMemory()
|
||||
if err != nil {
|
||||
errCh <- nil
|
||||
return
|
||||
}
|
||||
|
||||
// 计算内存使用率百分比
|
||||
memPercent := float64(memInfo.RSS) / float64(vmStat.Total) * 100
|
||||
|
||||
// 获取进程路径
|
||||
path, err := p.Exe()
|
||||
if err != nil {
|
||||
path = ""
|
||||
}
|
||||
|
||||
// 获取命令行
|
||||
cmdline, err := p.Cmdline()
|
||||
if err != nil {
|
||||
cmdline = ""
|
||||
}
|
||||
|
||||
// 获取占用端口
|
||||
ports := []int{}
|
||||
if connections, err := p.Connections(); err == nil {
|
||||
for _, conn := range connections {
|
||||
// 只添加监听或已建立连接的端口
|
||||
if conn.Status == "LISTEN" || conn.Status == "ESTABLISHED" {
|
||||
ports = append(ports, int(conn.Laddr.Port))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 创建进程信息
|
||||
procMetric := ProcessMetrics{
|
||||
Name: name,
|
||||
Username: username,
|
||||
PID: pid,
|
||||
CPU: cpuPercent,
|
||||
Memory: memPercent,
|
||||
Path: path,
|
||||
Cmdline: cmdline,
|
||||
Ports: ports,
|
||||
}
|
||||
|
||||
// 添加到切片
|
||||
mu.Lock()
|
||||
processes = append(processes, procMetric)
|
||||
mu.Unlock()
|
||||
|
||||
errCh <- nil
|
||||
}(pid)
|
||||
}
|
||||
|
||||
// 等待所有goroutine完成
|
||||
wg.Wait()
|
||||
close(errCh)
|
||||
|
||||
// 检查是否有错误
|
||||
for err := range errCh {
|
||||
if err != nil {
|
||||
log.Printf("Warning: failed to collect process info: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 根据CPU使用率排序,取前N个
|
||||
sort.Slice(processes, func(i, j int) bool {
|
||||
return processes[i].CPU > processes[j].CPU
|
||||
})
|
||||
|
||||
// 限制返回的进程数量
|
||||
if len(processes) > maxProcesses {
|
||||
processes = processes[:maxProcesses]
|
||||
}
|
||||
|
||||
return processes, nil
|
||||
}
|
||||
|
||||
// 采集磁盘详细信息
|
||||
func collectDiskDetails() ([]DiskDetailMetrics, error) {
|
||||
// 获取所有挂载点信息
|
||||
partitions, err := disk.Partitions(false)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get disk partitions: %w", err)
|
||||
}
|
||||
|
||||
// 创建磁盘详细信息切片
|
||||
diskDetails := make([]DiskDetailMetrics, 0, len(partitions))
|
||||
|
||||
for _, partition := range partitions {
|
||||
// 获取磁盘使用情况
|
||||
usage, err := disk.Usage(partition.Mountpoint)
|
||||
if err != nil {
|
||||
continue // 忽略无法访问的分区
|
||||
}
|
||||
|
||||
// 简单实现,获取设备ID
|
||||
deviceID := partition.Device
|
||||
if len(deviceID) > 0 && deviceID[0] == '/' {
|
||||
deviceID = deviceID[1:]
|
||||
}
|
||||
|
||||
// 设备状态
|
||||
status := "online"
|
||||
|
||||
// 设备类型
|
||||
diskType := "unknown"
|
||||
if partition.Fstype != "" {
|
||||
diskType = partition.Fstype
|
||||
}
|
||||
|
||||
// 设备大小(GB)
|
||||
sizeGB := float64(usage.Total) / (1024 * 1024 * 1024)
|
||||
|
||||
// 设备型号 - 简化实现,实际需要更复杂的逻辑
|
||||
model := partition.Device
|
||||
|
||||
// 接口类型 - 简化实现
|
||||
interfaceType := "unknown"
|
||||
if len(partition.Device) > 0 {
|
||||
if partition.Device[:3] == "sda" || partition.Device[:3] == "sdb" {
|
||||
interfaceType = "SATA"
|
||||
} else if partition.Device[:3] == "nvme" {
|
||||
interfaceType = "NVMe"
|
||||
} else if partition.Device[:3] == "mmc" {
|
||||
interfaceType = "MMC"
|
||||
} else if partition.Device[:3] == "vda" || partition.Device[:3] == "vdb" {
|
||||
interfaceType = "Virtual"
|
||||
}
|
||||
}
|
||||
|
||||
// 设备描述
|
||||
description := fmt.Sprintf("%s (%s)", partition.Device, partition.Fstype)
|
||||
|
||||
// 创建磁盘详细信息
|
||||
diskDetail := DiskDetailMetrics{
|
||||
DeviceID: deviceID,
|
||||
Status: status,
|
||||
Type: diskType,
|
||||
SizeGB: sizeGB,
|
||||
Model: model,
|
||||
InterfaceType: interfaceType,
|
||||
Description: description,
|
||||
}
|
||||
|
||||
diskDetails = append(diskDetails, diskDetail)
|
||||
}
|
||||
|
||||
return diskDetails, nil
|
||||
}
|
||||
|
||||
func collectMetrics() (*Metrics, error) {
|
||||
metrics := &Metrics{}
|
||||
|
||||
// 初始化Network字段为非nil,避免空指针问题
|
||||
metrics.Network = make(map[string]NetworkInterfaceMetrics)
|
||||
|
||||
// 设置设备信息
|
||||
deviceID := config.DeviceID
|
||||
if deviceID == "" {
|
||||
deviceID = config.ID
|
||||
}
|
||||
metrics.DeviceID = deviceID
|
||||
metrics.AgentID = config.ID
|
||||
metrics.Name = config.Name
|
||||
// 尝试获取本机IP地址
|
||||
metrics.IP = getLocalIP()
|
||||
|
||||
// 采集CPU使用率和频率
|
||||
cpuUsage, cpuHz, err := collectCPU()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to collect CPU metrics: %w", err)
|
||||
// CPU采集失败时使用0值
|
||||
log.Printf("Failed to collect CPU metrics: %v, using 0 values", err)
|
||||
cpuUsage = 0
|
||||
cpuHz = 0
|
||||
}
|
||||
metrics.CPU = cpuUsage
|
||||
metrics.CPUHz = cpuHz
|
||||
@@ -433,24 +777,53 @@ func collectMetrics() (*Metrics, error) {
|
||||
// 采集内存使用率
|
||||
memoryUsage, err := collectMemory()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to collect memory metrics: %w", err)
|
||||
// 内存采集失败时使用0值
|
||||
log.Printf("Failed to collect memory metrics: %v, using 0 value", err)
|
||||
memoryUsage = 0
|
||||
}
|
||||
metrics.Memory = memoryUsage
|
||||
|
||||
// 采集磁盘使用率和总容量
|
||||
diskMetricsMap, err := collectDisk()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to collect disk metrics: %w", err)
|
||||
// 磁盘采集失败时使用空map
|
||||
log.Printf("Failed to collect disk metrics: %v, using empty map", err)
|
||||
diskMetricsMap = make(map[string]DiskMetrics)
|
||||
}
|
||||
metrics.Disk = diskMetricsMap
|
||||
|
||||
// 采集网络流量
|
||||
networkMetrics, err := collectNetwork()
|
||||
// 采集磁盘详细信息
|
||||
diskDetails, err := collectDiskDetails()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to collect network metrics: %w", err)
|
||||
// 磁盘详细信息采集失败时使用空切片
|
||||
log.Printf("Failed to collect disk details: %v, using empty slice", err)
|
||||
diskDetails = make([]DiskDetailMetrics, 0)
|
||||
}
|
||||
metrics.DiskDetails = diskDetails
|
||||
|
||||
// 采集进程信息
|
||||
processes, err := collectProcessMetrics()
|
||||
if err != nil {
|
||||
// 进程信息采集失败时使用空切片
|
||||
log.Printf("Failed to collect process metrics: %v, using empty slice", err)
|
||||
processes = make([]ProcessMetrics, 0)
|
||||
}
|
||||
metrics.Processes = processes
|
||||
|
||||
// 采集网络流量
|
||||
networkMetrics, rxTotal, txTotal, rxRate, txRate, err := collectNetwork()
|
||||
if err != nil {
|
||||
// 网络采集失败时使用0值(实际上collectNetwork已经处理了错误情况)
|
||||
log.Printf("Failed to collect network metrics: %v, using 0 values", err)
|
||||
networkMetrics = make(map[string]NetworkInterfaceMetrics)
|
||||
rxTotal, txTotal, rxRate, txRate = 0, 0, 0, 0
|
||||
}
|
||||
// 直接使用采集到的网卡流量
|
||||
metrics.Network = networkMetrics
|
||||
metrics.RxTotal = rxTotal
|
||||
metrics.TxTotal = txTotal
|
||||
metrics.RxRate = rxRate
|
||||
metrics.TxRate = txRate
|
||||
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user