增加日志进程等信息采集

This commit is contained in:
Alex Yang
2025-12-04 16:29:05 +08:00
parent 1a80c5acb8
commit 057a2ea9ee
15 changed files with 2090 additions and 379 deletions

View File

@@ -41,6 +41,8 @@ func RegisterRoutes(r *gin.Engine) {
metrics.GET("/memory", GetMemoryMetrics)
metrics.GET("/disk", GetDiskMetrics)
metrics.GET("/network", GetNetworkMetrics)
metrics.GET("/processes", GetProcessMetrics) // 添加进程信息查询端点
metrics.GET("/disk_details", GetDiskDetails) // 添加磁盘详细信息查询端点
// 添加POST端点接收Agent发送的指标数据
metrics.POST("/", HandleMetricsPost)
}
@@ -73,6 +75,29 @@ type DiskMetrics struct {
Total uint64 `json:"total"` // 总容量 (bytes)
}
// ProcessMetrics holds the monitoring metrics of a single process.
type ProcessMetrics struct {
	// Name is the process name.
	Name string `json:"name"`
	// Username is the user name.
	Username string `json:"username"`
	// PID is the process ID.
	PID int32 `json:"pid"`
	// CPU is the CPU usage rate.
	CPU float64 `json:"cpu"`
	// Memory is the memory usage rate.
	Memory float64 `json:"memory"`
	// Path is the path.
	Path string `json:"path"`
	// Cmdline is the command line.
	Cmdline string `json:"cmdline"`
	// Ports lists the occupied ports.
	Ports []int `json:"ports"`
}
// DiskDetailMetrics holds the detailed information of a single disk device.
type DiskDetailMetrics struct {
	// DeviceID is the device ID.
	DeviceID string `json:"device_id"`
	// Status is the device status.
	Status string `json:"status"`
	// Type is the device type.
	Type string `json:"type"`
	// SizeGB is the device size in GB.
	SizeGB float64 `json:"size_gb"`
	// Model is the device model.
	Model string `json:"model"`
	// InterfaceType is the interface type.
	InterfaceType string `json:"interface_type"`
	// Description is the device description.
	Description string `json:"description"`
}
// NetworkInterfaceMetrics 网卡监控指标
type NetworkInterfaceMetrics struct {
BytesSent uint64 `json:"bytes_sent"` // 发送速率 (bytes/s)
@@ -83,11 +108,17 @@ type NetworkInterfaceMetrics struct {
// MetricsRequest is the metrics request structure.
// NOTE(review): this diff hunk appears to interleave the pre-change field list
// (the first five fields) with the post-change field list that follows —
// confirm which lines belong to the final struct.
type MetricsRequest struct {
CPU float64 `json:"cpu"`
CPUHz float64 `json:"cpu_hz"` // CPU frequency (MHz)
Memory float64 `json:"memory"`
Disk map[string]DiskMetrics `json:"disk"`
Network map[string]NetworkInterfaceMetrics `json:"network"`
CPU float64 `json:"cpu"`
CPUHz float64 `json:"cpu_hz"` // CPU frequency (MHz)
Memory float64 `json:"memory"`
Disk map[string]DiskMetrics `json:"disk"`
DiskDetails []DiskDetailMetrics `json:"disk_details"` // Disk detail information
Network map[string]NetworkInterfaceMetrics `json:"network"`
Processes []ProcessMetrics `json:"processes"` // Process information
RxTotal uint64 `json:"rx_total"` // Sum of cumulative received bytes across all network interfaces
TxTotal uint64 `json:"tx_total"` // Sum of cumulative sent bytes across all network interfaces
RxRate uint64 `json:"rx_rate"` // Sum of real-time receive rates across all network interfaces (bytes/s)
TxRate uint64 `json:"tx_rate"` // Sum of real-time send rates across all network interfaces (bytes/s)
}
// HandleMetricsPost 处理Agent发送的指标数据
@@ -162,24 +193,28 @@ func HandleMetricsPost(c *gin.Context) {
metricsList = append(metricsList, singleMetric)
}
// 创建单独的上下文用于InfluxDB写入避免HTTP请求结束时上下文被取消
writeCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
// 处理所有指标
for i, req := range metricsList {
// 写入CPU使用率指标
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "cpu", req.CPU, baseTags); err != nil {
if err := globalStorage.WriteMetric(writeCtx, deviceID, "cpu", req.CPU, baseTags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write CPU metrics: %v", err)
}
// 写入CPU频率指标如果有
if req.CPUHz > 0 {
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "cpu_hz", req.CPUHz, baseTags); err != nil {
if err := globalStorage.WriteMetric(writeCtx, deviceID, "cpu_hz", req.CPUHz, baseTags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write CPU Hz metrics: %v", err)
}
}
// 写入内存指标
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "memory", req.Memory, baseTags); err != nil {
if err := globalStorage.WriteMetric(writeCtx, deviceID, "memory", req.Memory, baseTags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write memory metrics: %v", err)
}
@@ -196,7 +231,7 @@ func HandleMetricsPost(c *gin.Context) {
tags["mountpoint"] = mountpoint
// 写入磁盘使用率指标
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "disk", diskMetrics.UsedPercent, tags); err != nil {
if err := globalStorage.WriteMetric(writeCtx, deviceID, "disk", diskMetrics.UsedPercent, tags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write disk metrics for mountpoint %s: %v", mountpoint, err)
}
@@ -206,6 +241,10 @@ func HandleMetricsPost(c *gin.Context) {
var totalBytesSent, totalBytesReceived uint64
var totalTxBytes, totalRxBytes uint64 // 累计总流量
for interfaceName, networkMetrics := range req.Network {
// 跳过空名称的网卡
if interfaceName == "" {
continue
}
// 为每个网卡创建标签,包含基础标签和网卡名称
interfaceTags := make(map[string]string)
// 复制基础标签
@@ -216,25 +255,25 @@ func HandleMetricsPost(c *gin.Context) {
interfaceTags["interface"] = interfaceName
// 写入网络发送速率指标
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "network_sent", float64(networkMetrics.BytesSent), interfaceTags); err != nil {
if err := globalStorage.WriteMetric(writeCtx, deviceID, "network_sent", float64(networkMetrics.BytesSent), interfaceTags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write network sent metrics for interface %s: %v", interfaceName, err)
}
// 写入网络接收速率指标
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "network_received", float64(networkMetrics.BytesReceived), interfaceTags); err != nil {
if err := globalStorage.WriteMetric(writeCtx, deviceID, "network_received", float64(networkMetrics.BytesReceived), interfaceTags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write network received metrics for interface %s: %v", interfaceName, err)
}
// 写入累计发送字节数指标
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "network_tx_bytes", float64(networkMetrics.TxBytes), interfaceTags); err != nil {
if err := globalStorage.WriteMetric(writeCtx, deviceID, "network_tx_bytes", float64(networkMetrics.TxBytes), interfaceTags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write network tx_bytes metrics for interface %s: %v", interfaceName, err)
}
// 写入累计接收字节数指标
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "network_rx_bytes", float64(networkMetrics.RxBytes), interfaceTags); err != nil {
if err := globalStorage.WriteMetric(writeCtx, deviceID, "network_rx_bytes", float64(networkMetrics.RxBytes), interfaceTags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write network rx_bytes metrics for interface %s: %v", interfaceName, err)
}
@@ -248,6 +287,22 @@ func HandleMetricsPost(c *gin.Context) {
totalRxBytes += networkMetrics.RxBytes
}
// 写入进程信息
for _, proc := range req.Processes {
if err := globalStorage.WriteProcessMetric(writeCtx, deviceID, proc.Name, proc.Username, proc.PID, proc.CPU, proc.Memory, proc.Path, proc.Cmdline, proc.Ports, baseTags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write process metrics for PID %d: %v", proc.PID, err)
}
}
// 写入磁盘详细信息
for _, diskDetail := range req.DiskDetails {
if err := globalStorage.WriteDiskDetailMetric(writeCtx, deviceID, diskDetail.DeviceID, diskDetail.Status, diskDetail.Type, diskDetail.SizeGB, diskDetail.Model, diskDetail.InterfaceType, diskDetail.Description, baseTags); err != nil {
// 只记录警告,不影响后续指标处理
log.Printf("Warning: Failed to write disk details for device %s: %v", diskDetail.DeviceID, err)
}
}
// 广播指标更新消息,只广播最后一个指标
if i == len(metricsList)-1 {
// 准备广播的磁盘使用率数据(兼容旧格式)
@@ -412,7 +467,7 @@ func GetCPUMetrics(c *gin.Context) {
}
// 处理数据传递interval、startTime和endTime参数
processedData := ProcessMetrics(points, aggregation, interval, startTime, endTime)
processedData := ProcessMetricData(points, aggregation, interval, startTime, endTime)
c.JSON(http.StatusOK, gin.H{
"data": processedData,
@@ -440,7 +495,7 @@ func GetMemoryMetrics(c *gin.Context) {
}
// 处理数据传递interval、startTime和endTime参数
processedData := ProcessMetrics(points, aggregation, interval, startTime, endTime)
processedData := ProcessMetricData(points, aggregation, interval, startTime, endTime)
c.JSON(http.StatusOK, gin.H{
"data": processedData,
@@ -481,7 +536,7 @@ func GetDiskMetrics(c *gin.Context) {
// 处理数据,为每个挂载点创建独立的数据集
result := make(map[string][]MetricData)
for mountpoint, mountpointPoints := range mountpointData {
processedData := ProcessMetrics(mountpointPoints, aggregation, interval, startTime, endTime)
processedData := ProcessMetricData(mountpointPoints, aggregation, interval, startTime, endTime)
result[mountpoint] = processedData
}
@@ -499,10 +554,14 @@ func GetNetworkMetrics(c *gin.Context) {
aggregation := c.DefaultQuery("aggregation", "average")
interval := c.DefaultQuery("interval", "10s") // 添加interval参数默认10秒
// 查询发送和接收的网络指标
// 查询发送和接收的网络速率指标
sentPoints, err1 := globalStorage.QueryMetrics(context.Background(), deviceID, "network_sent", startTime, endTime)
receivedPoints, err2 := globalStorage.QueryMetrics(context.Background(), deviceID, "network_received", startTime, endTime)
// 查询发送和接收的累积总流量指标
txBytesPoints, err3 := globalStorage.QueryMetrics(context.Background(), deviceID, "network_total_tx_bytes", startTime, endTime)
rxBytesPoints, err4 := globalStorage.QueryMetrics(context.Background(), deviceID, "network_total_rx_bytes", startTime, endTime)
// 处理错误
if err1 != nil {
log.Printf("Warning: Failed to query network sent metrics: %v", err1)
@@ -512,12 +571,24 @@ func GetNetworkMetrics(c *gin.Context) {
log.Printf("Warning: Failed to query network received metrics: %v", err2)
receivedPoints = []storage.MetricPoint{}
}
if err3 != nil {
log.Printf("Warning: Failed to query network_total_tx_bytes metrics: %v", err3)
txBytesPoints = []storage.MetricPoint{}
}
if err4 != nil {
log.Printf("Warning: Failed to query network_total_rx_bytes metrics: %v", err4)
rxBytesPoints = []storage.MetricPoint{}
}
// 按网卡名称分组发送和接收的指标
// 按网卡名称分组发送和接收的速率指标
sentByInterface := make(map[string][]storage.MetricPoint)
receivedByInterface := make(map[string][]storage.MetricPoint)
// 分组发送的网络指标
// 按网卡名称分组发送和接收的累积总流量指标
txBytesByInterface := make(map[string][]storage.MetricPoint)
rxBytesByInterface := make(map[string][]storage.MetricPoint)
// 分组发送的网络速率指标
for _, point := range sentPoints {
// 获取网卡名称,默认使用"all"表示所有网卡
interfaceName := point.Tags["interface"]
@@ -527,7 +598,7 @@ func GetNetworkMetrics(c *gin.Context) {
sentByInterface[interfaceName] = append(sentByInterface[interfaceName], point)
}
// 分组接收的网络指标
// 分组接收的网络速率指标
for _, point := range receivedPoints {
// 获取网卡名称,默认使用"all"表示所有网卡
interfaceName := point.Tags["interface"]
@@ -537,6 +608,26 @@ func GetNetworkMetrics(c *gin.Context) {
receivedByInterface[interfaceName] = append(receivedByInterface[interfaceName], point)
}
// 分组发送的累积总流量指标
for _, point := range txBytesPoints {
// 获取网卡名称,默认使用"all"表示所有网卡
interfaceName := point.Tags["interface"]
if interfaceName == "" {
interfaceName = "all"
}
txBytesByInterface[interfaceName] = append(txBytesByInterface[interfaceName], point)
}
// 分组接收的累积总流量指标
for _, point := range rxBytesPoints {
// 获取网卡名称,默认使用"all"表示所有网卡
interfaceName := point.Tags["interface"]
if interfaceName == "" {
interfaceName = "all"
}
rxBytesByInterface[interfaceName] = append(rxBytesByInterface[interfaceName], point)
}
// 处理数据,为每个网卡创建独立的数据集
result := make(map[string]map[string][]MetricData)
@@ -548,21 +639,37 @@ func GetNetworkMetrics(c *gin.Context) {
for iface := range receivedByInterface {
allInterfaces[iface] = true
}
for iface := range txBytesByInterface {
allInterfaces[iface] = true
}
for iface := range rxBytesByInterface {
allInterfaces[iface] = true
}
// 为每个网卡处理数据
for iface := range allInterfaces {
// 获取该网卡的发送和接收指标
// 获取该网卡的速率指标
ifaceSentPoints := sentByInterface[iface]
ifaceReceivedPoints := receivedByInterface[iface]
// 处理数据
processedSentData := ProcessMetrics(ifaceSentPoints, aggregation, interval, startTime, endTime)
processedReceivedData := ProcessMetrics(ifaceReceivedPoints, aggregation, interval, startTime, endTime)
// 获取该网卡的累积总流量指标
ifaceTxBytesPoints := txBytesByInterface[iface]
ifaceRxBytesPoints := rxBytesByInterface[iface]
// 处理速率数据
processedSentData := ProcessMetricData(ifaceSentPoints, aggregation, interval, startTime, endTime)
processedReceivedData := ProcessMetricData(ifaceReceivedPoints, aggregation, interval, startTime, endTime)
// 处理累积总流量数据
processedTxBytesData := ProcessMetricData(ifaceTxBytesPoints, aggregation, interval, startTime, endTime)
processedRxBytesData := ProcessMetricData(ifaceRxBytesPoints, aggregation, interval, startTime, endTime)
// 保存结果
result[iface] = map[string][]MetricData{
"sent": processedSentData,
"received": processedReceivedData,
"sent": processedSentData, // 发送速率数据
"received": processedReceivedData, // 接收速率数据
"tx_bytes": processedTxBytesData, // 发送累积总流量数据
"rx_bytes": processedRxBytesData, // 接收累积总流量数据
}
}
@@ -806,3 +913,49 @@ func GetAllDevices(c *gin.Context) {
"devices": devices,
})
}
// GetProcessMetrics returns stored process metrics.
//
// Query parameters:
//   - device_id: device to query. It has no default; per the original
//     comment, an empty value means "query all devices".
//   - start_time: start of the time range (default "-1h").
//   - end_time: end of the time range (default "now()").
//
// The response is always HTTP 200 with a "data" field. When the storage
// query fails, the error is only logged and "data" is an empty array.
func GetProcessMetrics(c *gin.Context) {
	deviceID := c.Query("device_id")
	startTime := c.DefaultQuery("start_time", "-1h")
	endTime := c.DefaultQuery("end_time", "now()")

	// Run the read query on the request context so it is canceled when the
	// client goes away, instead of running unobserved on context.Background().
	// NOTE(review): startTime and endTime are forwarded as raw query-string
	// values; confirm the storage layer validates them.
	processes, err := globalStorage.QueryProcessMetrics(c.Request.Context(), deviceID, startTime, endTime)
	if err != nil {
		// Best effort: log a warning and answer with an empty data set.
		log.Printf("Warning: Failed to query process metrics: %v", err)
		c.JSON(http.StatusOK, gin.H{
			"data": []map[string]interface{}{},
		})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"data": processes,
	})
}
// GetDiskDetails returns stored disk detail information.
//
// Query parameters:
//   - device_id: device to query. It has no default; per the original
//     comment, an empty value means "query all devices".
//   - start_time: start of the time range (default "-1h").
//   - end_time: end of the time range (default "now()").
//
// The response is always HTTP 200 with a "data" field. When the storage
// query fails, the error is only logged and "data" is an empty array.
func GetDiskDetails(c *gin.Context) {
	deviceID := c.Query("device_id")
	startTime := c.DefaultQuery("start_time", "-1h")
	endTime := c.DefaultQuery("end_time", "now()")

	// Run the read query on the request context so it is canceled when the
	// client goes away, instead of running unobserved on context.Background().
	// NOTE(review): startTime and endTime are forwarded as raw query-string
	// values; confirm the storage layer validates them.
	diskDetails, err := globalStorage.QueryDiskDetails(c.Request.Context(), deviceID, startTime, endTime)
	if err != nil {
		// Best effort: log a warning and answer with an empty data set.
		log.Printf("Warning: Failed to query disk details: %v", err)
		c.JSON(http.StatusOK, gin.H{
			"data": []map[string]interface{}{},
		})
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"data": diskDetails,
	})
}