增加日志进程等信息采集
This commit is contained in:
@@ -41,6 +41,8 @@ func RegisterRoutes(r *gin.Engine) {
|
||||
metrics.GET("/memory", GetMemoryMetrics)
|
||||
metrics.GET("/disk", GetDiskMetrics)
|
||||
metrics.GET("/network", GetNetworkMetrics)
|
||||
metrics.GET("/processes", GetProcessMetrics) // 添加进程信息查询端点
|
||||
metrics.GET("/disk_details", GetDiskDetails) // 添加磁盘详细信息查询端点
|
||||
// 添加POST端点,接收Agent发送的指标数据
|
||||
metrics.POST("/", HandleMetricsPost)
|
||||
}
|
||||
@@ -73,6 +75,29 @@ type DiskMetrics struct {
|
||||
Total uint64 `json:"total"` // 总容量 (bytes)
|
||||
}
|
||||
|
||||
// ProcessMetrics 进程监控指标
|
||||
type ProcessMetrics struct {
|
||||
Name string `json:"name"` // 进程名
|
||||
Username string `json:"username"` // 用户名
|
||||
PID int32 `json:"pid"` // 进程ID
|
||||
CPU float64 `json:"cpu"` // CPU使用率
|
||||
Memory float64 `json:"memory"` // 内存使用率
|
||||
Path string `json:"path"` // 路径
|
||||
Cmdline string `json:"cmdline"` // 命令行
|
||||
Ports []int `json:"ports"` // 占用端口
|
||||
}
|
||||
|
||||
// DiskDetailMetrics 磁盘详细信息
|
||||
type DiskDetailMetrics struct {
|
||||
DeviceID string `json:"device_id"` // 设备ID
|
||||
Status string `json:"status"` // 设备状态
|
||||
Type string `json:"type"` // 设备类型
|
||||
SizeGB float64 `json:"size_gb"` // 设备大小(GB)
|
||||
Model string `json:"model"` // 设备型号
|
||||
InterfaceType string `json:"interface_type"` // 接口类型
|
||||
Description string `json:"description"` // 设备描述
|
||||
}
|
||||
|
||||
// NetworkInterfaceMetrics 网卡监控指标
|
||||
type NetworkInterfaceMetrics struct {
|
||||
BytesSent uint64 `json:"bytes_sent"` // 发送速率 (bytes/s)
|
||||
@@ -83,11 +108,17 @@ type NetworkInterfaceMetrics struct {
|
||||
|
||||
// MetricsRequest 指标请求结构
|
||||
type MetricsRequest struct {
|
||||
CPU float64 `json:"cpu"`
|
||||
CPUHz float64 `json:"cpu_hz"` // CPU频率 (MHz)
|
||||
Memory float64 `json:"memory"`
|
||||
Disk map[string]DiskMetrics `json:"disk"`
|
||||
Network map[string]NetworkInterfaceMetrics `json:"network"`
|
||||
CPU float64 `json:"cpu"`
|
||||
CPUHz float64 `json:"cpu_hz"` // CPU频率 (MHz)
|
||||
Memory float64 `json:"memory"`
|
||||
Disk map[string]DiskMetrics `json:"disk"`
|
||||
DiskDetails []DiskDetailMetrics `json:"disk_details"` // 磁盘详细信息
|
||||
Network map[string]NetworkInterfaceMetrics `json:"network"`
|
||||
Processes []ProcessMetrics `json:"processes"` // 进程信息
|
||||
RxTotal uint64 `json:"rx_total"` // 所有网卡累计接收字节数总和
|
||||
TxTotal uint64 `json:"tx_total"` // 所有网卡累计发送字节数总和
|
||||
RxRate uint64 `json:"rx_rate"` // 所有网卡实时接收速率总和 (bytes/s)
|
||||
TxRate uint64 `json:"tx_rate"` // 所有网卡实时发送速率总和 (bytes/s)
|
||||
}
|
||||
|
||||
// HandleMetricsPost 处理Agent发送的指标数据
|
||||
@@ -162,24 +193,28 @@ func HandleMetricsPost(c *gin.Context) {
|
||||
metricsList = append(metricsList, singleMetric)
|
||||
}
|
||||
|
||||
// 创建单独的上下文用于InfluxDB写入,避免HTTP请求结束时上下文被取消
|
||||
writeCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// 处理所有指标
|
||||
for i, req := range metricsList {
|
||||
// 写入CPU使用率指标
|
||||
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "cpu", req.CPU, baseTags); err != nil {
|
||||
if err := globalStorage.WriteMetric(writeCtx, deviceID, "cpu", req.CPU, baseTags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write CPU metrics: %v", err)
|
||||
}
|
||||
|
||||
// 写入CPU频率指标(如果有)
|
||||
if req.CPUHz > 0 {
|
||||
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "cpu_hz", req.CPUHz, baseTags); err != nil {
|
||||
if err := globalStorage.WriteMetric(writeCtx, deviceID, "cpu_hz", req.CPUHz, baseTags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write CPU Hz metrics: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 写入内存指标
|
||||
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "memory", req.Memory, baseTags); err != nil {
|
||||
if err := globalStorage.WriteMetric(writeCtx, deviceID, "memory", req.Memory, baseTags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write memory metrics: %v", err)
|
||||
}
|
||||
@@ -196,7 +231,7 @@ func HandleMetricsPost(c *gin.Context) {
|
||||
tags["mountpoint"] = mountpoint
|
||||
|
||||
// 写入磁盘使用率指标
|
||||
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "disk", diskMetrics.UsedPercent, tags); err != nil {
|
||||
if err := globalStorage.WriteMetric(writeCtx, deviceID, "disk", diskMetrics.UsedPercent, tags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write disk metrics for mountpoint %s: %v", mountpoint, err)
|
||||
}
|
||||
@@ -206,6 +241,10 @@ func HandleMetricsPost(c *gin.Context) {
|
||||
var totalBytesSent, totalBytesReceived uint64
|
||||
var totalTxBytes, totalRxBytes uint64 // 累计总流量
|
||||
for interfaceName, networkMetrics := range req.Network {
|
||||
// 跳过空名称的网卡
|
||||
if interfaceName == "" {
|
||||
continue
|
||||
}
|
||||
// 为每个网卡创建标签,包含基础标签和网卡名称
|
||||
interfaceTags := make(map[string]string)
|
||||
// 复制基础标签
|
||||
@@ -216,25 +255,25 @@ func HandleMetricsPost(c *gin.Context) {
|
||||
interfaceTags["interface"] = interfaceName
|
||||
|
||||
// 写入网络发送速率指标
|
||||
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "network_sent", float64(networkMetrics.BytesSent), interfaceTags); err != nil {
|
||||
if err := globalStorage.WriteMetric(writeCtx, deviceID, "network_sent", float64(networkMetrics.BytesSent), interfaceTags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write network sent metrics for interface %s: %v", interfaceName, err)
|
||||
}
|
||||
|
||||
// 写入网络接收速率指标
|
||||
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "network_received", float64(networkMetrics.BytesReceived), interfaceTags); err != nil {
|
||||
if err := globalStorage.WriteMetric(writeCtx, deviceID, "network_received", float64(networkMetrics.BytesReceived), interfaceTags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write network received metrics for interface %s: %v", interfaceName, err)
|
||||
}
|
||||
|
||||
// 写入累计发送字节数指标
|
||||
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "network_tx_bytes", float64(networkMetrics.TxBytes), interfaceTags); err != nil {
|
||||
if err := globalStorage.WriteMetric(writeCtx, deviceID, "network_tx_bytes", float64(networkMetrics.TxBytes), interfaceTags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write network tx_bytes metrics for interface %s: %v", interfaceName, err)
|
||||
}
|
||||
|
||||
// 写入累计接收字节数指标
|
||||
if err := globalStorage.WriteMetric(c.Request.Context(), deviceID, "network_rx_bytes", float64(networkMetrics.RxBytes), interfaceTags); err != nil {
|
||||
if err := globalStorage.WriteMetric(writeCtx, deviceID, "network_rx_bytes", float64(networkMetrics.RxBytes), interfaceTags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write network rx_bytes metrics for interface %s: %v", interfaceName, err)
|
||||
}
|
||||
@@ -248,6 +287,22 @@ func HandleMetricsPost(c *gin.Context) {
|
||||
totalRxBytes += networkMetrics.RxBytes
|
||||
}
|
||||
|
||||
// 写入进程信息
|
||||
for _, proc := range req.Processes {
|
||||
if err := globalStorage.WriteProcessMetric(writeCtx, deviceID, proc.Name, proc.Username, proc.PID, proc.CPU, proc.Memory, proc.Path, proc.Cmdline, proc.Ports, baseTags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write process metrics for PID %d: %v", proc.PID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// 写入磁盘详细信息
|
||||
for _, diskDetail := range req.DiskDetails {
|
||||
if err := globalStorage.WriteDiskDetailMetric(writeCtx, deviceID, diskDetail.DeviceID, diskDetail.Status, diskDetail.Type, diskDetail.SizeGB, diskDetail.Model, diskDetail.InterfaceType, diskDetail.Description, baseTags); err != nil {
|
||||
// 只记录警告,不影响后续指标处理
|
||||
log.Printf("Warning: Failed to write disk details for device %s: %v", diskDetail.DeviceID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// 广播指标更新消息,只广播最后一个指标
|
||||
if i == len(metricsList)-1 {
|
||||
// 准备广播的磁盘使用率数据(兼容旧格式)
|
||||
@@ -412,7 +467,7 @@ func GetCPUMetrics(c *gin.Context) {
|
||||
}
|
||||
|
||||
// 处理数据,传递interval、startTime和endTime参数
|
||||
processedData := ProcessMetrics(points, aggregation, interval, startTime, endTime)
|
||||
processedData := ProcessMetricData(points, aggregation, interval, startTime, endTime)
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"data": processedData,
|
||||
@@ -440,7 +495,7 @@ func GetMemoryMetrics(c *gin.Context) {
|
||||
}
|
||||
|
||||
// 处理数据,传递interval、startTime和endTime参数
|
||||
processedData := ProcessMetrics(points, aggregation, interval, startTime, endTime)
|
||||
processedData := ProcessMetricData(points, aggregation, interval, startTime, endTime)
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"data": processedData,
|
||||
@@ -481,7 +536,7 @@ func GetDiskMetrics(c *gin.Context) {
|
||||
// 处理数据,为每个挂载点创建独立的数据集
|
||||
result := make(map[string][]MetricData)
|
||||
for mountpoint, mountpointPoints := range mountpointData {
|
||||
processedData := ProcessMetrics(mountpointPoints, aggregation, interval, startTime, endTime)
|
||||
processedData := ProcessMetricData(mountpointPoints, aggregation, interval, startTime, endTime)
|
||||
result[mountpoint] = processedData
|
||||
}
|
||||
|
||||
@@ -499,10 +554,14 @@ func GetNetworkMetrics(c *gin.Context) {
|
||||
aggregation := c.DefaultQuery("aggregation", "average")
|
||||
interval := c.DefaultQuery("interval", "10s") // 添加interval参数,默认10秒
|
||||
|
||||
// 查询发送和接收的网络指标
|
||||
// 查询发送和接收的网络速率指标
|
||||
sentPoints, err1 := globalStorage.QueryMetrics(context.Background(), deviceID, "network_sent", startTime, endTime)
|
||||
receivedPoints, err2 := globalStorage.QueryMetrics(context.Background(), deviceID, "network_received", startTime, endTime)
|
||||
|
||||
// 查询发送和接收的累积总流量指标
|
||||
txBytesPoints, err3 := globalStorage.QueryMetrics(context.Background(), deviceID, "network_total_tx_bytes", startTime, endTime)
|
||||
rxBytesPoints, err4 := globalStorage.QueryMetrics(context.Background(), deviceID, "network_total_rx_bytes", startTime, endTime)
|
||||
|
||||
// 处理错误
|
||||
if err1 != nil {
|
||||
log.Printf("Warning: Failed to query network sent metrics: %v", err1)
|
||||
@@ -512,12 +571,24 @@ func GetNetworkMetrics(c *gin.Context) {
|
||||
log.Printf("Warning: Failed to query network received metrics: %v", err2)
|
||||
receivedPoints = []storage.MetricPoint{}
|
||||
}
|
||||
if err3 != nil {
|
||||
log.Printf("Warning: Failed to query network_total_tx_bytes metrics: %v", err3)
|
||||
txBytesPoints = []storage.MetricPoint{}
|
||||
}
|
||||
if err4 != nil {
|
||||
log.Printf("Warning: Failed to query network_total_rx_bytes metrics: %v", err4)
|
||||
rxBytesPoints = []storage.MetricPoint{}
|
||||
}
|
||||
|
||||
// 按网卡名称分组发送和接收的指标
|
||||
// 按网卡名称分组发送和接收的速率指标
|
||||
sentByInterface := make(map[string][]storage.MetricPoint)
|
||||
receivedByInterface := make(map[string][]storage.MetricPoint)
|
||||
|
||||
// 分组发送的网络指标
|
||||
// 按网卡名称分组发送和接收的累积总流量指标
|
||||
txBytesByInterface := make(map[string][]storage.MetricPoint)
|
||||
rxBytesByInterface := make(map[string][]storage.MetricPoint)
|
||||
|
||||
// 分组发送的网络速率指标
|
||||
for _, point := range sentPoints {
|
||||
// 获取网卡名称,默认使用"all"表示所有网卡
|
||||
interfaceName := point.Tags["interface"]
|
||||
@@ -527,7 +598,7 @@ func GetNetworkMetrics(c *gin.Context) {
|
||||
sentByInterface[interfaceName] = append(sentByInterface[interfaceName], point)
|
||||
}
|
||||
|
||||
// 分组接收的网络指标
|
||||
// 分组接收的网络速率指标
|
||||
for _, point := range receivedPoints {
|
||||
// 获取网卡名称,默认使用"all"表示所有网卡
|
||||
interfaceName := point.Tags["interface"]
|
||||
@@ -537,6 +608,26 @@ func GetNetworkMetrics(c *gin.Context) {
|
||||
receivedByInterface[interfaceName] = append(receivedByInterface[interfaceName], point)
|
||||
}
|
||||
|
||||
// 分组发送的累积总流量指标
|
||||
for _, point := range txBytesPoints {
|
||||
// 获取网卡名称,默认使用"all"表示所有网卡
|
||||
interfaceName := point.Tags["interface"]
|
||||
if interfaceName == "" {
|
||||
interfaceName = "all"
|
||||
}
|
||||
txBytesByInterface[interfaceName] = append(txBytesByInterface[interfaceName], point)
|
||||
}
|
||||
|
||||
// 分组接收的累积总流量指标
|
||||
for _, point := range rxBytesPoints {
|
||||
// 获取网卡名称,默认使用"all"表示所有网卡
|
||||
interfaceName := point.Tags["interface"]
|
||||
if interfaceName == "" {
|
||||
interfaceName = "all"
|
||||
}
|
||||
rxBytesByInterface[interfaceName] = append(rxBytesByInterface[interfaceName], point)
|
||||
}
|
||||
|
||||
// 处理数据,为每个网卡创建独立的数据集
|
||||
result := make(map[string]map[string][]MetricData)
|
||||
|
||||
@@ -548,21 +639,37 @@ func GetNetworkMetrics(c *gin.Context) {
|
||||
for iface := range receivedByInterface {
|
||||
allInterfaces[iface] = true
|
||||
}
|
||||
for iface := range txBytesByInterface {
|
||||
allInterfaces[iface] = true
|
||||
}
|
||||
for iface := range rxBytesByInterface {
|
||||
allInterfaces[iface] = true
|
||||
}
|
||||
|
||||
// 为每个网卡处理数据
|
||||
for iface := range allInterfaces {
|
||||
// 获取该网卡的发送和接收指标
|
||||
// 获取该网卡的速率指标
|
||||
ifaceSentPoints := sentByInterface[iface]
|
||||
ifaceReceivedPoints := receivedByInterface[iface]
|
||||
|
||||
// 处理数据
|
||||
processedSentData := ProcessMetrics(ifaceSentPoints, aggregation, interval, startTime, endTime)
|
||||
processedReceivedData := ProcessMetrics(ifaceReceivedPoints, aggregation, interval, startTime, endTime)
|
||||
// 获取该网卡的累积总流量指标
|
||||
ifaceTxBytesPoints := txBytesByInterface[iface]
|
||||
ifaceRxBytesPoints := rxBytesByInterface[iface]
|
||||
|
||||
// 处理速率数据
|
||||
processedSentData := ProcessMetricData(ifaceSentPoints, aggregation, interval, startTime, endTime)
|
||||
processedReceivedData := ProcessMetricData(ifaceReceivedPoints, aggregation, interval, startTime, endTime)
|
||||
|
||||
// 处理累积总流量数据
|
||||
processedTxBytesData := ProcessMetricData(ifaceTxBytesPoints, aggregation, interval, startTime, endTime)
|
||||
processedRxBytesData := ProcessMetricData(ifaceRxBytesPoints, aggregation, interval, startTime, endTime)
|
||||
|
||||
// 保存结果
|
||||
result[iface] = map[string][]MetricData{
|
||||
"sent": processedSentData,
|
||||
"received": processedReceivedData,
|
||||
"sent": processedSentData, // 发送速率数据
|
||||
"received": processedReceivedData, // 接收速率数据
|
||||
"tx_bytes": processedTxBytesData, // 发送累积总流量数据
|
||||
"rx_bytes": processedRxBytesData, // 接收累积总流量数据
|
||||
}
|
||||
}
|
||||
|
||||
@@ -806,3 +913,49 @@ func GetAllDevices(c *gin.Context) {
|
||||
"devices": devices,
|
||||
})
|
||||
}
|
||||
|
||||
// GetProcessMetrics 获取进程指标
|
||||
func GetProcessMetrics(c *gin.Context) {
|
||||
// 获取查询参数
|
||||
deviceID := c.Query("device_id") // 不使用默认值,空值表示查询所有设备
|
||||
startTime := c.DefaultQuery("start_time", "-1h")
|
||||
endTime := c.DefaultQuery("end_time", "now()")
|
||||
|
||||
// 查询数据
|
||||
processes, err := globalStorage.QueryProcessMetrics(context.Background(), deviceID, startTime, endTime)
|
||||
if err != nil {
|
||||
// 只记录警告,返回空数据
|
||||
log.Printf("Warning: Failed to query process metrics: %v", err)
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"data": []map[string]interface{}{},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"data": processes,
|
||||
})
|
||||
}
|
||||
|
||||
// GetDiskDetails 获取磁盘详细信息
|
||||
func GetDiskDetails(c *gin.Context) {
|
||||
// 获取查询参数
|
||||
deviceID := c.Query("device_id") // 不使用默认值,空值表示查询所有设备
|
||||
startTime := c.DefaultQuery("start_time", "-1h")
|
||||
endTime := c.DefaultQuery("end_time", "now()")
|
||||
|
||||
// 查询数据
|
||||
diskDetails, err := globalStorage.QueryDiskDetails(context.Background(), deviceID, startTime, endTime)
|
||||
if err != nil {
|
||||
// 只记录警告,返回空数据
|
||||
log.Printf("Warning: Failed to query disk details: %v", err)
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"data": []map[string]interface{}{},
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"data": diskDetails,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -71,8 +71,8 @@ func FormatTimeByInterval(t time.Time, intervalSeconds int) string {
|
||||
}
|
||||
}
|
||||
|
||||
// ProcessMetrics 处理监控数据,支持动态时间区间
|
||||
func ProcessMetrics(points []storage.MetricPoint, aggregation string, intervalStr string, startTime, endTime string) []MetricData {
|
||||
// ProcessMetricData 处理监控数据,支持动态时间区间
|
||||
func ProcessMetricData(points []storage.MetricPoint, aggregation string, intervalStr string, startTime, endTime string) []MetricData {
|
||||
// 解析时间区间
|
||||
intervalSeconds, err := ParseInterval(intervalStr)
|
||||
if err != nil {
|
||||
|
||||
@@ -2,7 +2,9 @@ package storage
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -10,6 +12,28 @@ import (
|
||||
"github.com/monitor/backend/config"
|
||||
)
|
||||
|
||||
// formatTags 将标签映射格式化为InfluxDB行协议格式
|
||||
func formatTags(tags map[string]string) string {
|
||||
var tagList []string
|
||||
for k, v := range tags {
|
||||
// 跳过空值的标签,避免InfluxDB解析错误
|
||||
if v == "" {
|
||||
continue
|
||||
}
|
||||
tagList = append(tagList, fmt.Sprintf("%s=%s", k, escapeTagValue(v)))
|
||||
}
|
||||
return strings.Join(tagList, ",")
|
||||
}
|
||||
|
||||
// escapeTagValue 转义标签值中的特殊字符
|
||||
func escapeTagValue(value string) string {
|
||||
// 替换逗号、空格和等号为转义后的形式
|
||||
escaped := strings.ReplaceAll(value, ",", "\\,")
|
||||
escaped = strings.ReplaceAll(escaped, " ", "\\ ")
|
||||
escaped = strings.ReplaceAll(escaped, "=", "\\=")
|
||||
return escaped
|
||||
}
|
||||
|
||||
// MetricPoint 自定义监控指标点
|
||||
type MetricPoint struct {
|
||||
Time time.Time `json:"time"`
|
||||
@@ -39,8 +63,10 @@ func NewStorage(cfg *config.Config) *Storage {
|
||||
client = influxdb2.NewClient(cfg.InfluxDB.URL, "")
|
||||
}
|
||||
|
||||
// 配置InfluxDB客户端选项
|
||||
options := client.Options()
|
||||
// 禁用InfluxDB客户端的调试日志
|
||||
client.Options().SetLogLevel(0)
|
||||
options.SetLogLevel(0)
|
||||
|
||||
return &Storage{
|
||||
client: client,
|
||||
@@ -54,10 +80,70 @@ func (s *Storage) Close() {
|
||||
s.client.Close()
|
||||
}
|
||||
|
||||
// WriteMetric 写入监控指标
|
||||
func (s *Storage) WriteMetric(ctx context.Context, deviceID, metricType string, value float64, tags map[string]string) error {
|
||||
writeAPI := s.client.WriteAPIBlocking(s.org, s.bucket)
|
||||
// 写入数据到InfluxDB,带重试机制
|
||||
func (s *Storage) writeData(ctx context.Context, measurement string, tags map[string]string, fields map[string]interface{}, deviceID, metricType string) error {
|
||||
// 重试配置 - 减少重试次数和延迟,确保在超时时间内完成
|
||||
maxRetries := 2
|
||||
baseDelay := 200 * time.Millisecond
|
||||
|
||||
for i := 0; i <= maxRetries; i++ {
|
||||
// 如果上下文已取消,直接返回
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
// 写入数据点
|
||||
writeAPI := s.client.WriteAPIBlocking(s.org, s.bucket)
|
||||
// 构建行协议字符串
|
||||
var fieldList []string
|
||||
for k, v := range fields {
|
||||
var fieldStr string
|
||||
// 根据字段类型格式化
|
||||
switch v := v.(type) {
|
||||
case string:
|
||||
fieldStr = fmt.Sprintf("%s=%q", k, v)
|
||||
case float64, int, int32, int64:
|
||||
fieldStr = fmt.Sprintf("%s=%v", k, v)
|
||||
case bool:
|
||||
fieldStr = fmt.Sprintf("%s=%t", k, v)
|
||||
default:
|
||||
// 转换为字符串
|
||||
fieldStr = fmt.Sprintf("%s=%q", k, fmt.Sprintf("%v", v))
|
||||
}
|
||||
fieldList = append(fieldList, fieldStr)
|
||||
}
|
||||
line := fmt.Sprintf("%s,%s %s %d", measurement, formatTags(tags), strings.Join(fieldList, ","), time.Now().UnixNano())
|
||||
err := writeAPI.WriteRecord(ctx, line)
|
||||
|
||||
if err == nil {
|
||||
// 写入成功,直接返回
|
||||
return nil
|
||||
}
|
||||
|
||||
// 如果是最后一次重试,返回错误
|
||||
if i == maxRetries {
|
||||
return err
|
||||
}
|
||||
|
||||
// 计算重试延迟(指数退避)
|
||||
delay := baseDelay*time.Duration(1<<i) + time.Duration(rand.Intn(50))*time.Millisecond
|
||||
log.Printf("Warning: InfluxDB write failed for device %s, metric %s, retrying in %v... (Attempt %d/%d)\nError: %v", deviceID, metricType, delay, i+1, maxRetries, err)
|
||||
|
||||
// 等待重试
|
||||
select {
|
||||
case <-time.After(delay):
|
||||
// 继续重试
|
||||
case <-ctx.Done():
|
||||
// 上下文取消,返回错误
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteMetric 写入监控指标,带重试机制
|
||||
func (s *Storage) WriteMetric(ctx context.Context, deviceID, metricType string, value float64, tags map[string]string) error {
|
||||
// 创建标签映射,合并原有标签和新标签
|
||||
allTags := make(map[string]string)
|
||||
// 复制原有标签
|
||||
@@ -69,18 +155,77 @@ func (s *Storage) WriteMetric(ctx context.Context, deviceID, metricType string,
|
||||
// 添加指标类型标签
|
||||
allTags["type"] = metricType
|
||||
|
||||
// 创建数据点
|
||||
point := influxdb2.NewPoint(
|
||||
"metrics",
|
||||
allTags,
|
||||
map[string]interface{}{
|
||||
"value": value,
|
||||
},
|
||||
time.Now(),
|
||||
)
|
||||
// 创建字段映射
|
||||
fields := map[string]interface{}{
|
||||
"value": value,
|
||||
}
|
||||
|
||||
// 写入数据点
|
||||
return writeAPI.WritePoint(ctx, point)
|
||||
// 使用新的writeData方法
|
||||
return s.writeData(ctx, "metrics", allTags, fields, deviceID, metricType)
|
||||
}
|
||||
|
||||
// WriteProcessMetric 写入进程指标
|
||||
func (s *Storage) WriteProcessMetric(ctx context.Context, deviceID string, processName, username string, pid int32, cpu, memory float64, path, cmdline string, ports []int, tags map[string]string) error {
|
||||
// 创建标签映射,合并原有标签和新标签
|
||||
allTags := make(map[string]string)
|
||||
// 复制原有标签
|
||||
for k, v := range tags {
|
||||
allTags[k] = v
|
||||
}
|
||||
// 添加设备ID标签
|
||||
allTags["device_id"] = deviceID
|
||||
// 添加进程相关标签
|
||||
allTags["process_name"] = processName
|
||||
allTags["username"] = username
|
||||
allTags["pid"] = fmt.Sprintf("%d", pid)
|
||||
|
||||
// 处理端口标签,只取前5个端口
|
||||
portsStr := make([]string, 0, len(ports))
|
||||
for i, port := range ports {
|
||||
if i >= 5 {
|
||||
break
|
||||
}
|
||||
portsStr = append(portsStr, fmt.Sprintf("%d", port))
|
||||
}
|
||||
allTags["ports"] = strings.Join(portsStr, ",")
|
||||
|
||||
// 创建字段映射
|
||||
fields := map[string]interface{}{
|
||||
"cpu_usage": cpu,
|
||||
"memory_usage": memory,
|
||||
"path": path,
|
||||
"cmdline": cmdline,
|
||||
}
|
||||
|
||||
// 使用新的writeData方法
|
||||
return s.writeData(ctx, "processes", allTags, fields, deviceID, "process")
|
||||
}
|
||||
|
||||
// WriteDiskDetailMetric 写入磁盘详细信息
|
||||
func (s *Storage) WriteDiskDetailMetric(ctx context.Context, deviceID, diskDeviceID, status, diskType string, sizeGB float64, model, interfaceType, description string, tags map[string]string) error {
|
||||
// 创建标签映射,合并原有标签和新标签
|
||||
allTags := make(map[string]string)
|
||||
// 复制原有标签
|
||||
for k, v := range tags {
|
||||
allTags[k] = v
|
||||
}
|
||||
// 添加设备ID标签
|
||||
allTags["device_id"] = deviceID
|
||||
// 添加磁盘相关标签
|
||||
allTags["disk_id"] = diskDeviceID
|
||||
allTags["status"] = status
|
||||
allTags["type"] = diskType
|
||||
allTags["model"] = model
|
||||
allTags["interface_type"] = interfaceType
|
||||
|
||||
// 创建字段映射
|
||||
fields := map[string]interface{}{
|
||||
"size_gb": sizeGB,
|
||||
"description": description,
|
||||
}
|
||||
|
||||
// 使用新的writeData方法
|
||||
return s.writeData(ctx, "disk_details", allTags, fields, deviceID, "disk_detail")
|
||||
}
|
||||
|
||||
// QueryMetrics 查询监控指标
|
||||
@@ -315,3 +460,132 @@ func (s *Storage) QueryDeviceStatus(ctx context.Context, deviceID string) (strin
|
||||
|
||||
return agentName, status, nil
|
||||
}
|
||||
|
||||
// QueryProcessMetrics 查询进程指标
|
||||
func (s *Storage) QueryProcessMetrics(ctx context.Context, deviceID string, startTime, endTime string) ([]map[string]interface{}, error) {
|
||||
queryAPI := s.client.QueryAPI(s.org)
|
||||
|
||||
// 构建查询语句
|
||||
query := `from(bucket: "` + s.bucket + `")
|
||||
|> range(start: ` + startTime + `, stop: ` + endTime + `)
|
||||
|> filter(fn: (r) => r["_measurement"] == "processes")`
|
||||
|
||||
// 如果指定了设备ID,添加设备ID过滤
|
||||
if deviceID != "" {
|
||||
query += `
|
||||
|> filter(fn: (r) => r["device_id"] == "` + deviceID + `")`
|
||||
}
|
||||
|
||||
// 获取最新的进程数据
|
||||
query += `
|
||||
|> last()`
|
||||
|
||||
// 执行查询
|
||||
queryResult, err := queryAPI.Query(ctx, query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer queryResult.Close()
|
||||
|
||||
// 存储进程数据
|
||||
processes := make([]map[string]interface{}, 0)
|
||||
|
||||
// 处理查询结果
|
||||
for queryResult.Next() {
|
||||
if queryResult.TableChanged() {
|
||||
// 表结构变化,跳过
|
||||
continue
|
||||
}
|
||||
|
||||
// 获取记录
|
||||
record := queryResult.Record()
|
||||
|
||||
// 构建进程数据
|
||||
processData := map[string]interface{}{
|
||||
"time": record.Time(),
|
||||
"device_id": record.ValueByKey("device_id"),
|
||||
"process_name": record.ValueByKey("process_name"),
|
||||
"username": record.ValueByKey("username"),
|
||||
"pid": record.ValueByKey("pid"),
|
||||
"cpu_usage": record.ValueByKey("cpu_usage"),
|
||||
"memory_usage": record.ValueByKey("memory_usage"),
|
||||
"path": record.ValueByKey("path"),
|
||||
"cmdline": record.ValueByKey("cmdline"),
|
||||
"ports": record.ValueByKey("ports"),
|
||||
"agent_name": record.ValueByKey("agent_name"),
|
||||
}
|
||||
|
||||
// 添加到进程列表
|
||||
processes = append(processes, processData)
|
||||
}
|
||||
|
||||
if queryResult.Err() != nil {
|
||||
return nil, queryResult.Err()
|
||||
}
|
||||
|
||||
return processes, nil
|
||||
}
|
||||
|
||||
// QueryDiskDetails 查询磁盘详细信息
|
||||
func (s *Storage) QueryDiskDetails(ctx context.Context, deviceID string, startTime, endTime string) ([]map[string]interface{}, error) {
|
||||
queryAPI := s.client.QueryAPI(s.org)
|
||||
|
||||
// 构建查询语句
|
||||
query := `from(bucket: "` + s.bucket + `")
|
||||
|> range(start: ` + startTime + `, stop: ` + endTime + `)
|
||||
|> filter(fn: (r) => r["_measurement"] == "disk_details")`
|
||||
|
||||
// 如果指定了设备ID,添加设备ID过滤
|
||||
if deviceID != "" {
|
||||
query += `
|
||||
|> filter(fn: (r) => r["device_id"] == "` + deviceID + `")`
|
||||
}
|
||||
|
||||
// 获取最新的磁盘详细信息
|
||||
query += `
|
||||
|> last()`
|
||||
|
||||
// 执行查询
|
||||
queryResult, err := queryAPI.Query(ctx, query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer queryResult.Close()
|
||||
|
||||
// 存储磁盘详细信息
|
||||
diskDetails := make([]map[string]interface{}, 0)
|
||||
|
||||
// 处理查询结果
|
||||
for queryResult.Next() {
|
||||
if queryResult.TableChanged() {
|
||||
// 表结构变化,跳过
|
||||
continue
|
||||
}
|
||||
|
||||
// 获取记录
|
||||
record := queryResult.Record()
|
||||
|
||||
// 构建磁盘详细信息
|
||||
diskData := map[string]interface{}{
|
||||
"time": record.Time(),
|
||||
"device_id": record.ValueByKey("device_id"),
|
||||
"disk_id": record.ValueByKey("disk_id"),
|
||||
"status": record.ValueByKey("status"),
|
||||
"type": record.ValueByKey("type"),
|
||||
"size_gb": record.ValueByKey("size_gb"),
|
||||
"model": record.ValueByKey("model"),
|
||||
"interface_type": record.ValueByKey("interface_type"),
|
||||
"description": record.ValueByKey("description"),
|
||||
"agent_name": record.ValueByKey("agent_name"),
|
||||
}
|
||||
|
||||
// 添加到磁盘详细信息列表
|
||||
diskDetails = append(diskDetails, diskData)
|
||||
}
|
||||
|
||||
if queryResult.Err() != nil {
|
||||
return nil, queryResult.Err()
|
||||
}
|
||||
|
||||
return diskDetails, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user