This commit is contained in:
zxr
2026-04-27 19:26:57 +08:00
parent 01c807b953
commit 694893eea3
26 changed files with 1901 additions and 15 deletions

View File

@@ -21,6 +21,13 @@ type IngestConf struct {
RuleRefreshSecs int `yaml:"rule_refresh_secs"`
}
// ResourceEventConf holds the settings used when receiving resource events
// pushed by dc-control.
type ResourceEventConf struct {
	// HMACSecret is the shared secret used to verify the signature that
	// dc-control sends in the X-Event-Signature header.
	HMACSecret string `yaml:"hmac_secret"`
	// MaxSkewSecs is the maximum allowed difference, in seconds, between the
	// event time and the server time.
	MaxSkewSecs int `yaml:"max_skew_secs"`
}
type SrvConfig struct {
conf.Base `yaml:",inline"`
Databases *conf.DBConf `yaml:"Databases"`
@@ -31,6 +38,7 @@ type SrvConfig struct {
Etcd *conf.EtcdConf `yaml:"Etcd"`
AlertForward *AlertForwardConf `yaml:"AlertForward"`
Ingest IngestConf `yaml:"Ingest"`
ResourceEvent ResourceEventConf `yaml:"ResourceEvent"`
}
func New(srvKey string) {

View File

@@ -25,7 +25,7 @@ func NewImpl() {
if err := DBService.AutoMigrate(models.GetAllModels()...); err != nil {
panic(fmt.Sprintf("logs migrate: %v", err))
}
if err := models.InitData(); err != nil {
if err := models.InitData(DBService); err != nil {
panic(fmt.Sprintf("logs init data: %v", err))
}
}

View File

@@ -0,0 +1,125 @@
package ingest
import (
"encoding/json"
"strings"
"time"
"git.apinb.com/ops/logs/internal/impl"
"git.apinb.com/ops/logs/internal/models"
)
// Values written to the "status" column of the alert outbox table.
const (
	// outboxStatusPending is set when a row is first enqueued.
	outboxStatusPending = "pending"
	// outboxStatusRetrying is set after a failed forward that will be retried.
	outboxStatusRetrying = "retrying"
	// outboxStatusSent is set after the alert was forwarded successfully.
	outboxStatusSent = "sent"
	// outboxStatusDead is set when the payload is invalid or retries are exhausted.
	outboxStatusDead = "dead"
)
// enqueueAlert serializes body to JSON and stores it in the alert outbox as a
// pending task linked to logEventID. The task is due immediately
// (NextRetryAt is set to the current time).
//
// It returns the JSON marshalling error or the database insert error, if any.
func enqueueAlert(logEventID uint, body AlertReceiveBody) error {
	encoded, marshalErr := json.Marshal(body)
	if marshalErr != nil {
		return marshalErr
	}

	// RetryCount and LastError are left at their zero values (0 and "").
	task := &models.AlertOutbox{
		LogEventID:  logEventID,
		PayloadJSON: string(encoded),
		Status:      outboxStatusPending,
		NextRetryAt: time.Now(),
	}
	result := impl.DBService.Create(task)
	return result.Error
}
// StartAlertDispatcher launches a background goroutine that processes a batch
// of up to 20 due outbox rows every 2 seconds.
//
// The goroutine has no stop signal; it runs for the lifetime of the process.
func StartAlertDispatcher() {
	const (
		pollInterval = 2 * time.Second
		batchSize    = 20
	)
	go func() {
		tick := time.NewTicker(pollInterval)
		defer tick.Stop()
		for {
			<-tick.C
			processAlertOutboxBatch(batchSize)
		}
	}()
}
func processAlertOutboxBatch(limit int) {
if limit <= 0 {
limit = 20
}
var rows []models.AlertOutbox
now := time.Now()
err := impl.DBService.
Where("status IN ? AND next_retry_at <= ?", []string{outboxStatusPending, outboxStatusRetrying}, now).
Order("id asc").
Limit(limit).
Find(&rows).Error
if err != nil || len(rows) == 0 {
return
}
for _, row := range rows {
processOneOutbox(row)
}
}
// processOneOutbox attempts to deliver a single outbox row.
//
//   - If the stored payload cannot be decoded, the row is marked dead.
//   - If forwarding fails, the row is handed to markOutboxRetry.
//   - On success, the outbox row is marked sent and the related log event is
//     flagged as alert_sent with dispatch_status "sent".
//
// Errors from the two success-path updates are deliberately ignored.
// NOTE(review): if the "sent" update fails after a successful forward, the row
// stays due and the alert would presumably be forwarded again — confirm that
// the receiver tolerates duplicates.
func processOneOutbox(row models.AlertOutbox) {
	var payload AlertReceiveBody
	decodeErr := json.Unmarshal([]byte(row.PayloadJSON), &payload)
	if decodeErr != nil {
		markOutboxDead(row.ID, row.RetryCount, "invalid_payload: "+decodeErr.Error())
		return
	}

	if sendErr := forwardAlert(payload); sendErr != nil {
		markOutboxRetry(row, sendErr.Error())
		return
	}

	outboxFields := map[string]interface{}{
		"status":        outboxStatusSent,
		"last_error":    "",
		"next_retry_at": time.Now(),
	}
	_ = impl.DBService.Model(&models.AlertOutbox{}).
		Where("id = ?", row.ID).
		Updates(outboxFields).Error

	eventFields := map[string]interface{}{
		"alert_sent":      true,
		"dispatch_status": "sent",
	}
	_ = impl.DBService.Model(&models.LogEvent{}).
		Where("id = ?", row.LogEventID).
		Updates(eventFields).Error
}
// markOutboxRetry records a failed delivery attempt for row.
//
// The retry counter is incremented; once it exceeds 5 the row is marked dead
// instead. Otherwise the row is rescheduled with a quadratic backoff
// (retry² seconds, capped at 60 seconds), the error text is stored truncated
// to 1024 bytes, and the related log event's dispatch_status becomes
// "retrying". Database errors are ignored.
func markOutboxRetry(row models.AlertOutbox, msg string) {
	const (
		maxRetry   = 5
		maxBackoff = 60 * time.Second
	)

	attempt := row.RetryCount + 1
	if attempt > maxRetry {
		markOutboxDead(row.ID, attempt, msg)
		return
	}

	delay := time.Duration(attempt*attempt) * time.Second
	if delay > maxBackoff {
		delay = maxBackoff
	}

	fields := map[string]interface{}{
		"status":        outboxStatusRetrying,
		"retry_count":   attempt,
		"next_retry_at": time.Now().Add(delay),
		"last_error":    truncateError(msg, 1024),
	}
	_ = impl.DBService.Model(&models.AlertOutbox{}).
		Where("id = ?", row.ID).
		Updates(fields).Error
	_ = impl.DBService.Model(&models.LogEvent{}).
		Where("id = ?", row.LogEventID).
		Update("dispatch_status", "retrying").Error
}
func markOutboxDead(id uint, retry int, msg string) {
_ = impl.DBService.Model(&models.AlertOutbox{}).Where("id = ?", id).Updates(map[string]interface{}{
"status": outboxStatusDead,
"retry_count": retry,
"next_retry_at": time.Now(),
"last_error": truncateError(msg, 1024),
}).Error
var row models.AlertOutbox
if err := impl.DBService.Select("log_event_id").First(&row, id).Error; err == nil && row.LogEventID > 0 {
_ = impl.DBService.Model(&models.LogEvent{}).Where("id = ?", row.LogEventID).Update("dispatch_status", "dead").Error
}
}
// truncateError trims surrounding whitespace from s and limits the result to
// at most n bytes.
//
// The cut never lands inside a multi-byte UTF-8 character: when byte n is a
// continuation byte, the cut moves back (by at most 3 bytes) to the start of
// that character, so the result may be slightly shorter than n but stays
// valid UTF-8 whenever s was valid. A negative n is treated as 0.
func truncateError(s string, n int) string {
	s = strings.TrimSpace(s)
	if n < 0 {
		n = 0
	}
	if len(s) <= n {
		return s
	}

	// UTF-8 continuation bytes have the bit pattern 10xxxxxx. A character is
	// at most 4 bytes long, so stepping back more than 3 bytes is never needed
	// for valid input; the bound keeps invalid input from shrinking further.
	cut := n
	for cut > 0 && n-cut < 3 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut]
}

View File

@@ -0,0 +1,11 @@
package ingest
import "testing"
// TestTruncateError checks that the input is trimmed before being cut to the
// requested length.
func TestTruncateError(t *testing.T) {
	const want = "abc"
	if got := truncateError(" abcdef ", 3); got != want {
		t.Fatalf("unexpected value: %q", got)
	}
}

View File

@@ -24,6 +24,27 @@ type Engine struct {
syslogRules []models.SyslogRule
trapRules []models.TrapRule
shields []models.TrapShield
resourceByIP map[string]resourceRef
resourceByHN map[string]resourceRef
}
// resourceRef identifies the resource that a log event is attributed to.
// The engine builds these from ResourceMapping rows when it refreshes.
type resourceRef struct {
	ResourceType string
	ResourceID string
	ResourceName string
}
// resourceTypePriority ranks a resource type so that conflicts between
// mappings claiming the same IP or hostname can be resolved: a higher value
// wins. The comparison ignores case and surrounding whitespace.
//
// server = 3, collector = 2, device = 1, anything else = 0.
func resourceTypePriority(resourceType string) int {
	normalized := strings.ToLower(strings.TrimSpace(resourceType))

	priority := 0
	switch normalized {
	case "server":
		priority = 3
	case "collector":
		priority = 2
	case "device":
		priority = 1
	}
	return priority
}
var Global = &Engine{}
@@ -33,6 +54,7 @@ func (e *Engine) Refresh() error {
var syslog []models.SyslogRule
var trap []models.TrapRule
var shield []models.TrapShield
var mappings []models.ResourceMapping
if err := impl.DBService.Where("enabled = ?", true).Find(&dict).Error; err != nil {
return err
@@ -54,12 +76,51 @@ func (e *Engine) Refresh() error {
if err := impl.DBService.Where("enabled = ?", true).Find(&shield).Error; err != nil {
return err
}
if err := impl.DBService.Where("is_deleted = ?", false).Order("updated_at desc, id desc").Find(&mappings).Error; err != nil {
return err
}
ipMap := make(map[string]resourceRef)
hnMap := make(map[string]resourceRef)
for _, m := range mappings {
ref := resourceRef{
ResourceType: m.ResourceType,
ResourceID: m.ResourceID,
ResourceName: m.ResourceName,
}
var ips []string
if err := json.Unmarshal([]byte(m.IPsJSON), &ips); err == nil {
for _, ip := range ips {
key := strings.TrimSpace(ip)
if key == "" {
continue
}
if cur, exists := ipMap[key]; !exists || resourceTypePriority(ref.ResourceType) > resourceTypePriority(cur.ResourceType) {
ipMap[key] = ref
}
}
}
var hostnames []string
if err := json.Unmarshal([]byte(m.HostnamesJSON), &hostnames); err == nil {
for _, hn := range hostnames {
key := strings.ToLower(strings.TrimSpace(hn))
if key == "" {
continue
}
if cur, exists := hnMap[key]; !exists || resourceTypePriority(ref.ResourceType) > resourceTypePriority(cur.ResourceType) {
hnMap[key] = ref
}
}
}
}
e.mu.Lock()
e.trapDict = dict
e.syslogRules = syslog
e.trapRules = trap
e.shields = shield
e.resourceByIP = ipMap
e.resourceByHN = hnMap
e.mu.Unlock()
return nil
}
@@ -99,14 +160,21 @@ func (e *Engine) HandleSyslog(addr *net.UDPAddr, payload []byte) {
detailBytes, _ := json.Marshal(detailObj)
summary := formatSyslogSummary(parsed)
sev := syslogPriorityToSeverity(parsed.Priority)
ref, method := e.resolveResource(addr.IP.String(), device)
ev := models.LogEvent{
SourceKind: "syslog",
RemoteAddr: addr.String(),
SourceIP: addr.IP.String(),
RawPayload: string(payload),
NormalizedSummary: summary,
NormalizedDetail: string(detailBytes),
DeviceName: device,
ResourceType: ref.ResourceType,
ResourceID: ref.ResourceID,
ResourceName: ref.ResourceName,
MatchMethod: method,
DispatchStatus: "not_applicable",
SeverityCode: sev,
}
@@ -166,8 +234,8 @@ func (e *Engine) HandleSyslog(addr *net.UDPAddr, payload []byte) {
PolicyID: matched.PolicyID,
RawData: rawBytes,
}
if err := forwardAlert(body); err == nil {
_ = impl.DBService.Model(&ev).Update("alert_sent", true).Error
if err := enqueueAlert(ev.ID, body); err == nil {
_ = impl.DBService.Model(&ev).Update("dispatch_status", "pending").Error
}
}
@@ -204,10 +272,7 @@ func trapShielded(e *Engine, addr *net.UDPAddr, trapOID string, pkt *gosnmp.Snmp
if !s.Enabled {
continue
}
if strings.TrimSpace(s.SourceIPCIDR) == "" {
continue
}
if !ipMatchesCIDR(ip, s.SourceIPCIDR) {
if cidr := strings.TrimSpace(s.SourceIPCIDR); cidr != "" && !ipMatchesCIDR(ip, cidr) {
continue
}
if p := strings.TrimSpace(s.OIDPrefix); p != "" && !strings.HasPrefix(normOID(trapOID), normOID(p)) {
@@ -265,14 +330,21 @@ func (e *Engine) HandleTrap(addr *net.UDPAddr, pkt *gosnmp.SnmpPacket) {
}
}
detailBytes, _ := json.Marshal(detailObj)
ref, method := e.resolveResource(addr.IP.String(), addr.IP.String())
ev := models.LogEvent{
SourceKind: "snmp_trap",
RemoteAddr: addr.String(),
SourceIP: addr.IP.String(),
RawPayload: fp,
NormalizedSummary: readable,
NormalizedDetail: string(detailBytes),
DeviceName: addr.IP.String(),
ResourceType: ref.ResourceType,
ResourceID: ref.ResourceID,
ResourceName: ref.ResourceName,
MatchMethod: method,
DispatchStatus: "not_applicable",
SeverityCode: sev,
TrapOID: trapOID,
}
@@ -360,8 +432,8 @@ func (e *Engine) HandleTrap(addr *net.UDPAddr, pkt *gosnmp.SnmpPacket) {
PolicyID: matched.PolicyID,
RawData: rawBytes,
}
if err := forwardAlert(body); err == nil {
_ = impl.DBService.Model(&ev).Update("alert_sent", true).Error
if err := enqueueAlert(ev.ID, body); err == nil {
_ = impl.DBService.Model(&ev).Update("dispatch_status", "pending").Error
}
}
@@ -440,3 +512,18 @@ func firstNonEmpty(a, b string) string {
}
return b
}
// resolveResource maps an event source to a known resource.
//
// The source IP (whitespace-trimmed) is tried first; if it is unknown, the
// hostname (trimmed and lower-cased) is tried. The second return value names
// the match method: "ip", "hostname", or "none" (with a zero resourceRef).
//
// Only the map references are read under the read lock; the lookups run on
// that snapshot after the lock is released.
func (e *Engine) resolveResource(sourceIP, hostname string) (resourceRef, string) {
	e.mu.RLock()
	byIP, byHost := e.resourceByIP, e.resourceByHN
	e.mu.RUnlock()

	ipKey := strings.TrimSpace(sourceIP)
	if hit, found := byIP[ipKey]; found {
		return hit, "ip"
	}

	hostKey := strings.ToLower(strings.TrimSpace(hostname))
	if hit, found := byHost[hostKey]; found {
		return hit, "hostname"
	}

	return resourceRef{}, "none"
}

View File

@@ -0,0 +1,49 @@
package ingest
import "testing"
func TestResolveResourceByIPFirst(t *testing.T) {
e := &Engine{
resourceByIP: map[string]resourceRef{
"10.0.0.10": {ResourceType: "server", ResourceID: "srv-10", ResourceName: "s10"},
},
resourceByHN: map[string]resourceRef{
"host-a": {ResourceType: "device", ResourceID: "dev-a", ResourceName: "a"},
},
}
ref, method := e.resolveResource("10.0.0.10", "host-a")
if method != "ip" {
t.Fatalf("method=%s", method)
}
if ref.ResourceID != "srv-10" {
t.Fatalf("resource id=%s", ref.ResourceID)
}
}
func TestResolveResourceByHostname(t *testing.T) {
e := &Engine{
resourceByIP: map[string]resourceRef{},
resourceByHN: map[string]resourceRef{
"host-a": {ResourceType: "device", ResourceID: "dev-a", ResourceName: "a"},
},
}
ref, method := e.resolveResource("10.0.0.20", "HOST-A")
if method != "hostname" {
t.Fatalf("method=%s", method)
}
if ref.ResourceID != "dev-a" {
t.Fatalf("resource id=%s", ref.ResourceID)
}
}
// TestResolveResourceNoMatch verifies that an unknown IP and hostname report
// the "none" match method.
func TestResolveResourceNoMatch(t *testing.T) {
	engine := &Engine{
		resourceByIP: map[string]resourceRef{},
		resourceByHN: map[string]resourceRef{},
	}

	if _, how := engine.resolveResource("10.0.0.20", "host-b"); how != "none" {
		t.Fatalf("method=%s", how)
	}
}

View File

@@ -40,7 +40,7 @@ func inTimeWindows(now time.Time, jsonStr string) bool {
}
var windows []timeWindow
if err := json.Unmarshal([]byte(s), &windows); err != nil || len(windows) == 0 {
return true
return false
}
tod := now.Hour()*60 + now.Minute()
wd := int(now.Weekday())

View File

@@ -46,8 +46,20 @@ func parseSyslogPayload(payload []byte) ParsedSyslog {
tokens := strings.SplitN(rest, " ", 3)
if len(tokens) >= 2 {
if len(tokens) >= 3 && isMonthAbbr(tokens[0]) {
p.Hostname = tokens[2]
if idx := strings.Index(rest, ": "); idx > 0 {
parts := strings.Fields(rest)
if len(parts) >= 4 && isDayOfMonth(parts[1]) && isHHMMSS(parts[2]) {
p.Hostname = parts[3]
if len(parts) > 4 {
tagMsg := strings.Join(parts[4:], " ")
if idx := strings.Index(tagMsg, ": "); idx > 0 {
p.Tag = tagMsg[:idx]
p.Message = strings.TrimSpace(tagMsg[idx+2:])
} else {
p.Message = tagMsg
}
}
} else if idx := strings.Index(rest, ": "); idx > 0 {
// 兼容无法严格按 RFC3164 切分的历史格式。
p.Message = strings.TrimSpace(rest[idx+2:])
}
} else {
@@ -66,6 +78,28 @@ func parseSyslogPayload(payload []byte) ParsedSyslog {
return p
}
// isDayOfMonth reports whether s parses as an integer in the range 1..31.
func isDayOfMonth(s string) bool {
	day, err := strconv.Atoi(s)
	return err == nil && day >= 1 && day <= 31
}
// isHHMMSS reports whether s consists of exactly three colon-separated
// integers forming a valid time of day: hour 0..23, minute 0..59,
// second 0..59. Fields are parsed with strconv.Atoi, so they need not be
// zero-padded.
func isHHMMSS(s string) bool {
	fields := strings.Split(s, ":")
	if len(fields) != 3 {
		return false
	}

	upper := [3]int{23, 59, 59}
	var clock [3]int
	for i, field := range fields {
		value, err := strconv.Atoi(field)
		if err != nil {
			return false
		}
		clock[i] = value
	}

	for i, value := range clock {
		if value < 0 || value > upper[i] {
			return false
		}
	}
	return true
}
func isMonthAbbr(s string) bool {
if len(s) < 3 {
return false

View File

@@ -2,7 +2,12 @@ package ingest
import (
"encoding/json"
"net"
"testing"
"time"
"git.apinb.com/ops/logs/internal/models"
"github.com/gosnmp/gosnmp"
)
func TestParseSyslogPayloadPri(t *testing.T) {
@@ -12,6 +17,19 @@ func TestParseSyslogPayloadPri(t *testing.T) {
}
}
// TestParseSyslogPayloadRFC3164Hostname checks that a "Mon DD HH:MM:SS host
// tag: msg" line without a PRI prefix is split into hostname, tag and message.
func TestParseSyslogPayloadRFC3164Hostname(t *testing.T) {
	line := []byte("Oct 11 22:14:15 mymachine su: failed")
	parsed := parseSyslogPayload(line)

	if parsed.Hostname != "mymachine" {
		t.Fatalf("hostname=%q", parsed.Hostname)
	}
	if parsed.Tag != "su" {
		t.Fatalf("tag=%q", parsed.Tag)
	}
	if parsed.Message != "failed" {
		t.Fatalf("message=%q", parsed.Message)
	}
}
func TestForwardAlertBodyIncludesRawData(t *testing.T) {
raw := []byte(`{"source":"syslog","parsed":{}}`)
b := AlertReceiveBody{
@@ -30,3 +48,29 @@ func TestForwardAlertBodyIncludesRawData(t *testing.T) {
t.Fatalf("raw_data %s", dec["raw_data"])
}
}
// TestInTimeWindowsInvalidJSONReturnsFalse checks that an unparsable window
// definition is not treated as "always in effect".
func TestInTimeWindowsInvalidJSONReturnsFalse(t *testing.T) {
	at := time.Date(2026, 1, 1, 10, 0, 0, 0, time.Local)
	effective := inTimeWindows(at, "{invalid")
	if effective {
		t.Fatal("invalid json should not be treated as always effective")
	}
}
func TestTrapShieldedAllowsEmptySourceIPCIDR(t *testing.T) {
e := &Engine{
shields: []models.TrapShield{
{
Enabled: true,
SourceIPCIDR: "",
OIDPrefix: "1.3.6.1.4.1",
InterfaceHint: "",
TimeWindowsJSON: "",
},
},
}
addr := &net.UDPAddr{IP: net.ParseIP("10.0.0.1"), Port: 162}
pkt := &gosnmp.SnmpPacket{}
if !trapShielded(e, addr, "1.3.6.1.4.1.999", pkt) {
t.Fatal("shield should match when source_ip_cidr is empty and other conditions match")
}
}

View File

@@ -273,6 +273,10 @@ func DeleteTrapShield(ctx *gin.Context) {
func ListLogEvents(ctx *gin.Context) {
kind := ctx.Query("source_kind")
resourceType := ctx.Query("resource_type")
resourceID := ctx.Query("resource_id")
dispatchStatus := ctx.Query("dispatch_status")
logEventID, _ := strconv.ParseUint(ctx.DefaultQuery("log_event_id", "0"), 10, 64)
page, _ := strconv.Atoi(ctx.DefaultQuery("page", "1"))
size, _ := strconv.Atoi(ctx.DefaultQuery("page_size", "50"))
if page < 1 {
@@ -286,6 +290,18 @@ func ListLogEvents(ctx *gin.Context) {
if kind != "" {
q = q.Where("source_kind = ?", kind)
}
if resourceType != "" {
q = q.Where("resource_type = ?", resourceType)
}
if resourceID != "" {
q = q.Where("resource_id = ?", resourceID)
}
if dispatchStatus != "" {
q = q.Where("dispatch_status = ?", dispatchStatus)
}
if logEventID > 0 {
q = q.Where("id = ?", uint(logEventID))
}
var total int64
_ = q.Count(&total).Error
var rows []models.LogEvent

View File

@@ -0,0 +1,73 @@
package controllers
import (
"errors"
"strconv"
"strings"
"time"
"git.apinb.com/bsm-sdk/core/infra"
"git.apinb.com/ops/logs/internal/impl"
"git.apinb.com/ops/logs/internal/models"
"github.com/gin-gonic/gin"
)
// ListAlertOutbox returns a page of alert outbox rows, newest id first.
//
// Query parameters:
//   - status: optional exact-match filter on the status column.
//   - page: 1-based page number; values below 1 (or unparsable) become 1.
//   - page_size: rows per page; values outside 1..500 (or unparsable) become 50.
//
// The response contains total, page, page_size and items. An error from the
// count query is ignored, in which case total is reported as 0.
func ListAlertOutbox(ctx *gin.Context) {
	statusFilter := strings.TrimSpace(ctx.Query("status"))
	pageNo, _ := strconv.Atoi(ctx.DefaultQuery("page", "1"))
	pageSize, _ := strconv.Atoi(ctx.DefaultQuery("page_size", "50"))
	if pageNo < 1 {
		pageNo = 1
	}
	if pageSize < 1 || pageSize > 500 {
		pageSize = 50
	}

	query := impl.DBService.Model(&models.AlertOutbox{})
	if statusFilter != "" {
		query = query.Where("status = ?", statusFilter)
	}

	var total int64
	_ = query.Count(&total).Error

	var items []models.AlertOutbox
	findErr := query.
		Order("id desc").
		Offset((pageNo - 1) * pageSize).
		Limit(pageSize).
		Find(&items).Error
	if findErr != nil {
		infra.Response.Error(ctx, findErr)
		return
	}

	infra.Response.Success(ctx, gin.H{
		"total":     total,
		"page":      pageNo,
		"page_size": pageSize,
		"items":     items,
	})
}
// RetryAlertOutbox manually re-queues one outbox row identified by the id
// taken from the request.
//
// The row must exist. Whatever its previous state or failure reason, it is
// reset to status "pending" with next_retry_at set to now and last_error
// cleared, so the dispatcher can pick it up on its next pass.
//
// NOTE(review): retry_count is not reset here. The dispatcher marks a row dead
// once retry_count+1 exceeds 5, so a manually retried dead row appears to get
// only a single attempt — confirm this is intended. The related log event's
// dispatch_status is also left unchanged.
func RetryAlertOutbox(ctx *gin.Context) {
	id, parseErr := parseID(ctx)
	if parseErr != nil {
		infra.Response.Error(ctx, errors.New("invalid id"))
		return
	}

	var existing models.AlertOutbox
	if lookupErr := impl.DBService.First(&existing, id).Error; lookupErr != nil {
		infra.Response.Error(ctx, lookupErr)
		return
	}

	reset := map[string]interface{}{
		"status":        "pending",
		"next_retry_at": time.Now(),
		"last_error":    "",
	}
	updateErr := impl.DBService.Model(&models.AlertOutbox{}).
		Where("id = ?", id).
		Updates(reset).Error
	if updateErr != nil {
		infra.Response.Error(ctx, updateErr)
		return
	}

	infra.Response.Success(ctx, gin.H{
		"id":     id,
		"status": "pending",
	})
}

View File

@@ -0,0 +1,228 @@
package controllers
import (
"crypto/hmac"
"crypto/sha256"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"git.apinb.com/bsm-sdk/core/infra"
"git.apinb.com/ops/logs/internal/config"
"git.apinb.com/ops/logs/internal/impl"
"git.apinb.com/ops/logs/internal/models"
"github.com/gin-gonic/gin"
"gorm.io/gorm"
)
// Accepted values of the event_type field in a resource event.
const (
	// resourceEventUpsert creates or updates a resource mapping.
	resourceEventUpsert = "resource.upsert"
	// resourceEventDelete marks a resource mapping as logically deleted.
	resourceEventDelete = "resource.delete"
)
// resourceEventRequest is the JSON body of a resource change event.
type resourceEventRequest struct {
	// EventID uniquely identifies the event; used for deduplication.
	EventID string `json:"event_id"`
	// EventTime is the event timestamp; it must be in RFC3339 format.
	EventTime string `json:"event_time"`
	// EventType must be "resource.upsert" or "resource.delete".
	EventType string `json:"event_type"`
	ResourceType string `json:"resource_type"`
	ResourceID string `json:"resource_id"`
	ResourceName string `json:"resource_name"`
	// IPs and Hostnames are stored trimmed and de-duplicated.
	IPs []string `json:"ips"`
	Hostnames []string `json:"hostnames"`
	Labels map[string]string `json:"labels"`
	// Version must be positive; events older than the stored version are ignored.
	Version int64 `json:"version"`
}
// ReceiveResourceEvent receives a resource change event pushed by dc-control
// and persists it as a ResourceMapping row.
//
// Processing steps, in order:
//  1. Read the raw body and verify the X-Event-Signature header against it.
//  2. Decode and validate the JSON payload, then check the event-time skew.
//  3. Record the event_id for deduplication; an already-seen event_id is
//     acknowledged with ignored=true / reason=duplicate_event_id.
//  4. Load the existing mapping for (resource_type, resource_id); if its
//     version is greater than the incoming one, acknowledge with
//     ignored=true / reason=stale_version so out-of-order events do not
//     overwrite newer data.
//  5. Save the mapping. A "resource.delete" event sets is_deleted rather than
//     removing the row.
//
// If the final save fails, the dedup marker written in step 3 is removed
// (best effort) so that the sender's retry of the same event is processed
// instead of being dropped as a duplicate.
func ReceiveResourceEvent(ctx *gin.Context) {
	raw, err := ctx.GetRawData()
	if err != nil {
		infra.Response.Error(ctx, err)
		return
	}
	// The signature covers the exact bytes received, so verify before decoding.
	if err := verifyResourceEventSignature(ctx.GetHeader("X-Event-Signature"), raw); err != nil {
		infra.Response.Error(ctx, err)
		return
	}

	var req resourceEventRequest
	if err := json.Unmarshal(raw, &req); err != nil {
		infra.Response.Error(ctx, err)
		return
	}
	eventTime, err := validateResourceEventRequest(&req)
	if err != nil {
		infra.Response.Error(ctx, err)
		return
	}
	if err := validateEventTimeSkew(eventTime); err != nil {
		infra.Response.Error(ctx, err)
		return
	}

	inserted, err := tryInsertResourceEventDedup(req.EventID, eventTime, req.ResourceType, req.ResourceID)
	if err != nil {
		infra.Response.Error(ctx, err)
		return
	}
	if !inserted {
		infra.Response.Success(ctx, gin.H{
			"ignored":  true,
			"reason":   "duplicate_event_id",
			"event_id": req.EventID,
		})
		return
	}

	var row models.ResourceMapping
	err = impl.DBService.Where("resource_type = ? AND resource_id = ?", req.ResourceType, req.ResourceID).First(&row).Error
	if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
		infra.Response.Error(ctx, err)
		return
	}
	// A record exists with a newer version: ignore the event so that
	// out-of-order delivery cannot overwrite newer data.
	if err == nil && row.Version > req.Version {
		infra.Response.Success(ctx, gin.H{
			"ignored":  true,
			"reason":   "stale_version",
			"current":  row.Version,
			"incoming": req.Version,
		})
		return
	}

	// Marshalling []string and map[string]string cannot fail, so the errors
	// are not checked.
	ipsJSON, _ := json.Marshal(nonEmptyUnique(req.IPs))
	hostnamesJSON, _ := json.Marshal(nonEmptyUnique(req.Hostnames))
	labelsJSON, _ := json.Marshal(req.Labels)

	row.ResourceType = req.ResourceType
	row.ResourceID = req.ResourceID
	row.ResourceName = req.ResourceName
	row.IPsJSON = string(ipsJSON)
	row.HostnamesJSON = string(hostnamesJSON)
	row.LabelsJSON = string(labelsJSON)
	row.Version = req.Version
	row.LastEventID = req.EventID
	row.EventTime = eventTime
	row.IsDeleted = req.EventType == resourceEventDelete

	if err := impl.DBService.Save(&row).Error; err != nil {
		// The event was not applied. Drop its dedup marker (best effort) so a
		// retry with the same event_id is not rejected as a duplicate.
		_ = impl.DBService.Where("event_id = ?", req.EventID).Delete(&models.ResourceEventDedup{}).Error
		infra.Response.Error(ctx, err)
		return
	}

	infra.Response.Success(ctx, gin.H{
		"resource_type": row.ResourceType,
		"resource_id":   row.ResourceID,
		"version":       row.Version,
		"is_deleted":    row.IsDeleted,
	})
}
// validateResourceEventRequest normalizes and validates req.
//
// The string fields event_id, event_type, resource_type, resource_id,
// resource_name and event_time are whitespace-trimmed in place. The request is
// valid when event_id, resource_type, resource_id and event_time are
// non-empty, event_type is "resource.upsert" or "resource.delete", version is
// positive, and event_time parses as RFC3339.
//
// It returns the parsed event time, or a zero time and the first validation
// error encountered.
func validateResourceEventRequest(req *resourceEventRequest) (time.Time, error) {
	trimTargets := []*string{
		&req.EventID,
		&req.EventType,
		&req.ResourceType,
		&req.ResourceID,
		&req.ResourceName,
		&req.EventTime,
	}
	for _, field := range trimTargets {
		*field = strings.TrimSpace(*field)
	}

	var zero time.Time
	switch {
	case req.EventID == "":
		return zero, errors.New("event_id is required")
	case req.EventType != resourceEventUpsert && req.EventType != resourceEventDelete:
		return zero, errors.New("event_type must be resource.upsert or resource.delete")
	case req.ResourceType == "":
		return zero, errors.New("resource_type is required")
	case req.ResourceID == "":
		return zero, errors.New("resource_id is required")
	case req.Version <= 0:
		return zero, errors.New("version must be positive")
	case req.EventTime == "":
		return zero, errors.New("event_time is required")
	}

	parsed, parseErr := time.Parse(time.RFC3339, req.EventTime)
	if parseErr != nil {
		return zero, errors.New("event_time must be RFC3339")
	}
	return parsed, nil
}
// nonEmptyUnique returns the whitespace-trimmed, non-empty entries of items
// with duplicates removed, keeping the order of first occurrence.
//
// An empty or nil input yields nil; a non-empty input whose entries are all
// blank yields an empty, non-nil slice.
func nonEmptyUnique(items []string) []string {
	if len(items) == 0 {
		return nil
	}

	result := make([]string, 0, len(items))
	visited := make(map[string]bool, len(items))
	for i := range items {
		trimmed := strings.TrimSpace(items[i])
		if trimmed == "" || visited[trimmed] {
			continue
		}
		visited[trimmed] = true
		result = append(result, trimmed)
	}
	return result
}
// verifyResourceEventSignature checks that signature is the hex-encoded
// HMAC-SHA256 of body, keyed with the configured resource-event secret.
//
// The signature is trimmed, lower-cased, and an optional "sha256=" prefix is
// removed before comparison. The configured secret is trimmed as well.
//
// It returns an error when the secret is not configured, when the signature
// is missing, or when the signature does not match.
func verifyResourceEventSignature(signature string, body []byte) error {
	provided := strings.TrimPrefix(strings.ToLower(strings.TrimSpace(signature)), "sha256=")
	key := strings.TrimSpace(config.Spec.ResourceEvent.HMACSecret)

	switch {
	case key == "":
		return errors.New("resource_event hmac_secret is not configured")
	case provided == "":
		return errors.New("missing X-Event-Signature")
	}

	digest := hmac.New(sha256.New, []byte(key))
	digest.Write(body)
	want := fmt.Sprintf("%x", digest.Sum(nil))

	// hmac.Equal compares without leaking timing information.
	if hmac.Equal([]byte(provided), []byte(want)) {
		return nil
	}
	return errors.New("invalid X-Event-Signature")
}
// validateEventTimeSkew rejects events whose timestamp differs from the
// current server time, in either direction, by more than the configured
// MaxSkewSecs. A non-positive setting falls back to 300 seconds.
func validateEventTimeSkew(eventTime time.Time) error {
	allowedSecs := config.Spec.ResourceEvent.MaxSkewSecs
	if allowedSecs <= 0 {
		allowedSecs = 300
	}
	window := time.Duration(allowedSecs) * time.Second

	// Use the absolute difference so future-dated events are checked too.
	drift := time.Since(eventTime)
	if drift < 0 {
		drift = -drift
	}

	if drift <= window {
		return nil
	}
	return errors.New("event_time out of allowed skew window")
}
// tryInsertResourceEventDedup records eventID in the dedup table.
//
// It returns (true, nil) when the marker was newly inserted, (false, nil)
// when the event was already recorded, and (false, err) on a database error.
//
// The event is looked up first so that duplicates are detected even if the
// table lacks a unique index on event_id. A concurrent insert of the same
// event_id is additionally caught by recognising a duplicate/unique-violation
// error from the insert.
func tryInsertResourceEventDedup(eventID string, eventTime time.Time, resourceType, resourceID string) (bool, error) {
	var existed models.ResourceEventDedup
	err := impl.DBService.Where("event_id = ?", eventID).First(&existed).Error
	if err == nil {
		return false, nil
	}
	// Only "not found" means the event is new; any other lookup failure is a
	// real database error and must not be mistaken for a fresh event.
	if !errors.Is(err, gorm.ErrRecordNotFound) {
		return false, err
	}

	row := models.ResourceEventDedup{
		EventID:      eventID,
		EventTime:    eventTime,
		ResourceType: resourceType,
		ResourceID:   resourceID,
	}
	if err := impl.DBService.Create(&row).Error; err != nil {
		// Driver error messages differ, so match on the text of the
		// unique-constraint violation.
		msg := strings.ToLower(err.Error())
		if strings.Contains(msg, "duplicate") || strings.Contains(msg, "unique") {
			return false, nil
		}
		return false, err
	}
	return true, nil
}

View File

@@ -0,0 +1,85 @@
package controllers
import (
"crypto/hmac"
"crypto/sha256"
"fmt"
"testing"
"time"
"git.apinb.com/ops/logs/internal/config"
)
// TestValidateResourceEventRequest checks that a fully populated request is
// accepted.
func TestValidateResourceEventRequest(t *testing.T) {
	valid := resourceEventRequest{
		EventID:      "evt-1",
		EventTime:    "2026-04-27T08:00:00Z",
		EventType:    resourceEventUpsert,
		ResourceType: "server",
		ResourceID:   "srv-1",
		ResourceName: "server-1",
		Version:      1,
	}

	_, err := validateResourceEventRequest(&valid)
	if err != nil {
		t.Fatalf("expected valid request, got error: %v", err)
	}
}
// TestValidateResourceEventRequestInvalidTime checks that a non-RFC3339
// event_time is rejected.
func TestValidateResourceEventRequestInvalidTime(t *testing.T) {
	badTime := resourceEventRequest{
		EventID:      "evt-1",
		EventTime:    "bad-time",
		EventType:    resourceEventUpsert,
		ResourceType: "server",
		ResourceID:   "srv-1",
		Version:      1,
	}

	_, err := validateResourceEventRequest(&badTime)
	if err == nil {
		t.Fatal("expected invalid time error")
	}
}
// TestNonEmptyUnique checks trimming, blank removal, de-duplication and
// order preservation.
func TestNonEmptyUnique(t *testing.T) {
	input := []string{" 10.0.0.1 ", "", "10.0.0.1", "host-a", "host-a"}
	result := nonEmptyUnique(input)

	if len(result) != 2 {
		t.Fatalf("unexpected unique size: %d", len(result))
	}
	if result[0] != "10.0.0.1" || result[1] != "host-a" {
		t.Fatalf("unexpected output: %#v", result)
	}
}
// TestVerifyResourceEventSignature checks that a correct HMAC-SHA256 hex
// signature is accepted and an arbitrary string is rejected. The configured
// secret is replaced for the duration of the test and restored afterwards.
func TestVerifyResourceEventSignature(t *testing.T) {
	const secret = "abc123"
	previous := config.Spec.ResourceEvent.HMACSecret
	config.Spec.ResourceEvent.HMACSecret = secret
	defer func() {
		config.Spec.ResourceEvent.HMACSecret = previous
	}()

	payload := []byte(`{"event_id":"evt-1"}`)
	hasher := hmac.New(sha256.New, []byte(secret))
	hasher.Write(payload)
	goodSig := fmt.Sprintf("%x", hasher.Sum(nil))

	if err := verifyResourceEventSignature(goodSig, payload); err != nil {
		t.Fatalf("expected signature to pass: %v", err)
	}
	if err := verifyResourceEventSignature("bad", payload); err == nil {
		t.Fatal("expected invalid signature error")
	}
}
// TestValidateEventTimeSkew checks, with a 60-second window, that the current
// time passes and a timestamp two minutes in the past fails. The configured
// window is restored afterwards.
func TestValidateEventTimeSkew(t *testing.T) {
	previous := config.Spec.ResourceEvent.MaxSkewSecs
	config.Spec.ResourceEvent.MaxSkewSecs = 60
	defer func() {
		config.Spec.ResourceEvent.MaxSkewSecs = previous
	}()

	if err := validateEventTimeSkew(time.Now()); err != nil {
		t.Fatalf("expected current time to pass: %v", err)
	}

	stale := time.Now().Add(-2 * time.Minute)
	if err := validateEventTimeSkew(stale); err == nil {
		t.Fatal("expected skew validation to fail for old timestamp")
	}
}

View File

@@ -0,0 +1,29 @@
package models
import "time"
// AlertOutbox represents an alert task that is waiting to be sent or is being
// retried.
type AlertOutbox struct {
	ID uint `gorm:"primaryKey" json:"id"`
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
	// LogEventID is the ID of the associated log event.
	LogEventID uint `gorm:"index" json:"log_event_id"`
	// PayloadJSON holds the JSON text of an AlertReceiveBody.
	PayloadJSON string `gorm:"type:text" json:"payload_json"`
	// Status is the task state: pending/retrying/sent/dead.
	Status string `gorm:"size:32;index" json:"status"`
	// RetryCount is the number of retries performed so far.
	RetryCount int `json:"retry_count"`
	// NextRetryAt is the earliest time at which the task may be retried.
	NextRetryAt time.Time `gorm:"index" json:"next_retry_at"`
	// LastError is the most recent error message.
	LastError string `gorm:"type:text" json:"last_error"`
}

// TableName returns the database table name used for AlertOutbox.
func (AlertOutbox) TableName() string {
	return "logs_alert_outbox"
}

View File

@@ -20,6 +20,18 @@ type LogEvent struct {
NormalizedDetail string `gorm:"type:text" json:"normalized_detail"`
// DeviceName 表示关联设备名称。
DeviceName string `gorm:"size:512;index" json:"device_name"`
// SourceIP 表示原始来源 IP不含端口
SourceIP string `gorm:"size:64;index" json:"source_ip"`
// ResourceType 表示关联到的资源类型。
ResourceType string `gorm:"size:32;index" json:"resource_type"`
// ResourceID 表示关联到的资源 ID。
ResourceID string `gorm:"size:128;index" json:"resource_id"`
// ResourceName 表示关联到的资源名称。
ResourceName string `gorm:"size:256" json:"resource_name"`
// MatchMethod 表示资源命中方式ip/hostname/none
MatchMethod string `gorm:"size:32" json:"match_method"`
// DispatchStatus 表示告警分发状态not_applicable/pending/retrying/sent/dead
DispatchStatus string `gorm:"size:32;index" json:"dispatch_status"`
// SeverityCode 表示告警/严重度编码。
SeverityCode string `gorm:"size:32" json:"severity_code"`
// TrapOID 表示关联的 Trap OID若来源为 trap

View File

@@ -1,9 +1,14 @@
package models
import "gorm.io/gorm"
// GetAllModels 数据库迁移用模型列表
func GetAllModels() []interface{} {
return []interface{}{
&LogEvent{},
&AlertOutbox{},
&ResourceMapping{},
&ResourceEventDedup{},
&TrapDictionaryEntry{},
&SyslogRule{},
&TrapRule{},
@@ -11,7 +16,104 @@ func GetAllModels() []interface{} {
}
}
// InitData 预留默认数据
func InitData() error {
// InitData seeds the default rule data: syslog rules, trap rules and trap
// dictionary entries, in that order. It is idempotent because each seeder
// does nothing when its table already contains rows.
//
// A nil db is treated as "nothing to do" and returns nil. The first seeder
// error aborts the sequence and is returned.
func InitData(db *gorm.DB) error {
	if db == nil {
		return nil
	}

	seeders := []func(*gorm.DB) error{
		seedDefaultSyslogRules,
		seedDefaultTrapRules,
		seedDefaultTrapDictionary,
	}
	for _, seed := range seeders {
		if err := seed(db); err != nil {
			return err
		}
	}
	return nil
}
// seedDefaultSyslogRules inserts the built-in syslog rules when the syslog
// rule table is empty. If any rule already exists it does nothing.
//
// It returns the error from the count query or from the insert.
func seedDefaultSyslogRules(db *gorm.DB) error {
	var existing int64
	countErr := db.Model(&SyslogRule{}).Count(&existing).Error
	switch {
	case countErr != nil:
		return countErr
	case existing > 0:
		return nil
	}

	// Both defaults leave DeviceNameContains empty and PolicyID at 0.
	defaults := []SyslogRule{
		{
			Name:               "默认-系统严重错误",
			Enabled:            true,
			Priority:           100,
			DeviceNameContains: "",
			KeywordRegex:       "(?i)(panic|fatal|segmentation fault|kernel panic|out of memory|oom)",
			AlertName:          "Syslog严重错误",
			SeverityCode:       "critical",
			PolicyID:           0,
		},
		{
			Name:               "默认-链路中断告警",
			Enabled:            true,
			Priority:           90,
			DeviceNameContains: "",
			KeywordRegex:       "(?i)(link down|interface .* down|port .* down)",
			AlertName:          "Syslog链路中断",
			SeverityCode:       "major",
			PolicyID:           0,
		},
	}
	result := db.Create(&defaults)
	return result.Error
}
// seedDefaultTrapRules inserts the built-in SNMP trap rule when the trap rule
// table is empty. If any rule already exists it does nothing.
//
// It returns the error from the count query or from the insert.
func seedDefaultTrapRules(db *gorm.DB) error {
	var existing int64
	countErr := db.Model(&TrapRule{}).Count(&existing).Error
	switch {
	case countErr != nil:
		return countErr
	case existing > 0:
		return nil
	}

	defaults := []TrapRule{
		{
			Name:              "默认-Trap链路中断",
			Enabled:           true,
			Priority:          100,
			OIDPrefix:         "1.3.6.1.6.3.1.1.5",
			VarbindMatchRegex: "(?i)(linkdown|ifdown|down)",
			AlertName:         "SNMP Trap链路中断",
			SeverityCode:      "major",
			PolicyID:          0,
		},
	}
	result := db.Create(&defaults)
	return result.Error
}
// seedDefaultTrapDictionary inserts the built-in trap dictionary entries when
// the dictionary table is empty. If any entry already exists it does nothing.
//
// It returns the error from the count query or from the insert.
func seedDefaultTrapDictionary(db *gorm.DB) error {
	var existing int64
	countErr := db.Model(&TrapDictionaryEntry{}).Count(&existing).Error
	switch {
	case countErr != nil:
		return countErr
	case existing > 0:
		return nil
	}

	defaults := []TrapDictionaryEntry{
		{
			OIDPrefix:       "1.3.6.1.6.3.1.1.5.3",
			Title:           "ifDown 接口中断",
			Description:     "检测到设备接口状态变为 down。",
			SeverityCode:    "major",
			RecoveryMessage: "请检查链路、端口状态和对端设备。",
			Enabled:         true,
		},
		{
			OIDPrefix:       "1.3.6.1.6.3.1.1.5.4",
			Title:           "ifUp 接口恢复",
			Description:     "检测到设备接口状态恢复为 up。",
			SeverityCode:    "info",
			RecoveryMessage: "接口已恢复,请确认业务连通性。",
			Enabled:         true,
		},
	}
	result := db.Create(&defaults)
	return result.Error
}

View File

@@ -0,0 +1,24 @@
package models
import "time"
// ResourceEventDedup records processed resource events so that each event is
// handled only once (idempotent deduplication).
type ResourceEventDedup struct {
	ID uint `gorm:"primaryKey" json:"id"`
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
	// EventID is the unique identifier assigned to the event by the sender.
	EventID string `gorm:"size:128;uniqueIndex" json:"event_id"`
	// EventTime records the event time, to help investigate replay issues.
	EventTime time.Time `json:"event_time"`
	// ResourceType and ResourceID help locate the resource the event acted on.
	ResourceType string `gorm:"size:32;index" json:"resource_type"`
	ResourceID string `gorm:"size:128;index" json:"resource_id"`
}

// TableName returns the database table name used for ResourceEventDedup.
func (ResourceEventDedup) TableName() string {
	return "logs_resource_event_dedup"
}

View File

@@ -0,0 +1,37 @@
package models
import "time"
// ResourceMapping is a snapshot of a resource mapping received from
// dc-control.
type ResourceMapping struct {
	ID uint `gorm:"primaryKey" json:"id"`
	// CreatedAt and UpdatedAt are maintained by GORM.
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
	// ResourceType is the resource type (server/collector/device). Together
	// with ResourceID it forms the unique index idx_logs_resource_unique.
	ResourceType string `gorm:"size:32;index:idx_logs_resource_unique,unique" json:"resource_type"`
	// ResourceID is the resource ID as assigned by dc-control.
	ResourceID string `gorm:"size:128;index:idx_logs_resource_unique,unique" json:"resource_id"`
	// ResourceName is the resource's display name.
	ResourceName string `gorm:"size:256" json:"resource_name"`
	// IPsJSON, HostnamesJSON and LabelsJSON store the arrays and labels as
	// JSON text.
	IPsJSON string `gorm:"type:text" json:"ips_json"`
	HostnamesJSON string `gorm:"type:text" json:"hostnames_json"`
	LabelsJSON string `gorm:"type:text" json:"labels_json"`
	// Version is used to handle out-of-order events; only newer versions may
	// overwrite the stored data.
	Version int64 `gorm:"index" json:"version"`
	// IsDeleted marks the mapping as logically deleted.
	IsDeleted bool `gorm:"index" json:"is_deleted"`
	// LastEventID is the ID of the last successfully applied event
	// (an aid for idempotency).
	LastEventID string `gorm:"size:128" json:"last_event_id"`
	// EventTime is the time at which the event was produced.
	EventTime time.Time `json:"event_time"`
}

// TableName returns the database table name used for ResourceMapping.
func (ResourceMapping) TableName() string {
	return "logs_resource_mappings"
}

View File

@@ -39,6 +39,10 @@ func Register(srvKey string, engine *gin.Engine) {
api.PUT("/trap-suppressions/:id", controllers.UpdateTrapShield)
api.DELETE("/trap-suppressions/:id", controllers.DeleteTrapShield)
api.POST("/resource-events", controllers.ReceiveResourceEvent)
api.GET("/entries", controllers.ListLogEvents)
api.GET("/alert-outbox", controllers.ListAlertOutbox)
api.POST("/alert-outbox/:id/retry", controllers.RetryAlertOutbox)
}
}