mirror of
https://github.com/SSLMate/certspotter.git
synced 2025-07-03 10:47:17 +02:00
Store log errors in state directory
Instead of writing log errors to stderr, write them to a file in the state directory. When reporting a health check failure, include the path to the file and the last several lines. Log files are named by date, and the last 7 days are kept. Closes #106
This commit is contained in:
parent
5a8dd2ca82
commit
4fbbc5818e
@ -192,7 +192,6 @@ func main() {
|
||||
ScriptDir: defaultScriptDir(),
|
||||
Email: flags.email,
|
||||
Stdout: flags.stdout,
|
||||
Quiet: !flags.verbose,
|
||||
}
|
||||
config := &monitor.Config{
|
||||
LogListSource: flags.logs,
|
||||
@ -241,6 +240,19 @@ func main() {
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
defer stop()
|
||||
|
||||
go func() {
|
||||
ticker := time.NewTicker(24*time.Hour)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
fsstate.PruneOldErrors()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if err := monitor.Run(ctx, config); ctx.Err() == context.Canceled && errors.Is(err, context.Canceled) {
|
||||
if flags.verbose {
|
||||
fmt.Fprintf(os.Stderr, "%s: exiting due to SIGINT or SIGTERM\n", programName)
|
||||
|
@ -50,11 +50,21 @@ type daemon struct {
|
||||
|
||||
func (daemon *daemon) healthCheck(ctx context.Context) error {
|
||||
if time.Since(daemon.logsLoadedAt) >= daemon.config.HealthCheckInterval {
|
||||
errors, err := daemon.config.State.GetErrors(ctx, nil, recentErrorCount)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting recent errors: %w", err)
|
||||
}
|
||||
var errorsDir string
|
||||
if fsstate, ok := daemon.config.State.(*FilesystemState); ok {
|
||||
errorsDir = fsstate.errorDir(nil)
|
||||
}
|
||||
info := &StaleLogListInfo{
|
||||
Source: daemon.config.LogListSource,
|
||||
LastSuccess: daemon.logsLoadedAt,
|
||||
LastError: daemon.logListError,
|
||||
LastErrorTime: daemon.logListErrorAt,
|
||||
RecentErrors: errors,
|
||||
ErrorsDir: errorsDir,
|
||||
}
|
||||
if err := daemon.config.State.NotifyHealthCheckFailure(ctx, nil, info); err != nil {
|
||||
return fmt.Errorf("error notifying about stale log list: %w", err)
|
||||
|
@ -14,7 +14,9 @@ import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"slices"
|
||||
)
|
||||
|
||||
func randomFileSuffix() string {
|
||||
@ -69,3 +71,47 @@ func fileExists(filename string) bool {
|
||||
_, err := os.Lstat(filename)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func tailFile(filename string, linesWanted int) ([]byte, int, error) {
|
||||
file, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
defer file.Close()
|
||||
return tail(file, linesWanted, 4096)
|
||||
}
|
||||
|
||||
// tail returns the last linesWanted lines of r together with the number of
// lines actually found.
//
// r is read backwards from its end in chunks of chunkSize bytes until enough
// newline characters have been seen or the start of r is reached. If r holds
// fewer than linesWanted lines, its entire content is returned and the count
// reflects the lines that exist. A final line that is not terminated by '\n'
// (for example a partially written record) counts as one line when
// linesWanted is positive, so the caller never receives more lines than it
// asked for. A chunkSize that is zero or negative is replaced by a default of
// 4096 bytes, because such a value could never make progress.
//
// Any error from seeking or reading r is returned with a nil slice and a
// zero count.
func tail(r io.ReadSeeker, linesWanted int, chunkSize int) ([]byte, int, error) {
	const fallbackChunkSize = 4096
	if chunkSize <= 0 {
		chunkSize = fallbackChunkSize
	}

	var buf []byte
	linesGot := 0

	offset, err := r.Seek(0, io.SeekEnd)
	if err != nil {
		return nil, 0, err
	}

	firstChunk := true
	for offset > 0 {
		readSize := chunkSize
		if offset < int64(readSize) {
			readSize = int(offset)
		}
		offset -= int64(readSize)
		if _, err := r.Seek(offset, io.SeekStart); err != nil {
			return nil, 0, err
		}

		// Make room at the front of buf: shift what has been collected so
		// far to the right by readSize bytes, then read the new (earlier)
		// chunk into the gap.
		buf = slices.Grow(buf, readSize)
		copy(buf[readSize:len(buf)+readSize], buf)
		buf = buf[:len(buf)+readSize]
		if _, err := io.ReadFull(r, buf[:readSize]); err != nil {
			return nil, 0, err
		}

		if firstChunk {
			firstChunk = false
			// On the first pass buf holds exactly the end of the input. If
			// that end is not a newline, the trailing text is an
			// unterminated line that the newline scan below would miss.
			if linesWanted > 0 && buf[readSize-1] != '\n' {
				linesGot = 1
			}
		}

		// Only the freshly read bytes need scanning; the rest was scanned
		// on earlier iterations.
		for i := readSize; i > 0; i-- {
			if buf[i-1] == '\n' {
				if linesGot == linesWanted {
					// This newline terminates the line before the ones we
					// want, so everything after it is the result.
					return buf[i:], linesGot, nil
				}
				linesGot++
			}
		}
	}
	return buf, linesGot, nil
}
|
||||
|
@ -20,12 +20,17 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"software.sslmate.com/src/certspotter/cttypes"
|
||||
"software.sslmate.com/src/certspotter/loglist"
|
||||
"software.sslmate.com/src/certspotter/merkletree"
|
||||
)
|
||||
|
||||
const keepErrorDays = 7
|
||||
const errorDateFormat = "2006-01-02"
|
||||
|
||||
type FilesystemState struct {
|
||||
StateDir string
|
||||
CacheDir string
|
||||
@ -34,7 +39,7 @@ type FilesystemState struct {
|
||||
ScriptDir string
|
||||
Email []string
|
||||
Stdout bool
|
||||
Quiet bool
|
||||
errorMu sync.Mutex
|
||||
}
|
||||
|
||||
func (s *FilesystemState) logStateDir(logID LogID) string {
|
||||
@ -57,8 +62,9 @@ func (s *FilesystemState) PrepareLog(ctx context.Context, logID LogID) error {
|
||||
sthsDirPath = filepath.Join(stateDirPath, "unverified_sths")
|
||||
malformedDirPath = filepath.Join(stateDirPath, "malformed_entries")
|
||||
healthchecksDirPath = filepath.Join(stateDirPath, "healthchecks")
|
||||
errorsDirPath = filepath.Join(stateDirPath, "errors")
|
||||
)
|
||||
for _, dirPath := range []string{stateDirPath, sthsDirPath, malformedDirPath, healthchecksDirPath} {
|
||||
for _, dirPath := range []string{stateDirPath, sthsDirPath, malformedDirPath, healthchecksDirPath, errorsDirPath} {
|
||||
if err := os.Mkdir(dirPath, 0777); err != nil && !errors.Is(err, fs.ErrExist) {
|
||||
return err
|
||||
}
|
||||
@ -227,6 +233,13 @@ func (s *FilesystemState) healthCheckDir(ctlog *loglist.Log) string {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *FilesystemState) errorDir(ctlog *loglist.Log) string {
|
||||
if ctlog == nil {
|
||||
return filepath.Join(s.StateDir, "errors")
|
||||
}
|
||||
return filepath.Join(s.logStateDir(ctlog.LogID), "errors")
|
||||
}
|
||||
|
||||
func (s *FilesystemState) NotifyHealthCheckFailure(ctx context.Context, ctlog *loglist.Log, info HealthCheckFailure) error {
|
||||
textPath := filepath.Join(s.healthCheckDir(ctlog), healthCheckFilename())
|
||||
environ := []string{
|
||||
@ -248,13 +261,80 @@ func (s *FilesystemState) NotifyHealthCheckFailure(ctx context.Context, ctlog *l
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FilesystemState) NotifyError(ctx context.Context, ctlog *loglist.Log, err error) error {
|
||||
if !s.Quiet {
|
||||
if ctlog == nil {
|
||||
log.Print(err)
|
||||
} else {
|
||||
log.Print(ctlog.GetMonitoringURL(), ": ", err)
|
||||
func (s *FilesystemState) NotifyError(ctx context.Context, ctlog *loglist.Log, notifyErr error) error {
|
||||
var (
|
||||
now = time.Now()
|
||||
filePath = filepath.Join(s.errorDir(ctlog), now.Format(errorDateFormat))
|
||||
line = now.Format(time.RFC3339) + " " + notifyErr.Error() + "\n"
|
||||
)
|
||||
|
||||
s.errorMu.Lock()
|
||||
defer s.errorMu.Unlock()
|
||||
file, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0666)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
if _, err := file.WriteString(line); err != nil {
|
||||
return err
|
||||
}
|
||||
return file.Close()
|
||||
}
|
||||
|
||||
func (s *FilesystemState) GetErrors(ctx context.Context, ctlog *loglist.Log, count int) (string, error) {
|
||||
dir := s.errorDir(ctlog)
|
||||
now := time.Now()
|
||||
var buf []byte
|
||||
for daysBack := 0; count > 0 && daysBack < keepErrorDays; daysBack++ {
|
||||
datePath := filepath.Join(dir, now.AddDate(0, 0, -daysBack).Format(errorDateFormat))
|
||||
dateBuf, dateLines, err := tailFile(datePath, count)
|
||||
if errors.Is(err, fs.ErrNotExist) {
|
||||
continue
|
||||
} else if err != nil {
|
||||
return "", err
|
||||
}
|
||||
buf = append(dateBuf, buf...)
|
||||
count -= dateLines
|
||||
}
|
||||
return string(buf), nil
|
||||
}
|
||||
|
||||
func (s *FilesystemState) PruneOldErrors() {
|
||||
cutoff := time.Now().AddDate(0, 0, -keepErrorDays)
|
||||
pruneDir := func(dir string) {
|
||||
entries, err := os.ReadDir(dir)
|
||||
if errors.Is(err, fs.ErrNotExist) {
|
||||
return
|
||||
} else if err != nil {
|
||||
log.Printf("unable to read error directory: %s", err)
|
||||
return
|
||||
}
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
date, err := time.Parse(errorDateFormat, entry.Name())
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if date.Before(cutoff) {
|
||||
if err := os.Remove(filepath.Join(dir, entry.Name())); err != nil && !errors.Is(err, fs.ErrNotExist) {
|
||||
log.Printf("unable to remove old error file: %s", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
pruneDir(filepath.Join(s.StateDir, "errors"))
|
||||
logsDir := filepath.Join(s.StateDir, "logs")
|
||||
logDirs, err := os.ReadDir(logsDir)
|
||||
if err != nil && !errors.Is(err, fs.ErrNotExist) {
|
||||
log.Printf("unable to read logs directory: %s", err)
|
||||
return
|
||||
}
|
||||
for _, d := range logDirs {
|
||||
if !d.IsDir() {
|
||||
continue
|
||||
}
|
||||
pruneDir(filepath.Join(logsDir, d.Name(), "errors"))
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +19,8 @@ import (
|
||||
"software.sslmate.com/src/certspotter/loglist"
|
||||
)
|
||||
|
||||
const recentErrorCount = 10
|
||||
|
||||
// healthCheckFilename returns a file name made of the current UTC time in
// RFC 3339 format followed by the ".txt" extension.
func healthCheckFilename() string {
	timestamp := time.Now().UTC().Format(time.RFC3339)
	return timestamp + ".txt"
}
|
||||
@ -48,20 +50,37 @@ func healthCheckLog(ctx context.Context, config *Config, ctlog *loglist.Log) err
|
||||
return fmt.Errorf("error loading STHs: %w", err)
|
||||
}
|
||||
|
||||
var errorsDir string
|
||||
if fsstate, ok := config.State.(*FilesystemState); ok {
|
||||
errorsDir = fsstate.errorDir(ctlog)
|
||||
}
|
||||
|
||||
if len(sths) == 0 {
|
||||
errors, err := config.State.GetErrors(ctx, ctlog, recentErrorCount)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting recent errors: %w", err)
|
||||
}
|
||||
info := &StaleSTHInfo{
|
||||
Log: ctlog,
|
||||
LastSuccess: lastSuccess,
|
||||
LatestSTH: verifiedSTH,
|
||||
Log: ctlog,
|
||||
LastSuccess: lastSuccess,
|
||||
LatestSTH: verifiedSTH,
|
||||
RecentErrors: errors,
|
||||
ErrorsDir: errorsDir,
|
||||
}
|
||||
if err := config.State.NotifyHealthCheckFailure(ctx, ctlog, info); err != nil {
|
||||
return fmt.Errorf("error notifying about stale STH: %w", err)
|
||||
}
|
||||
} else {
|
||||
errors, err := config.State.GetErrors(ctx, ctlog, recentErrorCount)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting recent errors: %w", err)
|
||||
}
|
||||
info := &BacklogInfo{
|
||||
Log: ctlog,
|
||||
LatestSTH: sths[len(sths)-1],
|
||||
Position: position,
|
||||
Log: ctlog,
|
||||
LatestSTH: sths[len(sths)-1],
|
||||
Position: position,
|
||||
RecentErrors: errors,
|
||||
ErrorsDir: errorsDir,
|
||||
}
|
||||
if err := config.State.NotifyHealthCheckFailure(ctx, ctlog, info); err != nil {
|
||||
return fmt.Errorf("error notifying about backlog: %w", err)
|
||||
@ -77,15 +96,19 @@ type HealthCheckFailure interface {
|
||||
}
|
||||
|
||||
type StaleSTHInfo struct {
|
||||
Log *loglist.Log
|
||||
LastSuccess time.Time // may be zero
|
||||
LatestSTH *cttypes.SignedTreeHead // may be nil
|
||||
Log *loglist.Log
|
||||
LastSuccess time.Time // may be zero
|
||||
LatestSTH *cttypes.SignedTreeHead // may be nil
|
||||
RecentErrors string
|
||||
ErrorsDir string
|
||||
}
|
||||
|
||||
type BacklogInfo struct {
|
||||
Log *loglist.Log
|
||||
LatestSTH *StoredSTH
|
||||
Position uint64
|
||||
Log *loglist.Log
|
||||
LatestSTH *StoredSTH
|
||||
Position uint64
|
||||
RecentErrors string
|
||||
ErrorsDir string
|
||||
}
|
||||
|
||||
type StaleLogListInfo struct {
|
||||
@ -93,6 +116,8 @@ type StaleLogListInfo struct {
|
||||
LastSuccess time.Time
|
||||
LastError string
|
||||
LastErrorTime time.Time
|
||||
RecentErrors string
|
||||
ErrorsDir string
|
||||
}
|
||||
|
||||
func (e *StaleSTHInfo) LastSuccessString() string {
|
||||
@ -120,33 +145,45 @@ func (e *StaleSTHInfo) Text() string {
|
||||
text := new(strings.Builder)
|
||||
fmt.Fprintf(text, "certspotter has been unable to contact %s since %s. Consequentially, certspotter may fail to notify you about certificates in this log.\n", e.Log.GetMonitoringURL(), e.LastSuccessString())
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprintf(text, "For details, enable -verbose and see certspotter's stderr output.\n")
|
||||
fmt.Fprintf(text, "\n")
|
||||
if e.LatestSTH != nil {
|
||||
fmt.Fprintf(text, "Latest known log size = %d\n", e.LatestSTH.TreeSize)
|
||||
} else {
|
||||
fmt.Fprintf(text, "Latest known log size = none\n")
|
||||
}
|
||||
if e.RecentErrors != "" {
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprintf(text, "Recent errors (see %s for complete records):\n", e.ErrorsDir)
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprint(text, e.RecentErrors)
|
||||
}
|
||||
return text.String()
|
||||
}
|
||||
func (e *BacklogInfo) Text() string {
|
||||
text := new(strings.Builder)
|
||||
fmt.Fprintf(text, "certspotter has been unable to download entries from %s in a timely manner. Consequentially, certspotter may be slow to notify you about certificates in this log.\n", e.Log.GetMonitoringURL())
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprintf(text, "For details, enable -verbose and see certspotter's stderr output.\n")
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprintf(text, "Current log size = %d (as of %s)\n", e.LatestSTH.TreeSize, e.LatestSTH.StoredAt)
|
||||
fmt.Fprintf(text, "Current position = %d\n", e.Position)
|
||||
fmt.Fprintf(text, " Backlog = %d\n", e.Backlog())
|
||||
if e.RecentErrors != "" {
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprintf(text, "Recent errors (see %s for complete records):\n", e.ErrorsDir)
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprint(text, e.RecentErrors)
|
||||
}
|
||||
return text.String()
|
||||
}
|
||||
func (e *StaleLogListInfo) Text() string {
|
||||
text := new(strings.Builder)
|
||||
fmt.Fprintf(text, "certspotter has been unable to retrieve the log list from %s since %s.\n", e.Source, e.LastSuccess)
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprintf(text, "Last error (at %s): %s\n", e.LastErrorTime, e.LastError)
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprintf(text, "Consequentially, certspotter may not be monitoring all logs, and might fail to detect certificates.\n")
|
||||
if e.RecentErrors != "" {
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprintf(text, "Recent errors (see %s for complete records):\n", e.ErrorsDir)
|
||||
fmt.Fprintf(text, "\n")
|
||||
fmt.Fprint(text, e.RecentErrors)
|
||||
}
|
||||
return text.String()
|
||||
}
|
||||
|
||||
|
@ -85,4 +85,7 @@ type StateProvider interface {
|
||||
// not associated with a log. Note that most errors are transient, and
|
||||
// certspotter will retry the failed operation later.
|
||||
NotifyError(context.Context, *loglist.Log, error) error
|
||||
|
||||
// Retrieve the specified number of most recent errors.
|
||||
GetErrors(context.Context, *loglist.Log, int) (string, error)
|
||||
}
|
||||
|
@ -145,7 +145,7 @@ func prepareStateDir(stateDir string) error {
|
||||
return fmt.Errorf("%s was created by a newer version of certspotter; upgrade to the latest version of certspotter or remove this directory to start from scratch", stateDir)
|
||||
}
|
||||
|
||||
for _, subdir := range []string{"certs", "logs", "healthchecks"} {
|
||||
for _, subdir := range []string{"certs", "logs", "healthchecks", "errors"} {
|
||||
if err := os.Mkdir(filepath.Join(stateDir, subdir), 0777); err != nil && !errors.Is(err, fs.ErrExist) {
|
||||
return err
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user