zerolog icon indicating copy to clipboard operation
zerolog copied to clipboard

please help to resolve race-condition in zerolog

Open Slach opened this issue 2 years ago • 3 comments

@rs i have race-condition ;( https://github.com/Altinity/clickhouse-backup/actions/runs/5259410860/jobs/9504979925#step:7:2222

WARNING: DATA RACE
Write at 0x00c000109440 by goroutine 30:
  github.com/rs/zerolog.newEvent()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/event.go:60 +0xd9
  github.com/rs/zerolog.(*Logger).newEvent()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/log.go:453 +0xec
  github.com/rs/zerolog.(*Logger).Info()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/log.go:334 +0x59
  github.com/Altinity/clickhouse-backup/pkg/clickhouse.(*ClickHouse).LogQuery()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/clickhouse/clickhouse.go:917 +0x2e4
  github.com/Altinity/clickhouse-backup/pkg/clickhouse.(*ClickHouse).Query()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/clickhouse/clickhouse.go:887 +0x193
  github.com/Altinity/clickhouse-backup/pkg/partition.GetPartitionId()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/partition/partition.go:60 +0x1058
  github.com/Altinity/clickhouse-backup/pkg/filesystemhelper.CreatePartitionsToBackupMap()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/filesystemhelper/filesystemhelper.go:305 +0x14d9
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).downloadTableMetadata()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:386 +0x33f7
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).Download.func2()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:196 +0x8b2
  golang.org/x/sync/errgroup.(*Group).Go.func1()
      /home/runner/go/pkg/mod/golang.org/x/[email protected]/errgroup/errgroup.go:75 +0x133

Previous read at 0x00c000109440 by goroutine 31:
  github.com/rs/zerolog.putEvent()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/event.go:40 +0x3c
  github.com/rs/zerolog.(*Event).write()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/event.go:81 +0x339
  github.com/rs/zerolog.(*Event).msg()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/event.go:149 +0x55d
  github.com/rs/zerolog.(*Event).Msg()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/event.go:108 +0x64
  github.com/Altinity/clickhouse-backup/pkg/clickhouse.(*ClickHouse).LogQuery()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/clickhouse/clickhouse.go:922 +0xc71
  github.com/Altinity/clickhouse-backup/pkg/clickhouse.(*ClickHouse).Query()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/clickhouse/clickhouse.go:887 +0x193
  github.com/Altinity/clickhouse-backup/pkg/partition.GetPartitionId()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/partition/partition.go:60 +0x1058
  github.com/Altinity/clickhouse-backup/pkg/filesystemhelper.CreatePartitionsToBackupMap()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/filesystemhelper/filesystemhelper.go:305 +0x14d9
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).downloadTableMetadata()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:386 +0x33f7
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).Download.func2()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:196 +0x8b2
  golang.org/x/sync/errgroup.(*Group).Go.func1()
      /home/runner/go/pkg/mod/golang.org/x/[email protected]/errgroup/errgroup.go:75 +0x133      

when try to use zerolog with multiple go-routines + errorgroup

LogQuery() related code

import (
	"context"
	"fmt"
	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"
	"github.com/rs/zerolog/pkgerrors"
	"github.com/urfave/cli"
	stdlog "log"
	"os"
)
...
	zerolog.TimeFieldFormat = zerolog.TimeFormatUnixMs
	zerolog.ErrorStackMarshaler = pkgerrors.MarshalStack
	log.Logger = zerolog.New(zerolog.SyncWriter(os.Stdout)).With().Timestamp().Caller().Logger()
	stdlog.SetOutput(log.Logger)
...
type ClickHouse struct {
	Config               *config.ClickHouseConfig
	conn                 driver.Conn
	disks                []Disk
	Logger               zerolog.Logger
	version              int
	isPartsColumnPresent int8
	IsOpen               bool
}

...
	ch := &clickhouse.ClickHouse{
		Config: &api.config.ClickHouse,
		Logger: log.With().Str("logger", "clickhouse").Logger(),
	}
...
func (ch *ClickHouse) LogQuery(query string, args ...interface{}) string {
	var logF *zerolog.Event
	// zerolog is not thread-safe https://github.com/rs/zerolog/issues/242 ;(
	logger := ch.Logger.With().Logger()
	if !ch.Config.LogSQLQueries {
		logF = logger.Debug()
	} else {
		logF = logger.Info()
	}
	if len(args) > 0 {
		logF.Msg(strings.NewReplacer("\n", " ", "\r", " ", "\t", " ").Replace(fmt.Sprintf("%s with args %v", query, args)))
	} else {
		logF.Msg(strings.NewReplacer("\n", " ", "\r", " ", "\t", " ").Replace(query))
	}
	return query
}

How race condition possible here? i redefine log.Logger once in main function

and after it use copy of logger

Logger: log.With().Str("logger", "clickhouse").Logger(), and copy of copy logger := ch.Logger.With().Logger()

during execution of With() it shall create copy of Logger

what i'm doing wrong?

Slach avatar Jun 13 '23 19:06 Slach

@Slach could you try this?

func (ch *ClickHouse) LogQuery(query string, args ...interface{}) string {
	level := zerolog.InfoLevel
	if !ch.Config.LogSQLQueries {
                level = zerolog.DebugLevel
	}
	if len(args) > 0 {
		ch.Logger.WithLevel(level).Msg(strings.NewReplacer("\n", " ", "\r", " ", "\t", " ").Replace(fmt.Sprintf("%s with args %v", query, args)))
	} else {
        	ch.Logger.WithLevel(level).Msg(strings.NewReplacer("\n", " ", "\r", " ", "\t", " ").Replace(query))
	}
	return query
}

Trying to confirm if that might help.

sruehl avatar Aug 08 '23 16:08 sruehl

@sruehl I can't thank you enough, it works, you save many debug hours

Slach avatar Aug 12 '23 15:08 Slach

=( but found race condition in other place

WARNING: DATA RACE
Write at 0x00c000448655 by goroutine 34:
  github.com/rs/zerolog/internal/json.Encoder.AppendString()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/internal/json/string.go:63 +0x71d
  github.com/rs/zerolog/internal/json.Encoder.AppendKey()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/internal/json/base.go:18 +0x275
  github.com/rs/zerolog.(*Event).Str()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/event.go:256 +0xea
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).downloadTableMetadata()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:412 +0x3c24
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).Download.func2()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:197 +0x44e
  golang.org/x/sync/errgroup.(*Group).Go.func1()
      /home/runner/go/pkg/mod/golang.org/x/[email protected]/errgroup/errgroup.go:75 +0x133

Previous write at 0x00c000448655 by goroutine 33:
  github.com/rs/zerolog/internal/json.Encoder.AppendString()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/internal/json/string.go:63 +0x71d
  github.com/rs/zerolog/internal/json.Encoder.AppendKey()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/internal/json/base.go:18 +0x275
  github.com/rs/zerolog.(*Event).Str()
      /home/runner/go/pkg/mod/github.com/rs/[email protected]/event.go:256 +0xea
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).downloadTableMetadata()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:412 +0x3c24
  github.com/Altinity/clickhouse-backup/pkg/backup.(*Backuper).Download.func2()
      /home/runner/work/clickhouse-backup/clickhouse-backup/pkg/backup/download.go:197 +0x44e
  golang.org/x/sync/errgroup.(*Group).Go.func1()
      /home/runner/go/pkg/mod/golang.org/x/[email protected]/errgroup/errgroup.go:75 +0x133

backup/download.go:412

// simple import zerolog
import (
	"github.com/rs/zerolog/log"
)

func (b *Backuper) downloadTableMetadata(ctx context.Context, backupName string, disks []clickhouse.Disk, tableTitle metadata.TableTitle, schemaOnly bool, partitions []string) (*metadata.TableMetadata, uint64, error) {

...
	log.Info().
		Str("operation", "download_metadata").
		Str("backup", backupName).
		Str("duration", utils.HumanizeDuration(time.Since(start))).
		Str("size", utils.FormatBytes(size)).
		Msg("done")
	return &tableMetadata, size, nil
}

just one function without instances of zerolog.Logger, which called inside pool of go-routines

	downloadSemaphore := semaphore.NewWeighted(int64(b.cfg.General.DownloadConcurrency))
	metadataGroup, metadataCtx := errgroup.WithContext(ctx)
	for i, t := range tablesForDownload {
...
		metadataGroup.Go(func() error {
			defer downloadSemaphore.Release(1)
			if err != nil {
				return err
			}
		})
	}
	if err := metadataGroup.Wait(); err != nil {
		return fmt.Errorf("one of Download Metadata go-routine return error: %v", err)
	}

Any ideas why it happen? and how to resolve it?

main.go with zerolog initialization

import (
	stdlog "log"
	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"
	"github.com/rs/zerolog/pkgerrors"
)

func main() {
	zerolog.TimeFieldFormat = zerolog.TimeFormatUnixMs
	zerolog.ErrorStackMarshaler = pkgerrors.MarshalStack
	consoleWriter := zerolog.ConsoleWriter{Out: os.Stdout, NoColor: true, TimeFormat: "2006-01-02 15:04:05.000"}
	log.Logger = zerolog.New(zerolog.SyncWriter(consoleWriter)).With().Timestamp().Caller().Logger()
	stdlog.SetOutput(log.Logger)
...
}

Slach avatar Aug 13 '23 12:08 Slach