Skip to content

Commit ced761b

Browse files
authored
Allow configuration of backfill batch size (#406)
It can be controlled via the `--backfill-batch-size` command line parameter or by setting the `PGROLL_BACKFILL_BATCH_SIZE` environment variable. It can also be set programmatically via the `roll.WithBackfillBatchSize` function. If unset, it will default to 1000. Part of #168
1 parent 681a3eb commit ced761b

File tree

6 files changed

+33
-8
lines changed

6 files changed

+33
-8
lines changed

cmd/flags/flags.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ func LockTimeout() int {
2222
return viper.GetInt("LOCK_TIMEOUT")
2323
}
2424

25+
// BackfillBatchSize returns the integer value stored in viper under the
// "BACKFILL_BATCH_SIZE" key, i.e. the number of rows backfilled in each batch.
func BackfillBatchSize() int {
	return viper.GetInt("BACKFILL_BATCH_SIZE")
}
26+
2527
func Role() string {
2628
return viper.GetString("ROLE")
2729
}

cmd/root.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77

88
"github.com/spf13/cobra"
99
"github.com/spf13/viper"
10+
1011
"github.com/xataio/pgroll/cmd/flags"
1112
"github.com/xataio/pgroll/pkg/roll"
1213
"github.com/xataio/pgroll/pkg/state"
@@ -23,12 +24,14 @@ func init() {
2324
rootCmd.PersistentFlags().String("schema", "public", "Postgres schema to use for the migration")
2425
rootCmd.PersistentFlags().String("pgroll-schema", "pgroll", "Postgres schema to use for pgroll internal state")
2526
rootCmd.PersistentFlags().Int("lock-timeout", 500, "Postgres lock timeout in milliseconds for pgroll DDL operations")
27+
rootCmd.PersistentFlags().Int("backfill-batch-size", roll.DefaultBackfillBatchSize, "Number of rows backfilled in each batch")
2628
rootCmd.PersistentFlags().String("role", "", "Optional postgres role to set when executing migrations")
2729

2830
viper.BindPFlag("PG_URL", rootCmd.PersistentFlags().Lookup("postgres-url"))
2931
viper.BindPFlag("SCHEMA", rootCmd.PersistentFlags().Lookup("schema"))
3032
viper.BindPFlag("STATE_SCHEMA", rootCmd.PersistentFlags().Lookup("pgroll-schema"))
3133
viper.BindPFlag("LOCK_TIMEOUT", rootCmd.PersistentFlags().Lookup("lock-timeout"))
34+
viper.BindPFlag("BACKFILL_BATCH_SIZE", rootCmd.PersistentFlags().Lookup("backfill-batch-size"))
3235
viper.BindPFlag("ROLE", rootCmd.PersistentFlags().Lookup("role"))
3336
}
3437

@@ -44,6 +47,7 @@ func NewRoll(ctx context.Context) (*roll.Roll, error) {
4447
stateSchema := flags.StateSchema()
4548
lockTimeout := flags.LockTimeout()
4649
role := flags.Role()
50+
backfillBatchSize := flags.BackfillBatchSize()
4751

4852
state, err := state.New(ctx, pgURL, stateSchema)
4953
if err != nil {
@@ -53,6 +57,7 @@ func NewRoll(ctx context.Context) (*roll.Roll, error) {
5357
return roll.New(ctx, pgURL, schema, state,
5458
roll.WithLockTimeoutMs(lockTimeout),
5559
roll.WithRole(role),
60+
roll.WithBackfillBatchSize(backfillBatchSize),
5661
)
5762
}
5863

pkg/migrations/backfill.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import (
1919
// 2. Get the first batch of rows from the table, ordered by the primary key.
2020
// 3. Update each row in the batch, setting the value of the primary key column to itself.
2121
// 4. Repeat steps 2 and 3 until no more rows are returned.
22-
func Backfill(ctx context.Context, conn db.DB, table *schema.Table, cbs ...CallbackFn) error {
22+
func Backfill(ctx context.Context, conn db.DB, table *schema.Table, batchSize int, cbs ...CallbackFn) error {
2323
// get the backfill column
2424
identityColumn := getIdentityColumn(table)
2525
if identityColumn == nil {
@@ -31,7 +31,7 @@ func Backfill(ctx context.Context, conn db.DB, table *schema.Table, cbs ...Callb
3131
table: table,
3232
identityColumn: identityColumn,
3333
lastValue: nil,
34-
batchSize: 1000,
34+
batchSize: batchSize,
3535
}
3636

3737
// Update each batch of rows, invoking callbacks for each one.

pkg/roll/execute.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ func (m *Roll) ensureView(ctx context.Context, version, name string, table schem
277277

278278
func (m *Roll) performBackfills(ctx context.Context, tables []*schema.Table, cbs ...migrations.CallbackFn) error {
279279
for _, table := range tables {
280-
if err := migrations.Backfill(ctx, m.pgConn, table, cbs...); err != nil {
280+
if err := migrations.Backfill(ctx, m.pgConn, table, m.backfillBatchSize, cbs...); err != nil {
281281
errRollback := m.Rollback(ctx)
282282

283283
return errors.Join(

pkg/roll/options.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ type options struct {
2525
// additional entries to add to the search_path during migration execution
2626
searchPath []string
2727

28+
// the number of rows to backfill in each batch
29+
backfillBatchSize int
30+
2831
migrationHooks MigrationHooks
2932
}
3033

@@ -99,3 +102,10 @@ func WithSearchPath(schemas ...string) Option {
99102
o.searchPath = schemas
100103
}
101104
}
105+
106+
// WithBackfillBatchSize sets the number of rows backfilled in each batch.
107+
func WithBackfillBatchSize(batchSize int) Option {
108+
return func(o *options) {
109+
o.backfillBatchSize = batchSize
110+
}
111+
}

pkg/roll/roll.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ import (
1717

1818
type PGVersion int
1919

20-
const PGVersion15 PGVersion = 15
20+
const (
21+
PGVersion15 PGVersion = 15
22+
DefaultBackfillBatchSize int = 1000
23+
)
2124

2225
type Roll struct {
2326
pgConn db.DB
@@ -31,10 +34,11 @@ type Roll struct {
3134
// disable creation of version schema for raw SQL migrations
3235
noVersionSchemaForRawSQL bool
3336

34-
migrationHooks MigrationHooks
35-
state *state.State
36-
pgVersion PGVersion
37-
sqlTransformer migrations.SQLTransformer
37+
migrationHooks MigrationHooks
38+
state *state.State
39+
pgVersion PGVersion
40+
sqlTransformer migrations.SQLTransformer
41+
backfillBatchSize int
3842
}
3943

4044
// New creates a new Roll instance
@@ -43,6 +47,9 @@ func New(ctx context.Context, pgURL, schema string, state *state.State, opts ...
4347
for _, o := range opts {
4448
o(rollOpts)
4549
}
50+
if rollOpts.backfillBatchSize <= 0 {
51+
rollOpts.backfillBatchSize = DefaultBackfillBatchSize
52+
}
4653

4754
conn, err := setupConn(ctx, pgURL, schema, *rollOpts)
4855
if err != nil {
@@ -71,6 +78,7 @@ func New(ctx context.Context, pgURL, schema string, state *state.State, opts ...
7178
noVersionSchemaForRawSQL: rollOpts.noVersionSchemaForRawSQL,
7279
migrationHooks: rollOpts.migrationHooks,
7380
sqlTransformer: sqlTransformer,
81+
backfillBatchSize: rollOpts.backfillBatchSize,
7482
}, nil
7583
}
7684

0 commit comments

Comments
 (0)