Skip to content

Commit b6f76c7

Browse files
authored
Add backfill benchmarks (#412)
This change adds benchmarks that run against 10k, 100k and 1 million rows. They benchmark: * How long it takes to complete a full backfill of a single column * How long it takes to update all rows in a table with and without a migration trigger in place This should give us a baseline metric that we can use to compare performance over time. Example output: ``` make bench go test ./internal/benchmarks -v -benchtime=1x -bench . 2024/10/21 12:44:01 github.com/testcontainers/testcontainers-go - Connected to docker: Server Version: 27.2.0 API Version: 1.46 Operating System: Docker Desktop Total Memory: 7838 MB Labels: com.docker.desktop.address=unix:///Users/ryan/Library/Containers/com.docker.docker/Data/docker-cli.sock Testcontainers for Go Version: v0.33.0 Resolved Docker Host: unix:///var/run/docker.sock Resolved Docker Socket Path: /var/run/docker.sock Test SessionID: 816adaef777204b01d23a061c6f5532ca8cea098c7f8c6a68fdf542fbfa73f6e Test ProcessID: bf2f6095-b21e-4569-a4df-52291606bf3d 2024/10/21 12:44:01 🐳 Creating container for image testcontainers/ryuk:0.8.1 2024/10/21 12:44:01 ✅ Container created: eab8b6af62ba 2024/10/21 12:44:01 🐳 Starting container: eab8b6af62ba 2024/10/21 12:44:01 ✅ Container started: eab8b6af62ba 2024/10/21 12:44:01 ⏳ Waiting for container id eab8b6af62ba image: testcontainers/ryuk:0.8.1. Waiting for: &{Port:8080/tcp timeout:<nil> PollInterval:100ms skipInternalCheck:false} 2024/10/21 12:44:01 🔔 Container is ready: eab8b6af62ba 2024/10/21 12:44:01 🐳 Creating container for image postgres:15.3 2024/10/21 12:44:01 ✅ Container created: 7bc6dfd7af00 2024/10/21 12:44:01 🐳 Starting container: 7bc6dfd7af00 2024/10/21 12:44:01 ✅ Container started: 7bc6dfd7af00 2024/10/21 12:44:01 ⏳ Waiting for container id 7bc6dfd7af00 image: postgres:15.3.
Waiting for: &{timeout:<nil> deadline:0x14000435060 Strategies:[0x14000460540]} 2024/10/21 12:44:02 🔔 Container is ready: 7bc6dfd7af00 goos: darwin goarch: arm64 pkg: github.com/xataio/pgroll/internal/benchmarks cpu: Apple M2 Pro BenchmarkBackfill BenchmarkBackfill/10000 benchmarks_test.go:136: Seeded 10000 rows in 19.073458ms (524289 rows/s) benchmarks_test.go:51: Backfilled 10000 rows in 102.083958ms BenchmarkBackfill/10000-10 1 102083958 ns/op 97959 rows/s BenchmarkBackfill/100000 benchmarks_test.go:136: Seeded 100000 rows in 96.639042ms (1034778 rows/s) benchmarks_test.go:51: Backfilled 100000 rows in 2.032871959s BenchmarkBackfill/100000-10 1 2032871959 ns/op 49191 rows/s BenchmarkBackfill/1000000 benchmarks_test.go:136: Seeded 1000000 rows in 608.590708ms (1643140 rows/s) benchmarks_test.go:51: Backfilled 1000000 rows in 56.80506s BenchmarkBackfill/1000000-10 1 56805060000 ns/op 17604 rows/s BenchmarkWriteAmplification BenchmarkWriteAmplification/NoTrigger BenchmarkWriteAmplification/NoTrigger/10000 benchmarks_test.go:136: Seeded 10000 rows in 21.901875ms (456582 rows/s) BenchmarkWriteAmplification/NoTrigger/10000-10 1 15013333 ns/op 666075 rows/s BenchmarkWriteAmplification/NoTrigger/100000 benchmarks_test.go:136: Seeded 100000 rows in 98.442458ms (1015822 rows/s) BenchmarkWriteAmplification/NoTrigger/100000-10 1 155141667 ns/op 644572 rows/s BenchmarkWriteAmplification/NoTrigger/1000000 benchmarks_test.go:136: Seeded 1000000 rows in 663.248542ms (1507730 rows/s) BenchmarkWriteAmplification/NoTrigger/1000000-10 1 1704721875 ns/op 586606 rows/s BenchmarkWriteAmplification/WithTrigger BenchmarkWriteAmplification/WithTrigger/10000 benchmarks_test.go:136: Seeded 10000 rows in 26.146708ms (382457 rows/s) BenchmarkWriteAmplification/WithTrigger/10000-10 1 59703417 ns/op 167495 rows/s BenchmarkWriteAmplification/WithTrigger/100000 benchmarks_test.go:136: Seeded 100000 rows in 102.552667ms (975109 rows/s) BenchmarkWriteAmplification/WithTrigger/100000-10 1 630408666 
ns/op 158627 rows/s BenchmarkWriteAmplification/WithTrigger/1000000 benchmarks_test.go:136: Seeded 1000000 rows in 666.005167ms (1501490 rows/s) BenchmarkWriteAmplification/WithTrigger/1000000-10 1 5909246000 ns/op 169226 rows/s PASS 2024/10/21 12:45:51 🐳 Terminating container: 7bc6dfd7af00 2024/10/21 12:45:51 🚫 Container terminated: 7bc6dfd7af00 ok github.com/xataio/pgroll/internal/benchmarks 110.632s ``` Part of #408
1 parent e2f1740 commit b6f76c7

File tree

5 files changed

+226
-3
lines changed

5 files changed

+226
-3
lines changed

.github/workflows/benchmark.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: Benchmark
2+
on:
3+
push:
4+
branches:
5+
- main
6+
permissions:
7+
contents: read
8+
packages: read
9+
jobs:
10+
benchmark:
11+
name: 'benchmark (pg: ${{ matrix.pgVersion }})'
12+
runs-on: ubuntu-latest
13+
strategy:
14+
fail-fast: false
15+
matrix:
16+
pgVersion: ['14.8', '15.3', '16.4', '17.0' ,'latest']
17+
steps:
18+
- uses: actions/checkout@v4
19+
20+
- name: Set up Go
21+
uses: actions/setup-go@v5
22+
with:
23+
go-version-file: 'go.mod'
24+
25+
- name: Run benchmarks
26+
run: make bench
27+
env:
28+
POSTGRES_VERSION: ${{ matrix.pgVersion }}

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ jobs:
204204
version: latest
205205
args: release --clean
206206
env:
207-
# We use two github tokens here:
207+
# We use two GitHub tokens here:
208208
# * The actions-bound `GITHUB_TOKEN` with permissions to write packages.
209209
# * The org level `GIT_TOKEN` to be able to publish the brew tap file.
210210
# See: https://goreleaser.com/errors/resource-not-accessible-by-integration/

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ clean:
99
format:
1010
# Format JSON schema
1111
docker run --rm -v $$PWD/schema.json:/mnt/schema.json node:alpine npx prettier /mnt/schema.json --parser json --tab-width 2 --single-quote --trailing-comma all --no-semi --arrow-parens always --print-width 120 --write
12+
# Format embedded SQL
1213
docker run --rm -v $$PWD/pkg/state/init.sql:/mnt/init.sql node:alpine npx sql-formatter -l postgresql -o /mnt/init.sql /mnt/init.sql
1314

1415
generate: format
@@ -32,3 +33,6 @@ examples:
3233

3334
test:
3435
go test ./...
36+
37+
bench:
38+
go test ./internal/benchmarks -v -benchtime=1x -bench .
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
3+
package benchmarks
4+
5+
import (
6+
"context"
7+
"database/sql"
8+
"strconv"
9+
"testing"
10+
11+
"github.com/lib/pq"
12+
"github.com/oapi-codegen/nullable"
13+
"github.com/stretchr/testify/require"
14+
15+
"github.com/xataio/pgroll/internal/testutils"
16+
"github.com/xataio/pgroll/pkg/migrations"
17+
"github.com/xataio/pgroll/pkg/roll"
18+
)
19+
20+
// unitRowsPerSecond is the unit label passed to b.ReportMetric for throughput results.
const unitRowsPerSecond = "rows/s"
21+
22+
// rowCounts lists the table sizes, in rows, that every benchmark in this file is run against.
var rowCounts = []int{10_000, 100_000, 300_000}
23+
24+
func TestMain(m *testing.M) {
25+
testutils.SharedTestMain(m)
26+
}
27+
28+
func BenchmarkBackfill(b *testing.B) {
29+
ctx := context.Background()
30+
testSchema := testutils.TestSchema()
31+
var opts []roll.Option
32+
33+
for _, rowCount := range rowCounts {
34+
b.Run(strconv.Itoa(rowCount), func(b *testing.B) {
35+
testutils.WithMigratorInSchemaAndConnectionToContainerWithOptions(b, testSchema, opts, func(mig *roll.Roll, db *sql.DB) {
36+
b.Cleanup(func() {
37+
require.NoError(b, mig.Close())
38+
})
39+
40+
setupInitialTable(b, ctx, testSchema, mig, db, rowCount)
41+
b.ResetTimer()
42+
43+
// Backfill
44+
b.StartTimer()
45+
require.NoError(b, mig.Start(ctx, &migAlterColumn))
46+
require.NoError(b, mig.Complete(ctx))
47+
b.StopTimer()
48+
b.Logf("Backfilled %d rows in %s", rowCount, b.Elapsed())
49+
rowsPerSecond := float64(rowCount) / b.Elapsed().Seconds()
50+
b.ReportMetric(rowsPerSecond, unitRowsPerSecond)
51+
})
52+
})
53+
}
54+
}
55+
56+
// Benchmark the difference between updating all rows with and without an update trigger in place
57+
func BenchmarkWriteAmplification(b *testing.B) {
58+
ctx := context.Background()
59+
testSchema := testutils.TestSchema()
60+
var opts []roll.Option
61+
62+
assertRowCount := func(tb testing.TB, db *sql.DB, rowCount int) {
63+
tb.Helper()
64+
65+
var count int
66+
err := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM users WHERE name = 'person'").Scan(&count)
67+
require.NoError(b, err)
68+
require.Equal(b, rowCount, count)
69+
}
70+
71+
b.Run("NoTrigger", func(b *testing.B) {
72+
for _, rowCount := range rowCounts {
73+
b.Run(strconv.Itoa(rowCount), func(b *testing.B) {
74+
testutils.WithMigratorInSchemaAndConnectionToContainerWithOptions(b, testSchema, opts, func(mig *roll.Roll, db *sql.DB) {
75+
setupInitialTable(b, ctx, testSchema, mig, db, rowCount)
76+
b.Cleanup(func() {
77+
require.NoError(b, mig.Close())
78+
assertRowCount(b, db, rowCount)
79+
})
80+
81+
b.ResetTimer()
82+
83+
// Update the name in all rows
84+
b.StartTimer()
85+
_, err := db.ExecContext(ctx, `UPDATE users SET name = 'person'`)
86+
require.NoError(b, err)
87+
b.StopTimer()
88+
rowsPerSecond := float64(rowCount) / b.Elapsed().Seconds()
89+
b.ReportMetric(rowsPerSecond, unitRowsPerSecond)
90+
})
91+
})
92+
}
93+
})
94+
95+
b.Run("WithTrigger", func(b *testing.B) {
96+
for _, rowCount := range rowCounts {
97+
b.Run(strconv.Itoa(rowCount), func(b *testing.B) {
98+
testutils.WithMigratorInSchemaAndConnectionToContainerWithOptions(b, testSchema, opts, func(mig *roll.Roll, db *sql.DB) {
99+
setupInitialTable(b, ctx, testSchema, mig, db, rowCount)
100+
101+
// Start the migration
102+
require.NoError(b, mig.Start(ctx, &migAlterColumn))
103+
b.Cleanup(func() {
104+
// Finish the migration
105+
require.NoError(b, mig.Complete(ctx))
106+
require.NoError(b, mig.Close())
107+
assertRowCount(b, db, rowCount)
108+
})
109+
110+
b.ResetTimer()
111+
112+
// Update the name in all rows
113+
b.StartTimer()
114+
_, err := db.ExecContext(ctx, `UPDATE users SET name = 'person'`)
115+
require.NoError(b, err)
116+
b.StopTimer()
117+
rowsPerSecond := float64(rowCount) / b.Elapsed().Seconds()
118+
b.ReportMetric(rowsPerSecond, unitRowsPerSecond)
119+
})
120+
})
121+
}
122+
})
123+
}
124+
125+
func setupInitialTable(tb testing.TB, ctx context.Context, testSchema string, mig *roll.Roll, db *sql.DB, rowCount int) {
126+
tb.Helper()
127+
128+
seed := func(tb testing.TB, rowCount int, db *sql.DB) {
129+
tx, err := db.Begin()
130+
require.NoError(tb, err)
131+
defer tx.Rollback()
132+
133+
stmt, err := tx.PrepareContext(ctx, pq.CopyInSchema(testSchema, "users", "name"))
134+
require.NoError(tb, err)
135+
136+
for i := 0; i < rowCount; i++ {
137+
_, err = stmt.ExecContext(ctx, nil)
138+
require.NoError(tb, err)
139+
}
140+
141+
_, err = stmt.ExecContext(ctx)
142+
require.NoError(tb, err)
143+
require.NoError(tb, tx.Commit())
144+
}
145+
146+
// Setup
147+
require.NoError(tb, mig.Start(ctx, &migCreateTable))
148+
require.NoError(tb, mig.Complete(ctx))
149+
seed(tb, rowCount, db)
150+
}
151+
152+
// migCreateTable creates a simple `users` table with a serial primary key `id` and a
// nullable, non-unique `name` field of type varchar(255).
153+
var migCreateTable = migrations.Migration{
154+
Name: "01_create_table",
155+
Operations: migrations.Operations{
156+
&migrations.OpCreateTable{
157+
Name: "users",
158+
Columns: []migrations.Column{
159+
{
160+
Name: "id",
161+
Type: "serial",
162+
Pk: ptr(true),
163+
},
164+
{
165+
Name: "name",
166+
Type: "varchar(255)",
167+
Nullable: ptr(true),
168+
Unique: ptr(false),
169+
},
170+
},
171+
},
172+
},
173+
}
174+
175+
// migAlterColumn alters the `users` table to make the `name` field not null. Its `Up`
176+
// expression backfills rows whose name is NULL with `placeholder`.
177+
var migAlterColumn = migrations.Migration{
178+
Name: "02_alter_column",
179+
Operations: migrations.Operations{
180+
&migrations.OpAlterColumn{
181+
Table: "users",
182+
Column: "name",
183+
Up: "(SELECT CASE WHEN name IS NULL THEN 'placeholder' ELSE name END)",
184+
// NOTE(review): `user_name` is not a column declared by migCreateTable (only `id` and
// `name` are) — confirm whether this expression should be `name`.
Down: "user_name",
185+
Comment: nullable.NewNullableWithValue("the name of the user"),
186+
Nullable: ptr(false),
187+
},
188+
},
189+
}
190+
191+
// ptr returns a pointer to a fresh copy of x. It exists so that optional (pointer-typed)
// struct fields can be populated from literals, e.g. ptr(true).
func ptr[T any](x T) *T {
	p := new(T)
	*p = x
	return p
}

internal/testutils/util.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ func WithUninitializedState(t *testing.T, fn func(*state.State)) {
130130
fn(st)
131131
}
132132

133-
func WithMigratorInSchemaAndConnectionToContainerWithOptions(t *testing.T, schema string, opts []roll.Option, fn func(mig *roll.Roll, db *sql.DB)) {
133+
func WithMigratorInSchemaAndConnectionToContainerWithOptions(t testing.TB, schema string, opts []roll.Option, fn func(mig *roll.Roll, db *sql.DB)) {
134134
t.Helper()
135135
ctx := context.Background()
136136

@@ -236,7 +236,7 @@ func WithMigratorAndConnectionToContainerWithOptions(t *testing.T, opts []roll.O
236236
// - a connection to the new database
237237
// - the connection string to the new database
238238
// - the name of the new database
239-
func setupTestDatabase(t *testing.T) (*sql.DB, string, string) {
239+
func setupTestDatabase(t testing.TB) (*sql.DB, string, string) {
240240
t.Helper()
241241
ctx := context.Background()
242242

0 commit comments

Comments
 (0)