Skip to content

Commit 73253df

Browse files
authored
fix: use separate HTTP clients in scale test load generators (#21288)
While scale testing, I noticed that our load generators send basically all requests to a single Coderd instance. e.g. ![image.png](https://app.graphite.com/user-attachments/assets/e259862a-adf1-47e7-a37b-fd14e420058e.png) This is because our scale test commands create all `Runner`s using the same codersdk Client, which means they share an underlying HTTP client. With HTTP/2, a single TCP session can multiplex many different HTTP requests (including websockets). So, it creates a single TCP connection to a single Coderd, and then sends all the requests down that one TCP connection. This PR modifies the `exp scaletest` load generator commands to create an independent HTTP client per `Runner`. This means that each runner will create its own TCP connection. This should help spread the load and make a more realistic test, because in a real deployment, scaled-out load will come over different TCP connections.
1 parent a9d8b12 commit 73253df

File tree

7 files changed

+169
-78
lines changed

7 files changed

+169
-78
lines changed

cli/exp_scaletest.go

Lines changed: 38 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ import (
4848

4949
const scaletestTracerName = "coder_scaletest"
5050

51+
var BypassHeader = map[string][]string{codersdk.BypassRatelimitHeader: {"true"}}
52+
5153
func (r *RootCmd) scaletestCmd() *serpent.Command {
5254
cmd := &serpent.Command{
5355
Use: "scaletest",
@@ -691,15 +693,6 @@ func (r *RootCmd) scaletestCreateWorkspaces() *serpent.Command {
691693
return err
692694
}
693695

694-
client.HTTPClient = &http.Client{
695-
Transport: &codersdk.HeaderTransport{
696-
Transport: http.DefaultTransport,
697-
Header: map[string][]string{
698-
codersdk.BypassRatelimitHeader: {"true"},
699-
},
700-
},
701-
}
702-
703696
if count <= 0 {
704697
return xerrors.Errorf("--count is required and must be greater than 0")
705698
}
@@ -811,7 +804,13 @@ func (r *RootCmd) scaletestCreateWorkspaces() *serpent.Command {
811804
return xerrors.Errorf("validate config: %w", err)
812805
}
813806

814-
var runner harness.Runnable = createworkspaces.NewRunner(client, config)
807+
// use an independent client for each Runner, so they don't reuse TCP connections. This can lead to
808+
// requests being unbalanced among Coder instances.
809+
runnerClient, err := loadtestutil.DupClientCopyingHeaders(client, BypassHeader)
810+
if err != nil {
811+
return xerrors.Errorf("create runner client: %w", err)
812+
}
813+
var runner harness.Runnable = createworkspaces.NewRunner(runnerClient, config)
815814
if tracingEnabled {
816815
runner = &runnableTraceWrapper{
817816
tracer: tracer,
@@ -1019,15 +1018,6 @@ func (r *RootCmd) scaletestWorkspaceUpdates() *serpent.Command {
10191018
return err
10201019
}
10211020

1022-
client.HTTPClient = &http.Client{
1023-
Transport: &codersdk.HeaderTransport{
1024-
Transport: http.DefaultTransport,
1025-
Header: map[string][]string{
1026-
codersdk.BypassRatelimitHeader: {"true"},
1027-
},
1028-
},
1029-
}
1030-
10311021
if workspaceCount <= 0 {
10321022
return xerrors.Errorf("--workspace-count must be greater than 0")
10331023
}
@@ -1166,7 +1156,14 @@ func (r *RootCmd) scaletestWorkspaceUpdates() *serpent.Command {
11661156
for i, config := range configs {
11671157
name := fmt.Sprintf("workspaceupdates-%dw", config.WorkspaceCount)
11681158
id := strconv.Itoa(i)
1169-
var runner harness.Runnable = workspaceupdates.NewRunner(client, config)
1159+
1160+
// use an independent client for each Runner, so they don't reuse TCP connections. This can lead to
1161+
// requests being unbalanced among Coder instances.
1162+
runnerClient, err := loadtestutil.DupClientCopyingHeaders(client, BypassHeader)
1163+
if err != nil {
1164+
return xerrors.Errorf("create runner client: %w", err)
1165+
}
1166+
var runner harness.Runnable = workspaceupdates.NewRunner(runnerClient, config)
11701167
if tracingEnabled {
11711168
runner = &runnableTraceWrapper{
11721169
tracer: tracer,
@@ -1323,16 +1320,6 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *serpent.Command {
13231320
prometheusSrvClose := ServeHandler(ctx, logger, promhttp.HandlerFor(reg, promhttp.HandlerOpts{}), prometheusFlags.Address, "prometheus")
13241321
defer prometheusSrvClose()
13251322

1326-
// Bypass rate limiting
1327-
client.HTTPClient = &http.Client{
1328-
Transport: &codersdk.HeaderTransport{
1329-
Transport: http.DefaultTransport,
1330-
Header: map[string][]string{
1331-
codersdk.BypassRatelimitHeader: {"true"},
1332-
},
1333-
},
1334-
}
1335-
13361323
workspaces, err := targetFlags.getTargetedWorkspaces(ctx, client, me.OrganizationIDs, inv.Stdout)
13371324
if err != nil {
13381325
return err
@@ -1429,7 +1416,13 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *serpent.Command {
14291416
if err := config.Validate(); err != nil {
14301417
return xerrors.Errorf("validate config: %w", err)
14311418
}
1432-
var runner harness.Runnable = workspacetraffic.NewRunner(client, config)
1419+
// use an independent client for each Runner, so they don't reuse TCP connections. This can lead to
1420+
// requests being unbalanced among Coder instances.
1421+
runnerClient, err := loadtestutil.DupClientCopyingHeaders(client, BypassHeader)
1422+
if err != nil {
1423+
return xerrors.Errorf("create runner client: %w", err)
1424+
}
1425+
var runner harness.Runnable = workspacetraffic.NewRunner(runnerClient, config)
14331426
if tracingEnabled {
14341427
runner = &runnableTraceWrapper{
14351428
tracer: tracer,
@@ -1617,9 +1610,13 @@ func (r *RootCmd) scaletestDashboard() *serpent.Command {
16171610
return xerrors.Errorf("create token for user: %w", err)
16181611
}
16191612

1620-
userClient := codersdk.New(client.URL,
1621-
codersdk.WithSessionToken(userTokResp.Key),
1622-
)
1613+
// use an independent client for each Runner, so they don't reuse TCP connections. This can lead to
1614+
// requests being unbalanced among Coder instances.
1615+
userClient, err := loadtestutil.DupClientCopyingHeaders(client, BypassHeader)
1616+
if err != nil {
1617+
return xerrors.Errorf("create runner client: %w", err)
1618+
}
1619+
codersdk.WithSessionToken(userTokResp.Key)(userClient)
16231620

16241621
config := dashboard.Config{
16251622
Interval: interval,
@@ -1766,15 +1763,6 @@ func (r *RootCmd) scaletestAutostart() *serpent.Command {
17661763
return err
17671764
}
17681765

1769-
client.HTTPClient = &http.Client{
1770-
Transport: &codersdk.HeaderTransport{
1771-
Transport: http.DefaultTransport,
1772-
Header: map[string][]string{
1773-
codersdk.BypassRatelimitHeader: {"true"},
1774-
},
1775-
},
1776-
}
1777-
17781766
if workspaceCount <= 0 {
17791767
return xerrors.Errorf("--workspace-count must be greater than zero")
17801768
}
@@ -1840,7 +1828,13 @@ func (r *RootCmd) scaletestAutostart() *serpent.Command {
18401828
if err := config.Validate(); err != nil {
18411829
return xerrors.Errorf("validate config: %w", err)
18421830
}
1843-
var runner harness.Runnable = autostart.NewRunner(client, config)
1831+
// use an independent client for each Runner, so they don't reuse TCP connections. This can lead to
1832+
// requests being unbalanced among Coder instances.
1833+
runnerClient, err := loadtestutil.DupClientCopyingHeaders(client, BypassHeader)
1834+
if err != nil {
1835+
return xerrors.Errorf("create runner client: %w", err)
1836+
}
1837+
var runner harness.Runnable = autostart.NewRunner(runnerClient, config)
18441838
if tracingEnabled {
18451839
runner = &runnableTraceWrapper{
18461840
tracer: tracer,

cli/exp_scaletest_dynamicparameters.go

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,18 @@ package cli
44

55
import (
66
"fmt"
7-
"net/http"
87
"time"
98

109
"github.com/prometheus/client_golang/prometheus"
1110
"github.com/prometheus/client_golang/prometheus/promhttp"
1211
"golang.org/x/xerrors"
1312

13+
"github.com/coder/coder/v2/scaletest/loadtestutil"
14+
1415
"cdr.dev/slog"
1516
"cdr.dev/slog/sloggers/sloghuman"
1617
"github.com/coder/serpent"
1718

18-
"github.com/coder/coder/v2/codersdk"
1919
"github.com/coder/coder/v2/scaletest/dynamicparameters"
2020
"github.com/coder/coder/v2/scaletest/harness"
2121
)
@@ -72,15 +72,6 @@ func (r *RootCmd) scaletestDynamicParameters() *serpent.Command {
7272
return err
7373
}
7474

75-
client.HTTPClient = &http.Client{
76-
Transport: &codersdk.HeaderTransport{
77-
Transport: http.DefaultTransport,
78-
Header: map[string][]string{
79-
codersdk.BypassRatelimitHeader: {"true"},
80-
},
81-
},
82-
}
83-
8475
reg := prometheus.NewRegistry()
8576
metrics := dynamicparameters.NewMetrics(reg, "concurrent_evaluations")
8677

@@ -122,7 +113,13 @@ func (r *RootCmd) scaletestDynamicParameters() *serpent.Command {
122113
Metrics: metrics,
123114
MetricLabelValues: []string{fmt.Sprintf("%d", part.ConcurrentEvaluations)},
124115
}
125-
var runner harness.Runnable = dynamicparameters.NewRunner(client, cfg)
116+
// use an independent client for each Runner, so they don't reuse TCP connections. This can lead to
117+
// requests being unbalanced among Coder instances.
118+
runnerClient, err := loadtestutil.DupClientCopyingHeaders(client, BypassHeader)
119+
if err != nil {
120+
return xerrors.Errorf("create runner client: %w", err)
121+
}
122+
var runner harness.Runnable = dynamicparameters.NewRunner(runnerClient, cfg)
126123
if tracingEnabled {
127124
runner = &runnableTraceWrapper{
128125
tracer: tracer,

cli/exp_scaletest_notifications.go

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ import (
1818
"github.com/prometheus/client_golang/prometheus/promhttp"
1919
"golang.org/x/xerrors"
2020

21+
"github.com/coder/coder/v2/scaletest/loadtestutil"
22+
2123
"cdr.dev/slog"
2224

2325
notificationsLib "github.com/coder/coder/v2/coderd/notifications"
@@ -66,15 +68,6 @@ func (r *RootCmd) scaletestNotifications() *serpent.Command {
6668
return err
6769
}
6870

69-
client.HTTPClient = &http.Client{
70-
Transport: &codersdk.HeaderTransport{
71-
Transport: http.DefaultTransport,
72-
Header: map[string][]string{
73-
codersdk.BypassRatelimitHeader: {"true"},
74-
},
75-
},
76-
}
77-
7871
if userCount <= 0 {
7972
return xerrors.Errorf("--user-count must be greater than 0")
8073
}
@@ -206,7 +199,13 @@ func (r *RootCmd) scaletestNotifications() *serpent.Command {
206199
for i, config := range configs {
207200
id := strconv.Itoa(i)
208201
name := fmt.Sprintf("notifications-%s", id)
209-
var runner harness.Runnable = notifications.NewRunner(client, config)
202+
// use an independent client for each Runner, so they don't reuse TCP connections. This can lead to
203+
// requests being unbalanced among Coder instances.
204+
runnerClient, err := loadtestutil.DupClientCopyingHeaders(client, BypassHeader)
205+
if err != nil {
206+
return xerrors.Errorf("create runner client: %w", err)
207+
}
208+
var runner harness.Runnable = notifications.NewRunner(runnerClient, config)
210209
if tracingEnabled {
211210
runner = &runnableTraceWrapper{
212211
tracer: tracer,

cli/exp_scaletest_prebuilds.go

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ package cli
44

55
import (
66
"fmt"
7-
"net/http"
87
"os/signal"
98
"strconv"
109
"sync"
@@ -14,6 +13,8 @@ import (
1413
"github.com/prometheus/client_golang/prometheus/promhttp"
1514
"golang.org/x/xerrors"
1615

16+
"github.com/coder/coder/v2/scaletest/loadtestutil"
17+
1718
"github.com/coder/coder/v2/codersdk"
1819
"github.com/coder/coder/v2/scaletest/harness"
1920
"github.com/coder/coder/v2/scaletest/prebuilds"
@@ -56,15 +57,6 @@ func (r *RootCmd) scaletestPrebuilds() *serpent.Command {
5657
return err
5758
}
5859

59-
client.HTTPClient = &http.Client{
60-
Transport: &codersdk.HeaderTransport{
61-
Transport: http.DefaultTransport,
62-
Header: map[string][]string{
63-
codersdk.BypassRatelimitHeader: {"true"},
64-
},
65-
},
66-
}
67-
6860
if numTemplates <= 0 {
6961
return xerrors.Errorf("--num-templates must be greater than 0")
7062
}
@@ -140,7 +132,13 @@ func (r *RootCmd) scaletestPrebuilds() *serpent.Command {
140132
return xerrors.Errorf("validate config: %w", err)
141133
}
142134

143-
var runner harness.Runnable = prebuilds.NewRunner(client, cfg)
135+
// use an independent client for each Runner, so they don't reuse TCP connections. This can lead to
136+
// requests being unbalanced among Coder instances.
137+
runnerClient, err := loadtestutil.DupClientCopyingHeaders(client, BypassHeader)
138+
if err != nil {
139+
return xerrors.Errorf("create runner client: %w", err)
140+
}
141+
var runner harness.Runnable = prebuilds.NewRunner(runnerClient, cfg)
144142
if tracingEnabled {
145143
runner = &runnableTraceWrapper{
146144
tracer: tracer,

cli/exp_scaletest_taskstatus.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"github.com/prometheus/client_golang/prometheus/promhttp"
1515
"golang.org/x/xerrors"
1616

17+
"github.com/coder/coder/v2/scaletest/loadtestutil"
18+
1719
"cdr.dev/slog"
1820
"cdr.dev/slog/sloggers/sloghuman"
1921
"github.com/coder/serpent"
@@ -143,7 +145,13 @@ After all runners connect, it waits for the baseline duration before triggering
143145
return xerrors.Errorf("validate config for runner %d: %w", i, err)
144146
}
145147

146-
var runner harness.Runnable = taskstatus.NewRunner(client, cfg)
148+
// use an independent client for each Runner, so they don't reuse TCP connections. This can lead to
149+
// requests being unbalanced among Coder instances.
150+
runnerClient, err := loadtestutil.DupClientCopyingHeaders(client, BypassHeader)
151+
if err != nil {
152+
return xerrors.Errorf("create runner client: %w", err)
153+
}
154+
var runner harness.Runnable = taskstatus.NewRunner(runnerClient, cfg)
147155
if tracingEnabled {
148156
runner = &runnableTraceWrapper{
149157
tracer: tracer,

scaletest/loadtestutil/client.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package loadtestutil
2+
3+
import (
4+
"maps"
5+
"net/http"
6+
7+
"golang.org/x/xerrors"
8+
9+
"github.com/coder/coder/v2/codersdk"
10+
)
11+
12+
// DupClientCopyingHeaders duplicates the Client, but with an independent underlying HTTP transport, so that it will not
13+
// share connections with the client being duplicated. It copies any headers already on the existing transport as
14+
// [codersdk.HeaderTransport] and add the headers in the argument.
15+
func DupClientCopyingHeaders(client *codersdk.Client, header http.Header) (*codersdk.Client, error) {
16+
nc := codersdk.New(client.URL, codersdk.WithLogger(client.Logger()))
17+
nc.SessionTokenProvider = client.SessionTokenProvider
18+
newHeader, t, err := extractHeaderAndInnerTransport(client.HTTPClient.Transport)
19+
if err != nil {
20+
return nil, xerrors.Errorf("extract headers: %w", err)
21+
}
22+
maps.Copy(newHeader, header)
23+
24+
nc.HTTPClient.Transport = &codersdk.HeaderTransport{
25+
Transport: t.Clone(),
26+
Header: newHeader,
27+
}
28+
return nc, nil
29+
}
30+
31+
func extractHeaderAndInnerTransport(rt http.RoundTripper) (http.Header, *http.Transport, error) {
32+
if t, ok := rt.(*http.Transport); ok {
33+
// base case
34+
return make(http.Header), t, nil
35+
}
36+
if ht, ok := rt.(*codersdk.HeaderTransport); ok {
37+
headers, t, err := extractHeaderAndInnerTransport(ht.Transport)
38+
if err != nil {
39+
return nil, nil, err
40+
}
41+
maps.Copy(headers, ht.Header)
42+
return headers, t, nil
43+
}
44+
return nil, nil, xerrors.New("round tripper is neither HeaderTransport nor Transport")
45+
}

0 commit comments

Comments
 (0)