Skip to content

Commit ce3d632

Browse files
authored
Add retries to kibana client (#1563)
Retries are added only to the Kibana client for now, because the Elasticsearch client relies on the Go SDK, which has its own retry mechanisms. As implemented, the retry wrapper can later be reused in other places where an HTTP client is used.
1 parent b6013c1 commit ce3d632

File tree

5 files changed

+396
-13
lines changed

5 files changed

+396
-13
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ require (
2424
github.com/google/go-github/v32 v32.1.0
2525
github.com/google/go-querystring v1.1.0
2626
github.com/google/uuid v1.4.0
27+
github.com/hashicorp/go-retryablehttp v0.7.5
2728
github.com/jedib0t/go-pretty v4.3.0+incompatible
2829
github.com/magefile/mage v1.15.0
2930
github.com/mholt/archiver/v3 v3.5.1
@@ -93,6 +94,7 @@ require (
9394
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
9495
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect
9596
github.com/hashicorp/errwrap v1.1.0 // indirect
97+
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
9698
github.com/hashicorp/go-multierror v1.1.1 // indirect
9799
github.com/huandu/xstrings v1.4.0 // indirect
98100
github.com/imdario/mergo v0.3.16 // indirect

go.sum

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,14 @@ github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:Fecb
301301
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
302302
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
303303
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
304+
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
305+
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
306+
github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
307+
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
304308
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
305309
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
310+
github.com/hashicorp/go-retryablehttp v0.7.5 h1:bJj+Pj19UZMIweq/iie+1u5YCdGrnxCT9yvm0e+Nd5M=
311+
github.com/hashicorp/go-retryablehttp v0.7.5/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8=
306312
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
307313
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
308314
github.com/hinshun/vt10x v0.0.0-20220119200601-820417d04eec h1:qv2VnGeEQHchGaZ/u7lxST/RaJw+cv273q79D81Xbog=

internal/kibana/client.go

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/elastic/elastic-package/internal/certs"
1919
"github.com/elastic/elastic-package/internal/install"
2020
"github.com/elastic/elastic-package/internal/logger"
21+
"github.com/elastic/elastic-package/internal/retry"
2122
)
2223

2324
var ErrUndefinedHost = errors.New("missing kibana host")
@@ -33,14 +34,19 @@ type Client struct {
3334

3435
versionInfo VersionInfo
3536
semver *semver.Version
37+
38+
retryMax int
39+
http *http.Client
3640
}
3741

3842
// ClientOption is functional option modifying Kibana client.
3943
type ClientOption func(*Client)
4044

4145
// NewClient creates a new instance of the client.
4246
func NewClient(opts ...ClientOption) (*Client, error) {
43-
c := &Client{}
47+
c := &Client{
48+
retryMax: 10,
49+
}
4450
for _, opt := range opts {
4551
opt(c)
4652
}
@@ -49,6 +55,12 @@ func NewClient(opts ...ClientOption) (*Client, error) {
4955
return nil, ErrUndefinedHost
5056
}
5157

58+
httpClient, err := c.newHttpClient()
59+
if err != nil {
60+
return nil, err
61+
}
62+
c.http = httpClient
63+
5264
// Allow to initialize version from tests.
5365
var zeroVersion VersionInfo
5466
if c.semver == nil || c.versionInfo == zeroVersion {
@@ -95,6 +107,13 @@ func Password(password string) ClientOption {
95107
}
96108
}
97109

110+
// RetryMax configures the number of retries before failing.
111+
func RetryMax(retryMax int) ClientOption {
112+
return func(c *Client) {
113+
c.retryMax = retryMax
114+
}
115+
}
116+
98117
// CertificateAuthority sets the certificate authority to be used by the client.
99118
func CertificateAuthority(certificateAuthority string) ClientOption {
100119
return func(c *Client) {
@@ -156,31 +175,42 @@ func (c *Client) newRequest(method, resourcePath string, reqBody io.Reader) (*ht
156175
}
157176

158177
func (c *Client) doRequest(request *http.Request) (int, []byte, error) {
159-
client := http.Client{}
178+
resp, err := c.http.Do(request)
179+
if err != nil {
180+
return 0, nil, fmt.Errorf("could not send request to Kibana API: %w", err)
181+
}
182+
183+
defer resp.Body.Close()
184+
body, err := io.ReadAll(resp.Body)
185+
if err != nil {
186+
return resp.StatusCode, nil, fmt.Errorf("could not read response body: %w", err)
187+
}
188+
189+
return resp.StatusCode, body, nil
190+
}
191+
192+
func (c *Client) newHttpClient() (*http.Client, error) {
193+
client := &http.Client{}
160194
if c.tlSkipVerify {
161195
client.Transport = &http.Transport{
162196
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
163197
}
164198
} else if c.certificateAuthority != "" {
165199
rootCAs, err := certs.SystemPoolWithCACertificate(c.certificateAuthority)
166200
if err != nil {
167-
return 0, nil, fmt.Errorf("reading CA certificate: %w", err)
201+
return nil, fmt.Errorf("reading CA certificate: %w", err)
168202
}
169203
client.Transport = &http.Transport{
170204
TLSClientConfig: &tls.Config{RootCAs: rootCAs},
171205
}
172206
}
173207

174-
resp, err := client.Do(request)
175-
if err != nil {
176-
return 0, nil, fmt.Errorf("could not send request to Kibana API: %w", err)
177-
}
178-
179-
defer resp.Body.Close()
180-
body, err := io.ReadAll(resp.Body)
181-
if err != nil {
182-
return resp.StatusCode, nil, fmt.Errorf("could not read response body: %w", err)
208+
if c.retryMax > 0 {
209+
opts := retry.HTTPOptions{
210+
RetryMax: c.retryMax,
211+
}
212+
client = retry.WrapHTTPClient(client, opts)
183213
}
184214

185-
return resp.StatusCode, body, nil
215+
return client, nil
186216
}

internal/retry/http.go

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License;
3+
// you may not use this file except in compliance with the Elastic License.
4+
5+
package retry
6+
7+
import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"errors"
	"fmt"
	"log/slog"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/hashicorp/go-retryablehttp"
)
20+
21+
// Wait bounds applied between retries when HTTPOptions leaves them unset.
const (
	// defaultRetryWaitMin is the shortest wait between two attempts.
	defaultRetryWaitMin = time.Second
	// defaultRetryWaitMax is the longest wait between two attempts.
	defaultRetryWaitMax = 5 * time.Second
)
25+
26+
// HTTPOptions configures the retry behaviour added by WrapHTTPClient.
type HTTPOptions struct {
	// RetryMax is the maximum number of retries. Zero or a negative value
	// disables retries.
	RetryMax int

	// retryWaitMin and retryWaitMax bound the wait between attempts. They
	// are unexported, so only this package can set them; a zero value
	// selects the package default.
	retryWaitMin, retryWaitMax time.Duration
}
32+
33+
func WrapHTTPClient(client *http.Client, opts HTTPOptions) *http.Client {
34+
if opts.RetryMax <= 0 {
35+
return client
36+
}
37+
retryWaitMin := opts.retryWaitMin
38+
if retryWaitMin == 0 {
39+
retryWaitMin = defaultRetryWaitMin
40+
}
41+
retryWaitMax := opts.retryWaitMax
42+
if retryWaitMax == 0 {
43+
retryWaitMax = defaultRetryWaitMax
44+
}
45+
46+
if client == nil {
47+
client = &http.Client{}
48+
}
49+
if client.CheckRedirect == nil {
50+
client.CheckRedirect = checkRedirect
51+
}
52+
retryClient := retryablehttp.NewClient()
53+
retryClient.HTTPClient = client
54+
retryClient.CheckRetry = checkRetry
55+
retryClient.ErrorHandler = retryablehttp.PassthroughErrorHandler
56+
retryClient.RetryMax = opts.RetryMax
57+
retryClient.RetryWaitMin = retryWaitMin
58+
retryClient.RetryWaitMax = retryWaitMax
59+
60+
// It needs to be a logger with support for attributes as key-value pairs.
61+
retryClient.Logger = slog.Default()
62+
return retryClient.StandardClient()
63+
}
64+
65+
var (
	// maxRedirects is the number of redirects followed before giving up.
	maxRedirects = 10
	// errTooManyRedirects is the sentinel error returned by checkRedirect,
	// so the retry policy can detect it with errors.Is.
	errTooManyRedirects = fmt.Errorf("stopped after %d redirects", maxRedirects)
)

// unsupportedSchemeMessage is the text net/http uses in the error returned
// for requests whose URL scheme has no registered transport. There is no
// exported error value for this case, so it is matched by message, as
// retryablehttp.DefaultRetryPolicy does.
const unsupportedSchemeMessage = "unsupported protocol scheme"

// checkRedirect reimplements the default HTTP redirect policy, but returns a
// sentinel error (errTooManyRedirects) once maxRedirects requests have
// already been followed.
func checkRedirect(req *http.Request, via []*http.Request) error {
	if len(via) >= maxRedirects {
		return errTooManyRedirects
	}
	return nil
}

// checkRetry reimplements retryablehttp.DefaultRetryPolicy with better error
// checking. It reports whether the request should be retried; the returned
// error is non-nil only when the context is done.
func checkRetry(ctx context.Context, resp *http.Response, err error) (bool, error) {
	// Never retry once the request context is cancelled or expired.
	if ctx.Err() != nil {
		return false, ctx.Err()
	}

	if err != nil {
		return isRecoverableError(err), nil
	}

	// No error and no response: there is nothing to base a retry on.
	if resp == nil {
		return false, nil
	}

	switch {
	case resp.StatusCode == http.StatusTooManyRequests:
		// 429 Too Many Requests is recoverable. Sometimes the server puts
		// a Retry-After response header to indicate when the server is
		// available to start processing requests from the client.
		return true, nil
	case resp.StatusCode == 0,
		resp.StatusCode >= 500 && resp.StatusCode != http.StatusNotImplemented:
		// Retry on 500-range responses to allow the server time to recover,
		// as 500's are typically not permanent errors and may relate to
		// outages on the server side. This will catch invalid response
		// codes as well, like 0 and 999.
		// retryablehttp.DefaultRetryPolicy generates an error for these
		// cases, but this is not what the default HTTP client does, so no
		// error is reported here.
		return true, nil
	}

	return false, nil
}

// isRecoverableError reports whether a failed request is worth retrying.
//
// http.Client.Do wraps every error it returns in *url.Error, so the wrapper
// alone says nothing about the failure: the operation and the wrapped error
// have to be inspected instead.
func isRecoverableError(err error) bool {
	if errors.Is(err, errTooManyRedirects) {
		// Too many redirects, let's stop here.
		return false
	}

	var urlError *url.Error
	if errors.As(err, &urlError) {
		if urlError.Op == "parse" {
			// URL is invalid, not recoverable.
			return false
		}
		if urlError.Err != nil && strings.Contains(urlError.Err.Error(), unsupportedSchemeMessage) {
			// No transport can handle this URL, not recoverable.
			return false
		}
	}

	var certVerificationError *tls.CertificateVerificationError
	if errors.As(err, &certVerificationError) {
		// Something failed while verifying certificates.
		return false
	}

	// crypto/x509 returns CertificateInvalidError by value; the pointer
	// form is checked as well in case a caller wraps it that way.
	var certError x509.CertificateInvalidError
	var certErrorPtr *x509.CertificateInvalidError
	if errors.As(err, &certError) || errors.As(err, &certErrorPtr) {
		// Invalid certificate, not recoverable.
		return false
	}

	var caError x509.UnknownAuthorityError
	if errors.As(err, &caError) {
		// Unknown CA, not recoverable.
		return false
	}

	// Consider other errors (connection refused, resets, timeouts, ...) as
	// recoverable and retry.
	return true
}

0 commit comments

Comments
 (0)