Skip to content

Commit b483728

Browse files
authored
Avoid encoding html entities in JSON documents (#1464)
JSON encoders in Go standard library encode HTML special characters by default, so the resulting JSON is safe to use in HTML documents. In sample documents and test results in packages we want the raw JSON, so we can see the actual content that would be ingested when using the integration.
1 parent 1680d75 commit b483728

File tree

8 files changed

+256
-38
lines changed

8 files changed

+256
-38
lines changed

internal/docs/readme_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111

1212
"github.com/stretchr/testify/assert"
1313
"github.com/stretchr/testify/require"
14+
15+
"github.com/elastic/elastic-package/internal/packages"
1416
)
1517

1618
func TestGenerateReadme(t *testing.T) {
@@ -152,6 +154,9 @@ An example event for ` + "`example`" + ` looks as following:
152154
err = createSampleEventFile(c.packageRoot, c.dataStreamName, c.sampleEventJsonContents)
153155
require.NoError(t, err)
154156

157+
err = createManifestFile(c.packageRoot)
158+
require.NoError(t, err)
159+
155160
rendered, err := renderReadme(filename, c.packageRoot, templatePath, linksMap)
156161
require.NoError(t, err)
157162

@@ -293,6 +298,13 @@ func createSampleEventFile(packageRoot, dataStreamName, contents string) error {
293298
return nil
294299
}
295300

301+
func createManifestFile(packageRoot string) error {
302+
// Minimal content needed to render readme.
303+
manifest := `format_version: 2.10.0`
304+
manifestFile := filepath.Join(packageRoot, packages.PackageManifestFile)
305+
return os.WriteFile(manifestFile, []byte(manifest), 0644)
306+
}
307+
296308
func createDataStreamFolder(packageRoot, dataStreamName string) (string, error) {
297309
if dataStreamName == "" {
298310
return "", nil

internal/docs/sample_event.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ import (
1010
"path/filepath"
1111
"strings"
1212

13+
"github.com/Masterminds/semver/v3"
14+
1315
"github.com/elastic/elastic-package/internal/formatter"
16+
"github.com/elastic/elastic-package/internal/packages"
1417
)
1518

1619
const sampleEventFile = "sample_event.json"
@@ -23,7 +26,17 @@ func renderSampleEvent(packageRoot, dataStreamName string) (string, error) {
2326
return "", fmt.Errorf("reading sample event file failed (path: %s): %w", eventPath, err)
2427
}
2528

26-
formatted, _, err := formatter.JSONFormatter(body)
29+
manifest, err := packages.ReadPackageManifestFromPackageRoot(packageRoot)
30+
if err != nil {
31+
return "", fmt.Errorf("reading package manifest failed: %w", err)
32+
}
33+
specVersion, err := semver.NewVersion(manifest.SpecVersion)
34+
if err != nil {
35+
return "", fmt.Errorf("parsing format version %q failed: %w", manifest.SpecVersion, err)
36+
}
37+
38+
jsonFormatter := formatter.JSONFormatterBuilder(*specVersion)
39+
formatted, _, err := jsonFormatter.Format(body)
2740
if err != nil {
2841
return "", fmt.Errorf("formatting sample event file failed (path: %s): %w", eventPath, err)
2942
}

internal/formatter/formatter.go

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,36 @@ import (
88
"fmt"
99
"os"
1010
"path/filepath"
11+
12+
"github.com/Masterminds/semver/v3"
13+
14+
"github.com/elastic/elastic-package/internal/packages"
1115
)
1216

1317
type formatter func(content []byte) ([]byte, bool, error)
1418

15-
var formatters = map[string]formatter{
16-
".json": JSONFormatter,
17-
".yaml": YAMLFormatter,
18-
".yml": YAMLFormatter,
19+
func newFormatter(specVersion semver.Version, ext string) formatter {
20+
switch ext {
21+
case ".json":
22+
return JSONFormatterBuilder(specVersion).Format
23+
case ".yaml", ".yml":
24+
return YAMLFormatter
25+
default:
26+
return nil
27+
}
1928
}
2029

2130
// Format formats the files inside of the integration directory.
2231
func Format(packageRoot string, failFast bool) error {
23-
err := filepath.Walk(packageRoot, func(path string, info os.FileInfo, err error) error {
32+
manifest, err := packages.ReadPackageManifestFromPackageRoot(packageRoot)
33+
if err != nil {
34+
return fmt.Errorf("failed to read package manifest: %w", err)
35+
}
36+
specVersion, err := semver.NewVersion(manifest.SpecVersion)
37+
if err != nil {
38+
return fmt.Errorf("failed to parse package format version %q: %w", manifest.SpecVersion, err)
39+
}
40+
err = filepath.Walk(packageRoot, func(path string, info os.FileInfo, err error) error {
2441
if err != nil {
2542
return err
2643
}
@@ -31,7 +48,7 @@ func Format(packageRoot string, failFast bool) error {
3148
if info.IsDir() {
3249
return nil
3350
}
34-
err = formatFile(path, failFast)
51+
err = formatFile(path, failFast, *specVersion)
3552
if err != nil {
3653
return fmt.Errorf("formatting file failed (path: %s): %w", path, err)
3754
}
@@ -44,17 +61,15 @@ func Format(packageRoot string, failFast bool) error {
4461
return nil
4562
}
4663

47-
func formatFile(path string, failFast bool) error {
48-
file := filepath.Base(path)
49-
ext := filepath.Ext(file)
50-
64+
func formatFile(path string, failFast bool, specVersion semver.Version) error {
5165
content, err := os.ReadFile(path)
5266
if err != nil {
5367
return fmt.Errorf("reading file content failed: %w", err)
5468
}
5569

56-
format, defined := formatters[ext]
57-
if !defined {
70+
ext := filepath.Ext(filepath.Base(path))
71+
format := newFormatter(specVersion, ext)
72+
if format == nil {
5873
return nil // no errors returned as we have few files that will be never formatted (png, svg, log, etc.)
5974
}
6075

internal/formatter/json_formatter.go

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,31 @@
55
package formatter
66

77
import (
8+
"bytes"
89
"encoding/json"
910
"fmt"
11+
12+
"github.com/Masterminds/semver/v3"
1013
)
1114

12-
// JSONFormatter function is responsible for formatting the given JSON input.
13-
// The function is exposed, so it can be used by other internal packages, e.g. to format sample events in docs.
14-
func JSONFormatter(content []byte) ([]byte, bool, error) {
15+
type JSONFormatter interface {
16+
Format([]byte) ([]byte, bool, error)
17+
Encode(doc any) ([]byte, error)
18+
}
19+
20+
func JSONFormatterBuilder(specVersion semver.Version) JSONFormatter {
21+
if specVersion.LessThan(semver.MustParse("2.12.0")) {
22+
return &jsonFormatterWithHTMLEncoding{}
23+
}
24+
25+
return &jsonFormatter{}
26+
}
27+
28+
// jsonFormatterWithHTMLEncoding is responsible for formatting the given JSON input.
29+
// It encodes special HTML characters.
30+
type jsonFormatterWithHTMLEncoding struct{}
31+
32+
func (jsonFormatterWithHTMLEncoding) Format(content []byte) ([]byte, bool, error) {
1533
var rawMessage json.RawMessage
1634
err := json.Unmarshal(content, &rawMessage)
1735
if err != nil {
@@ -24,3 +42,35 @@ func JSONFormatter(content []byte) ([]byte, bool, error) {
2442
}
2543
return formatted, string(content) == string(formatted), nil
2644
}
45+
46+
func (jsonFormatterWithHTMLEncoding) Encode(doc any) ([]byte, error) {
47+
return json.MarshalIndent(doc, "", " ")
48+
}
49+
50+
// jsonFormatter is responsible for formatting the given JSON input without encoding special HTML characters.
51+
type jsonFormatter struct{}
52+
53+
func (jsonFormatter) Format(content []byte) ([]byte, bool, error) {
54+
var formatted bytes.Buffer
55+
err := json.Indent(&formatted, content, "", " ")
56+
if err != nil {
57+
return nil, false, fmt.Errorf("formatting JSON document failed: %w", err)
58+
}
59+
60+
return formatted.Bytes(), bytes.Equal(content, formatted.Bytes()), nil
61+
}
62+
63+
func (jsonFormatter) Encode(doc any) ([]byte, error) {
64+
var formatted bytes.Buffer
65+
enc := json.NewEncoder(&formatted)
66+
enc.SetEscapeHTML(false)
67+
enc.SetIndent("", " ")
68+
69+
err := enc.Encode(doc)
70+
if err != nil {
71+
return nil, err
72+
}
73+
74+
// Encoder.Encode appends a trailing newline; trim it to be consistent with MarshalIndent, which does not add one.
75+
return bytes.TrimSpace(formatted.Bytes()), nil
76+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License;
3+
// you may not use this file except in compliance with the Elastic License.
4+
5+
package formatter_test
6+
7+
import (
8+
"testing"
9+
10+
"github.com/Masterminds/semver/v3"
11+
"github.com/stretchr/testify/assert"
12+
"github.com/stretchr/testify/require"
13+
14+
"github.com/elastic/elastic-package/internal/formatter"
15+
)
16+
17+
func TestJSONFormatterFormat(t *testing.T) {
18+
cases := []struct {
19+
title string
20+
version *semver.Version
21+
content string
22+
expected string
23+
valid bool
24+
}{
25+
{
26+
title: "invalid json 2.0",
27+
version: semver.MustParse("2.0.0"),
28+
content: `{"foo":}`,
29+
valid: false,
30+
},
31+
{
32+
title: "invalid json 3.0",
33+
version: semver.MustParse("3.0.0"),
34+
content: `{"foo":}`,
35+
valid: false,
36+
},
37+
{
38+
title: "encode html in old versions",
39+
version: semver.MustParse("2.0.0"),
40+
content: `{"a": "<script></script>"}`,
41+
expected: `{
42+
"a": "\u003cscript\u003e\u003c/script\u003e"
43+
}`,
44+
valid: true,
45+
},
46+
{
47+
title: "don't encode html since 2.12.0",
48+
version: semver.MustParse("2.12.0"),
49+
content: `{"a": "<script></script>"}`,
50+
expected: `{
51+
"a": "<script></script>"
52+
}`,
53+
valid: true,
54+
},
55+
}
56+
57+
for _, c := range cases {
58+
t.Run(c.title, func(t *testing.T) {
59+
jsonFormatter := formatter.JSONFormatterBuilder(*c.version)
60+
formatted, equal, err := jsonFormatter.Format([]byte(c.content))
61+
if !c.valid {
62+
assert.Error(t, err)
63+
return
64+
}
65+
require.NoError(t, err)
66+
67+
assert.Equal(t, c.expected, string(formatted))
68+
assert.Equal(t, c.content == c.expected, equal)
69+
})
70+
}
71+
}
72+
73+
func TestJSONFormatterEncode(t *testing.T) {
74+
cases := []struct {
75+
title string
76+
version *semver.Version
77+
object any
78+
expected string
79+
}{
80+
{
81+
title: "encode html in old versions",
82+
version: semver.MustParse("2.0.0"),
83+
object: map[string]any{"a": "<script></script>"},
84+
expected: `{
85+
"a": "\u003cscript\u003e\u003c/script\u003e"
86+
}`,
87+
},
88+
{
89+
title: "don't encode html since 2.12.0",
90+
version: semver.MustParse("2.12.0"),
91+
object: map[string]any{"a": "<script></script>"},
92+
expected: `{
93+
"a": "<script></script>"
94+
}`,
95+
},
96+
}
97+
98+
for _, c := range cases {
99+
t.Run(c.title, func(t *testing.T) {
100+
jsonFormatter := formatter.JSONFormatterBuilder(*c.version)
101+
formatted, err := jsonFormatter.Encode(c.object)
102+
require.NoError(t, err)
103+
assert.Equal(t, c.expected, string(formatted))
104+
})
105+
}
106+
}

internal/testrunner/runners/pipeline/runner.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"strings"
1515
"time"
1616

17+
"github.com/Masterminds/semver/v3"
1718
"gopkg.in/yaml.v3"
1819

1920
"github.com/elastic/elastic-package/internal/common"
@@ -283,10 +284,19 @@ func (r *runner) loadTestCaseFile(testCaseFile string) (*testCase, error) {
283284
func (r *runner) verifyResults(testCaseFile string, config *testConfig, result *testResult, fieldsValidator *fields.Validator) error {
284285
testCasePath := filepath.Join(r.options.TestFolder.Path, testCaseFile)
285286

287+
manifest, err := packages.ReadPackageManifestFromPackageRoot(r.options.PackageRootPath)
288+
if err != nil {
289+
return fmt.Errorf("failed to read package manifest: %w", err)
290+
}
291+
specVersion, err := semver.NewVersion(manifest.SpecVersion)
292+
if err != nil {
293+
return fmt.Errorf("failed to parse package format version %q: %w", manifest.SpecVersion, err)
294+
}
295+
286296
if r.options.GenerateTestResult {
287297
// TODO: Add tests to cover regressive use of json.Unmarshal in writeTestResult.
288298
// See https://github.com/elastic/elastic-package/pull/717.
289-
err := writeTestResult(testCasePath, result)
299+
err := writeTestResult(testCasePath, result, *specVersion)
290300
if err != nil {
291301
return fmt.Errorf("writing test result failed: %w", err)
292302
}
@@ -301,7 +311,7 @@ func (r *runner) verifyResults(testCaseFile string, config *testConfig, result *
301311
if stackConfig.Provider == stack.ProviderServerless {
302312
skipGeoIP = true
303313
}
304-
err = compareResults(testCasePath, config, result, skipGeoIP)
314+
err = compareResults(testCasePath, config, result, skipGeoIP, *specVersion)
305315
if _, ok := err.(testrunner.ErrTestCaseFailed); ok {
306316
return err
307317
}

0 commit comments

Comments
 (0)