Skip to content

Commit 6184b08

Browse files
authored
Cache search better in UniqueAttributeValueConstraint (#924)
If a document has a large number of elements subject to the UniqueAttributeValueConstraint validation, the validator ends up recalculating the values for the constraint far too often. This happened because the constraint built its cached lookup with a key that included the attribute text itself, so every distinct attribute value triggered another search of the document. This change updates the lookup to cache all possible duplicates for the element type in question, so the document only has to be searched once.
1 parent 537a280 commit 6184b08

File tree

11 files changed

+134
-45
lines changed

11 files changed

+134
-45
lines changed

CHANGELOG.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
66

77
## Unreleased
88

9-
### Deprecated
10-
- Deprecated Office2013.Word.Person.Contact property. It no longer persists and will be removed in a future version (#912)
9+
### Fixed
10+
- Fixed massive performance bottleneck when `UniqueAttributeValueConstraint` is involved (#924)
1111

1212
Release Notes:
1313
## Version 2.13.0-beta2 - 2021-04-20
1414

1515
### Added
1616
- Additional O19 types to match Open Specifications (#916)
1717

18+
### Deprecated
19+
- Deprecated Office2013.Word.Person.Contact property. It no longer persists and will be removed in a future version (#912)
20+
1821
## Version 2.13.0-beta1 - 2021-03-09
1922

2023
### Added

global.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"sdk": {
33
"version": "5.0.101",
4-
"rollForward": "feature"
4+
"rollForward": "latestFeature"
55
}
66
}

src/DocumentFormat.OpenXml/OpenXmlElement.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,11 +195,11 @@ internal virtual void ConfigureMetadata(ElementMetadata.Builder builder)
195195
{
196196
}
197197

198-
private protected void SetAttribute<TSimpleType>(TSimpleType? value, [CallerMemberName] string propertyName = null)
198+
private protected void SetAttribute<TSimpleType>(TSimpleType? value, [CallerMemberName] string propertyName = null!)
199199
where TSimpleType : OpenXmlSimpleType
200200
=> ParsedState.Attributes.GetProperty(propertyName).Value = value;
201201

202-
private protected TSimpleType? GetAttribute<TSimpleType>([CallerMemberName] string propertyName = null)
202+
private protected TSimpleType? GetAttribute<TSimpleType>([CallerMemberName] string propertyName = null!)
203203
where TSimpleType : OpenXmlSimpleType
204204
=> ParsedState.Attributes.GetProperty(propertyName).Value as TSimpleType;
205205

src/DocumentFormat.OpenXml/SimpleTypes/EnumInfoLookup.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ public EnumStringLookupImpl()
122122
{
123123
var field = enumType.GetDeclaredField(enumVal.ToString()!);
124124
var enumString = field!.GetCustomAttribute<EnumStringAttribute>();
125+
126+
if (field is null)
127+
{
128+
return;
129+
}
130+
125131
var officeAvailability = field.GetCustomAttribute<OfficeAvailabilityAttribute>();
126132

127133
if (enumString is null)

src/DocumentFormat.OpenXml/Validation/Semantic/IndexReferenceConstraint.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,13 @@ private PartHolder<int> GetRefElementCount(ValidationContext context)
8282
return new PartHolder<int>(0, null);
8383
}
8484

85-
var result = context.State.Get(new { part.Uri, _refElement, _attribute, _refElementParent }, () =>
85+
var result = context.State.GetOrCreate(new { part, constraint = this }, static (key, context) =>
8686
{
8787
var count = 0;
8888

89-
foreach (var element in part.RootElement.Descendants(context.FileFormat, TraversalOptions.SelectAlternateContent))
89+
foreach (var element in key.part.RootElement.Descendants(context.FileFormat, TraversalOptions.SelectAlternateContent))
9090
{
91-
if (_refElementParent is null || element.Parent?.GetType() == _refElementParent)
91+
if (key.constraint._refElementParent is null || element.Parent?.GetType() == key.constraint._refElementParent)
9292
{
9393
count++;
9494
}

src/DocumentFormat.OpenXml/Validation/Semantic/ReferenceExistConstraint.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,15 @@ private PartHolder<ICollection<string>> GetReferencedAttributes(ValidationContex
8282
return new PartHolder<ICollection<string>>(Cached.Array<string>(), part);
8383
}
8484

85-
var result = context.State.Get(new { part.Uri, _partPath, _element, _attribute }, () =>
85+
var result = context.State.GetOrCreate(new { part, constraint = this }, static (key, context) =>
8686
{
8787
var referencedAttributes = new HashSet<string>(StringComparer.Ordinal);
8888

89-
foreach (var element in part.RootElement.Descendants(context.FileFormat, TraversalOptions.SelectAlternateContent))
89+
foreach (var element in key.part.RootElement.Descendants(context.FileFormat, TraversalOptions.SelectAlternateContent))
9090
{
91-
if (element.GetType() == _element)
91+
if (element.GetType() == key.constraint._element)
9292
{
93-
var attribute = element.ParsedState.Attributes[_attribute];
93+
var attribute = element.ParsedState.Attributes[key.constraint._attribute];
9494

9595
//Attributes whose value is empty string or null don't need to be cached.
9696
if (attribute.Value is not null && !attribute.Value.InnerText.IsNullOrEmpty())

src/DocumentFormat.OpenXml/Validation/Semantic/UniqueAttributeValueConstraint.cs

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
33

44
using System;
5+
using System.Collections.Generic;
56

67
namespace DocumentFormat.OpenXml.Validation.Semantic
78
{
@@ -37,51 +38,46 @@ public UniqueAttributeValueConstraint(byte attribute, bool caseSensitive, Type?
3738
}
3839

3940
var attribute = element.ParsedState.Attributes[_attribute];
40-
var elementType = element.GetType();
4141

4242
//if the attribute is omitted, semantic validation will do nothing
4343
if (attribute.Value is null || string.IsNullOrEmpty(attribute.Value.InnerText))
4444
{
4545
return null;
4646
}
4747

48-
var part = element.GetPart();
4948
var root = GetRoot(element);
5049

5150
if (root is null)
5251
{
5352
return null;
5453
}
5554

56-
if (part is null)
57-
{
58-
return null;
59-
}
60-
61-
var attributeText = attribute.Value.InnerText;
62-
63-
var added = false;
64-
var isDuplicate = context.State.Get(new { part.Uri, elementType, _parent, attributeText, _attribute, _comparer }, () =>
55+
var elementType = element.GetType();
56+
var textValues = context.State.GetOrCreate(new { elementType, root, constraint = this }, static (key, context) =>
6557
{
66-
added = true;
58+
var set = new DuplicateFinder(key.constraint._comparer);
6759

68-
foreach (var e in root.Descendants(context.FileFormat, TraversalOptions.SelectAlternateContent))
60+
foreach (var e in key.root.Descendants(context.FileFormat, TraversalOptions.SelectAlternateContent))
6961
{
70-
if (e != element && e.GetType() == elementType)
62+
if (e.GetType() == key.elementType)
7163
{
72-
var eValue = e.ParsedState.Attributes[_attribute];
64+
var eValue = e.ParsedState.Attributes[key.constraint._attribute];
7365

74-
if (eValue.Value is not null && _comparer.Equals(attributeText, eValue.Value.InnerText))
66+
if (eValue.Value is not null)
7567
{
76-
return true;
68+
set.Add(eValue.Value.InnerText);
7769
}
7870
}
7971
}
8072

81-
return false;
73+
set.Complete();
74+
75+
return set;
8276
});
8377

84-
if (!isDuplicate || !added)
78+
var isDuplicate = textValues.IsDuplicate(attribute.Value.InnerText);
79+
80+
if (!isDuplicate)
8581
{
8682
return null;
8783
}
@@ -115,5 +111,67 @@ public UniqueAttributeValueConstraint(byte attribute, bool caseSensitive, Type?
115111

116112
return null;
117113
}
114+
115+
private class DuplicateFinder
116+
{
117+
private readonly StringComparer _comparer;
118+
119+
private bool _isCompleted;
120+
private HashSet<string?>? _set;
121+
private HashSet<string?>? _duplicate;
122+
123+
public DuplicateFinder(StringComparer comparer)
124+
{
125+
_comparer = comparer;
126+
}
127+
128+
/// <summary>
129+
/// Add a text value and track whether it has been seen before or not.
130+
/// </summary>
131+
public void Add(string? text)
132+
{
133+
if (_isCompleted)
134+
{
135+
throw new InvalidOperationException();
136+
}
137+
138+
if (_set is null)
139+
{
140+
_set = new HashSet<string?>(_comparer);
141+
}
142+
143+
if (!_set.Add(text))
144+
{
145+
if (_duplicate is null)
146+
{
147+
_duplicate = new HashSet<string?>(_comparer);
148+
}
149+
150+
_duplicate.Add(text);
151+
}
152+
}
153+
154+
/// <summary>
155+
/// Clear the tracking set to free up space
156+
/// </summary>
157+
public void Complete()
158+
{
159+
_isCompleted = true;
160+
_set = null;
161+
}
162+
163+
/// <summary>
164+
/// Checks if a duplicate was detected. Once a duplicate is checked, subsequent calls will result in <c>false</c> so we only raise the error once.
165+
/// </summary>
166+
public bool IsDuplicate(string? text)
167+
{
168+
if (_duplicate is null)
169+
{
170+
return false;
171+
}
172+
173+
return _duplicate.Remove(text);
174+
}
175+
}
118176
}
119177
}

src/DocumentFormat.OpenXml/Validation/StateManager.cs

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,28 +8,46 @@ namespace DocumentFormat.OpenXml.Validation
88
{
99
internal class StateManager
1010
{
11+
private readonly ValidationContext _context;
12+
1113
private Dictionary<object, object>? _state;
1214

13-
public T Get<T>(object key, Func<T> factory)
14-
where T : notnull
15+
public StateManager(ValidationContext context)
16+
{
17+
_context = context;
18+
}
19+
20+
/// <summary>
21+
/// Method to get or create a cached value. To minimize allocations, the key should track everything that is
22+
/// required to generate the item in the factory. If so, then a static lambda can be used to ensure nothing
23+
/// else is required and that the key will be correct.
24+
/// </summary>
25+
/// <typeparam name="TValue">Type of the value produced.</typeparam>
26+
/// <typeparam name="TKey">Type of the key provided.</typeparam>
27+
/// <param name="key">Provided key that should identify the cached value uniquely.</param>
28+
/// <param name="factory">A factory method to create the value.</param>
29+
/// <returns>The created or cached value.</returns>
30+
public TValue GetOrCreate<TValue, TKey>(TKey key, Func<TKey, ValidationContext, TValue> factory)
31+
where TValue : notnull
32+
where TKey : notnull
1533
{
1634
if (_state is null)
1735
{
1836
_state = new Dictionary<object, object>();
1937
}
2038
else if (_state.TryGetValue(key, out var value))
2139
{
22-
if (value is T t)
40+
if (value is TValue t)
2341
{
2442
return t;
2543
}
2644
else
2745
{
28-
throw new InvalidOperationException(SR.Format("Value of incorrect type: '{0}'. Expecting '{1}'", value.GetType(), typeof(T)));
46+
throw new InvalidOperationException(SR.Format("Value of incorrect type: '{0}'. Expecting '{1}'", value.GetType(), typeof(TValue)));
2947
}
3048
}
3149

32-
var result = factory();
50+
var result = factory(key, _context);
3351

3452
_state.Add(key, result);
3553

src/DocumentFormat.OpenXml/Validation/ValidationContext.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ public ValidationContext(ValidationSettings settings, ValidationCache cache, Can
3232
McContext = new MCContext(false);
3333

3434
Stack = new ValidationStack();
35+
State = new StateManager(this);
3536

3637
Stack.Push(Errors.Add);
3738
}
@@ -63,7 +64,7 @@ public bool CheckIfCancelled()
6364

6465
public void Clear() => Errors.Clear();
6566

66-
public StateManager State { get; } = new StateManager();
67+
public StateManager State { get; }
6768

6869
/// <summary>
6970
/// Gets used to track MC context.

test/DocumentFormat.OpenXml.Tests/TestDocx01.cs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -753,10 +753,13 @@ public void W015_InsertBeforeSelf()
753753
firstPara.InsertBeforeSelf(newPara);
754754

755755
var v = new OpenXmlValidator(FileFormatVersions.Office2013);
756-
var errs = v.Validate(doc);
757-
var cnt = errs.Count();
758756

759-
Assert.Single(v.Validate(doc));
757+
Assert.Collection(
758+
v.Validate(doc),
759+
e =>
760+
{
761+
Assert.Equal("Sem_UniqueAttributeValue", e.Id);
762+
});
760763
}
761764
}
762765

0 commit comments

Comments
 (0)