 	"context"
 	"fmt"
 	"sync"
+	"sync/atomic"
 	"time"

 	"github.com/uber/cadence/client/sharddistributorexecutor"
@@ -14,7 +15,7 @@ import (
 	"github.com/uber/cadence/service/sharddistributor/executorclient/syncgeneric"
 )

-type processorState int
+type processorState int32

 const (
 	processorStateStarting processorState = iota
@@ -24,7 +25,25 @@ const (
 
 type managedProcessor[SP ShardProcessor] struct {
 	processor SP
-	state     processorState
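+	// state is written by assignment/shutdown goroutines while the heartbeat
+	// loop reads it concurrently, so it is stored as an atomic int32.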
+	state     atomic.Int32
+}
+
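+// setState and getState convert between processorState and the raw atomic
+// int32 so callers never deal with untyped values.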
+func (mp *managedProcessor[SP]) setState(state processorState) {
+	mp.state.Store(int32(state))
+}
+
+func (mp *managedProcessor[SP]) getState() processorState {
+	return processorState(mp.state.Load())
+}
+
+func newManagedProcessor[SP ShardProcessor](processor SP, state processorState) *managedProcessor[SP] {
+	managed := &managedProcessor[SP]{
+		processor: processor,
+		state:     atomic.Int32{},
+	}
+
+	managed.setState(state)
+	return managed
 }

 type executorImpl[SP ShardProcessor] struct {
@@ -37,14 +56,21 @@ type executorImpl[SP ShardProcessor] struct {
 	managedProcessors syncgeneric.Map[string, *managedProcessor[SP]]
 	executorID        string
 	timeSource        clock.TimeSource
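+	// processLoopWG lets Stop wait for the heartbeat loop goroutine to exit;
+	// assignmentMutex guarantees only one shard-assignment pass runs at a time.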
+	processLoopWG     sync.WaitGroup
+	assignmentMutex   sync.Mutex
 }

 func (e *executorImpl[SP]) Start(ctx context.Context) {
-	go e.heartbeatloop(ctx)
+	e.processLoopWG.Add(1)
+	go func() {
+		defer e.processLoopWG.Done()
+		e.heartbeatloop(ctx)
+	}()
 }

 func (e *executorImpl[SP]) Stop() {
 	close(e.stopC)
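+	// Block until the heartbeat loop has observed stopC and finished
+	// stopping the shard processors.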
+	e.processLoopWG.Wait()
 }

 func (e *executorImpl[SP]) GetShardProcess(shardID string) (SP, error) {
@@ -63,6 +89,9 @@ func (e *executorImpl[SP]) heartbeatloop(ctx context.Context) {
 	for {
 		select {
 		case <-ctx.Done():
+			e.logger.Info("shard distributor executor context done, stopping")
+			e.stopShardProcessors()
+			return
 		case <-e.stopC:
 			e.logger.Info("shard distributor executor stopped")
 			e.stopShardProcessors()
@@ -73,7 +102,14 @@ func (e *executorImpl[SP]) heartbeatloop(ctx context.Context) {
 				e.logger.Error("failed to heartbeat", tag.Error(err))
 				continue // TODO: should we stop the executor, and drop all the shards?
 			}
-			e.updateShardAssignment(ctx, shardAssignment)
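+			// Apply the assignment on a background goroutine so a slow pass
+			// cannot block heartbeats; TryLock drops this round if the
+			// previous pass is still running.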
+			if !e.assignmentMutex.TryLock() {
+				e.logger.Warn("already doing shard assignment, will skip this assignment")
+				continue
+			}
+			go func() {
+				defer e.assignmentMutex.Unlock()
+				e.updateShardAssignment(ctx, shardAssignment)
+			}()
 		}
 	}
 }
@@ -82,7 +118,7 @@ func (e *executorImpl[SP]) heartbeat(ctx context.Context) (shardAssignments map[
 	// Fill in the shard status reports
 	shardStatusReports := make(map[string]*types.ShardStatusReport)
 	e.managedProcessors.Range(func(shardID string, managedProcessor *managedProcessor[SP]) bool {
-		if managedProcessor.state == processorStateStarted {
+		if managedProcessor.getState() == processorStateStarted {
 			shardStatusReports[shardID] = &types.ShardStatusReport{
 				ShardLoad: managedProcessor.processor.GetShardLoad(),
 				Status:    types.ShardStatusREADY,
@@ -109,11 +145,15 @@ func (e *executorImpl[SP]) heartbeat(ctx context.Context) (shardAssignments map[
 }

 func (e *executorImpl[SP]) updateShardAssignment(ctx context.Context, shardAssignments map[string]*types.ShardAssignment) {
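+	// Track every per-shard goroutine so this pass returns (and the caller
+	// releases assignmentMutex) only after all starts and stops complete.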
+	wg := sync.WaitGroup{}
+
 	// Stop shard processing for shards not assigned to this executor
 	e.managedProcessors.Range(func(shardID string, managedProcessor *managedProcessor[SP]) bool {
 		if assignment, ok := shardAssignments[shardID]; !ok || assignment.Status != types.AssignmentStatusREADY {
+			wg.Add(1)
 			go func() {
-				managedProcessor.state = processorStateStopping
+				defer wg.Done()
+				managedProcessor.setState(processorStateStopping)
 				managedProcessor.processor.Stop()
 				e.managedProcessors.Delete(shardID)
 			}()
@@ -125,37 +165,42 @@ func (e *executorImpl[SP]) updateShardAssignment(ctx context.Context, shardAssig
 	for shardID, assignment := range shardAssignments {
 		if assignment.Status == types.AssignmentStatusREADY {
 			if _, ok := e.managedProcessors.Load(shardID); !ok {
+				wg.Add(1)
 				go func() {
+					defer wg.Done()
 					processor, err := e.shardProcessorFactory.NewShardProcessor(shardID)
 					if err != nil {
 						e.logger.Error("failed to create shard processor", tag.Error(err))
 						return
 					}
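+					// Store the processor in the starting state before Start so
+					// concurrent heartbeats already see the shard.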
+					managedProcessor := newManagedProcessor(processor, processorStateStarting)
+					e.managedProcessors.Store(shardID, managedProcessor)
+
 					processor.Start(ctx)
-					e.managedProcessors.Store(shardID, &managedProcessor[SP]{
-						processor: processor,
-						state:     processorStateStarted,
-					})
+
+					managedProcessor.setState(processorStateStarted)
 				}()
 			}
 		}
 	}
+
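+	// Wait for all shard starts and stops kicked off above.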
+	wg.Wait()
 }
 
 func (e *executorImpl[SP]) stopShardProcessors() {
 	wg := sync.WaitGroup{}

 	e.managedProcessors.Range(func(shardID string, managedProcessor *managedProcessor[SP]) bool {
 		// If the processor is already stopping, skip it
-		if managedProcessor.state == processorStateStopping {
+		if managedProcessor.getState() == processorStateStopping {
 			return true
 		}

 		wg.Add(1)
 		go func() {
 			defer wg.Done()

-			managedProcessor.state = processorStateStopping
+			managedProcessor.setState(processorStateStopping)
 			managedProcessor.processor.Stop()
 			e.managedProcessors.Delete(shardID)
 		}()