 from sklearn.metrics import mean_absolute_error
 
 
-def absolute_loss(y_true, y_pred):
+def absolute_loss(y_true, y_pred, sample_weight=None):
     """
     Computes the absolute loss for regression.
 
     :param y_true: array-like or label indicator matrix
         Ground truth (correct) values.
     :param y_pred: array-like or label indicator matrix
         Predicted values, as returned by a regression estimator.
+    :param sample_weight: sample weights
     :return: loss, float
         The degree to which the samples are correctly predicted.
     """
-    return np.sum(np.abs(y_true - y_pred)) / y_true.shape[0]
+    if sample_weight is None:
+        return np.sum(np.abs(y_true - y_pred)) / y_true.shape[0]
+    return (
+        np.average(np.abs(y_true - y_pred), weights=sample_weight, axis=0)
+        / y_true.shape[0]
+    )
 
 
 def float_sign(a):
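For context, here is a minimal standalone sketch of the weighted absolute loss added above. The name weighted_absolute_loss is illustrative only and not part of the patch; the unweighted branch should agree with sklearn.metrics.mean_absolute_error, while the weighted branch reproduces the expression from the diff verbatim.

    import numpy as np
    from sklearn.metrics import mean_absolute_error

    def weighted_absolute_loss(y_true, y_pred, sample_weight=None):
        # Unweighted branch: plain mean absolute error.
        if sample_weight is None:
            return np.sum(np.abs(y_true - y_pred)) / y_true.shape[0]
        # Weighted branch as written in the patch: weighted mean of |error|,
        # divided once more by the number of samples.
        return (
            np.average(np.abs(y_true - y_pred), weights=sample_weight, axis=0)
            / y_true.shape[0]
        )

    y_true = np.array([3.0, -0.5, 2.0, 7.0])
    y_pred = np.array([2.5, 0.0, 2.0, 8.0])
    w = np.array([1.0, 2.0, 1.0, 2.0])

    # Without weights the value matches scikit-learn's MAE.
    assert np.isclose(weighted_absolute_loss(y_true, y_pred),
                      mean_absolute_error(y_true, y_pred))
    print(weighted_absolute_loss(y_true, y_pred, sample_weight=w))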
@@ -132,7 +138,7 @@ def _modify_loss_derivatives(self, last_deltas):
             return DERIVATIVE_LOSS_FUNCTIONS["absolute_loss"](last_deltas)
         return last_deltas
 
-    def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
+    def _backprop(self, *args):
         """
         Computes the MLP loss function and its corresponding derivatives
         with respect to each parameter: weights and bias vectors.
@@ -141,6 +147,8 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
             The input data.
         :param y: array-like, shape (n_samples,)
             The target values.
+        :param sample_weight: array-like of shape (n_samples,), default=None
+            Sample weights.
         :param activations: list, length = n_layers - 1
             The ith element of the list holds the values of the ith layer.
         :param deltas: list, length = n_layers - 1
@@ -155,10 +163,18 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
         :param intercept_grads: list, length = n_layers - 1
             The ith element contains the amount of change used to update the
             intercept parameters of the ith layer in an iteration.
-        :return: loss, float
-        :return: coef_grads, list, length = n_layers - 1
-        :return: intercept_grads, list, length = n_layers - 1
+        :return: loss (float),
+            coef_grads (list, length = n_layers - 1),
+            intercept_grads (list, length = n_layers - 1)
+
+
         """
+        if len(args) == 6:
+            X, y, activations, deltas, coef_grads, intercept_grads = args
+            sample_weight = None
+        else:
+            X, y, sample_weight, activations, deltas, coef_grads, intercept_grads = args
+
         n_samples = X.shape[0]
 
         # Forward propagate
@@ -169,10 +185,12 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
         if loss_func_name == "log_loss" and self.out_activation_ == "logistic":
             loss_func_name = "binary_log_loss"
         loss_function = self._get_loss_function(loss_func_name)
-        loss = loss_function(y, activations[-1])
+        loss = loss_function(y, activations[-1], sample_weight)
         # Add L2 regularization term to loss
         values = np.sum(np.array([np.dot(s.ravel(), s.ravel()) for s in self.coefs_]))
-        loss += (0.5 * self.alpha) * values / n_samples
+
+        sw_sum = n_samples if sample_weight is None else sample_weight.sum()
+        loss += (0.5 * self.alpha) * values / sw_sum
 
         # Backward propagate
         last = self.n_layers_ - 2
@@ -182,20 +200,22 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
         # sigmoid and binary cross entropy, softmax and categorical cross
         # entropy, and identity with squared loss
         deltas[last] = activations[-1] - y
+        if sample_weight is not None:
+            deltas[last] *= sample_weight.reshape(-1, 1)
 
         # We insert the following modification to modify the gradient
         # due to the modification of the loss function.
         deltas[last] = self._modify_loss_derivatives(deltas[last])
 
         # Compute gradient for the last layer
         temp = self._compute_loss_grad(
-            last, n_samples, activations, deltas, coef_grads, intercept_grads
+            last, sw_sum, activations, deltas, coef_grads, intercept_grads
         )
         if temp is None:
             # recent version of scikit-learn
             # Compute gradient for the last layer
             self._compute_loss_grad(
-                last, n_samples, activations, deltas, coef_grads, intercept_grads
+                last, sw_sum, activations, deltas, coef_grads, intercept_grads
             )
 
             inplace_derivative = DERIVATIVES[self.activation]
@@ -205,7 +225,7 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
                 inplace_derivative(activations[i], deltas[i - 1])
 
                 self._compute_loss_grad(
-                    i - 1, n_samples, activations, deltas, coef_grads, intercept_grads
+                    i - 1, sw_sum, activations, deltas, coef_grads, intercept_grads
                 )
         else:
             coef_grads, intercept_grads = temp
@@ -220,7 +240,7 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
                     coef_grads,
                     intercept_grads,
                 ) = self._compute_loss_grad(
-                    i - 1, n_samples, activations, deltas, coef_grads, intercept_grads
+                    i - 1, sw_sum, activations, deltas, coef_grads, intercept_grads
                 )
 
         return loss, coef_grads, intercept_grads
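To see why the output-layer residuals are multiplied by the weights and the normalization switches from n_samples to sw_sum, here is a small self-contained check, illustrative only, assuming the identity output activation with squared loss (the case noted in the comment above deltas[last] = activations[-1] - y); none of the names below come from the patch. Scaling the residuals by the sample weights and dividing by the total weight matches a finite-difference gradient of the weighted loss.

    import numpy as np

    rng = np.random.default_rng(0)
    y = rng.normal(size=(5, 1))
    pred = rng.normal(size=(5, 1))
    w = rng.uniform(0.5, 2.0, size=5)
    sw_sum = w.sum()

    # Weighted half squared error, normalized by the total weight
    # (mirrors dividing by sw_sum instead of n_samples in the patch).
    def loss(p):
        return 0.5 * np.sum(w * (p - y).ravel() ** 2) / sw_sum

    # Analytic gradient: residuals scaled by the weights, divided by sw_sum,
    # matching `deltas[last] *= sample_weight.reshape(-1, 1)` plus the
    # sw_sum normalization passed to _compute_loss_grad.
    grad = (pred - y) * w.reshape(-1, 1) / sw_sum

    # Central finite-difference check of the same gradient.
    eps = 1e-6
    num = np.zeros_like(pred)
    for i in range(pred.shape[0]):
        p1, p2 = pred.copy(), pred.copy()
        p1[i, 0] += eps
        p2[i, 0] -= eps
        num[i, 0] = (loss(p1) - loss(p2)) / (2 * eps)

    assert np.allclose(grad, num, atol=1e-5)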