@@ -100,32 +100,33 @@ class AbstractRNNTDecoding(ConfidenceMixin):
100
100
from the `token_confidence`.
101
101
aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence.
102
102
Valid options are `mean`, `min`, `max`, `prod`.
103
- method_cfg : A dict-like object which contains the method name and settings to compute per-frame
103
+ measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame
104
104
confidence scores.
105
105
106
- name: The method name (str).
106
+ name: The measure name (str).
107
107
Supported values:
108
108
- 'max_prob' for using the maximum token probability as a confidence.
109
109
- 'entropy' for using a normalized entropy of a log-likelihood vector.
110
110
111
111
entropy_type: Which type of entropy to use (str).
112
- Used if confidence_method_cfg .name is set to `entropy`.
112
+ Used if confidence_measure_cfg.name is set to `entropy`.
113
113
Supported values:
114
- - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided,
114
+ - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided,
115
115
the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)).
116
- Note that for this entropy, the temperature should comply the following inequality:
117
- 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size.
116
+ Note that for this entropy, the alpha should comply with the following inequality:
117
+ (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1)
118
+ where V is the model vocabulary size.
118
119
- 'tsallis' for the Tsallis entropy with the Boltzmann constant one.
119
120
Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)),
120
121
where α is a parameter. When α == 1, it works like the Gibbs entropy.
121
122
More: https://en.wikipedia.org/wiki/Tsallis_entropy
122
- - 'renui ' for the Rényi entropy.
123
+ - 'renyi' for the Rényi entropy.
123
124
Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)),
124
125
where α is a parameter. When α == 1, it works like the Gibbs entropy.
125
126
More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy
126
127
127
- temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0.
128
- When the temperature equals one, scaling is not applied to 'max_prob',
128
+ alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0.
129
+ When the alpha equals one, scaling is not applied to 'max_prob',
129
130
and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i))
130
131
131
132
entropy_norm: A mapping of the entropy value to the interval [0,1].
@@ -139,7 +140,7 @@ class AbstractRNNTDecoding(ConfidenceMixin):
139
140
timestep during greedy decoding. Setting to larger values allows longer sentences
140
141
to be decoded, at the cost of increased execution time.
141
142
preserve_frame_confidence: Same as above, overrides above value.
142
- confidence_method : Same as above, overrides confidence_cfg.method .
143
+ confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg.
143
144
144
145
"beam":
145
146
beam_size: int, defining the beam size for beam search. Must be >= 1.
@@ -255,15 +256,13 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
255
256
# initialize confidence-related fields
256
257
self ._init_confidence (self .cfg .get ('confidence_cfg' , None ))
257
258
258
- # Update preserve frame confidence
259
- if self .preserve_frame_confidence is False :
260
- if self .cfg .strategy in ['greedy' , 'greedy_batch' ]:
261
- self .preserve_frame_confidence = self .cfg .greedy .get ('preserve_frame_confidence' , False )
262
- self .confidence_method_cfg = self .cfg .greedy .get ('confidence_method_cfg' , None )
263
-
264
- elif self .cfg .strategy in ['beam' , 'tsd' , 'alsd' , 'maes' ]:
265
- # Not implemented
266
- pass
259
+ # Confidence estimation is not implemented for these strategies
260
+ if (
261
+ not self .preserve_frame_confidence
262
+ and self .cfg .strategy in ['beam' , 'tsd' , 'alsd' , 'maes' ]
263
+ and self .cfg .beam .get ('preserve_frame_confidence' , False )
264
+ ):
265
+ raise NotImplementedError (f"Confidence calculation is not supported for strategy `{ self .cfg .strategy } `" )
267
266
268
267
if self .cfg .strategy == 'greedy' :
269
268
if self .big_blank_durations is None :
@@ -278,7 +277,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
278
277
),
279
278
preserve_alignments = self .preserve_alignments ,
280
279
preserve_frame_confidence = self .preserve_frame_confidence ,
281
- confidence_method_cfg = self .confidence_method_cfg ,
280
+ confidence_measure_cfg = self .confidence_measure_cfg ,
282
281
)
283
282
else :
284
283
self .decoding = greedy_decode .GreedyTDTInfer (
@@ -292,7 +291,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
292
291
),
293
292
preserve_alignments = self .preserve_alignments ,
294
293
preserve_frame_confidence = self .preserve_frame_confidence ,
295
- confidence_method_cfg = self .confidence_method_cfg ,
294
+ confidence_measure_cfg = self .confidence_measure_cfg ,
296
295
)
297
296
else :
298
297
self .decoding = greedy_decode .GreedyMultiblankRNNTInfer (
@@ -305,7 +304,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
305
304
),
306
305
preserve_alignments = self .preserve_alignments ,
307
306
preserve_frame_confidence = self .preserve_frame_confidence ,
308
- confidence_method_cfg = self .confidence_method_cfg ,
307
+ confidence_measure_cfg = self .confidence_measure_cfg ,
309
308
)
310
309
311
310
elif self .cfg .strategy == 'greedy_batch' :
@@ -321,7 +320,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
321
320
),
322
321
preserve_alignments = self .preserve_alignments ,
323
322
preserve_frame_confidence = self .preserve_frame_confidence ,
324
- confidence_method_cfg = self .confidence_method_cfg ,
323
+ confidence_measure_cfg = self .confidence_measure_cfg ,
325
324
)
326
325
else :
327
326
self .decoding = greedy_decode .GreedyBatchedTDTInfer (
@@ -335,7 +334,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
335
334
),
336
335
preserve_alignments = self .preserve_alignments ,
337
336
preserve_frame_confidence = self .preserve_frame_confidence ,
338
- confidence_method_cfg = self .confidence_method_cfg ,
337
+ confidence_measure_cfg = self .confidence_measure_cfg ,
339
338
)
340
339
341
340
else :
@@ -349,7 +348,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int):
349
348
),
350
349
preserve_alignments = self .preserve_alignments ,
351
350
preserve_frame_confidence = self .preserve_frame_confidence ,
352
- confidence_method_cfg = self .confidence_method_cfg ,
351
+ confidence_measure_cfg = self .confidence_measure_cfg ,
353
352
)
354
353
355
354
elif self .cfg .strategy == 'beam' :
@@ -1006,32 +1005,33 @@ class RNNTDecoding(AbstractRNNTDecoding):
1006
1005
from the `token_confidence`.
1007
1006
aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence.
1008
1007
Valid options are `mean`, `min`, `max`, `prod`.
1009
- method_cfg : A dict-like object which contains the method name and settings to compute per-frame
1008
+ measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame
1010
1009
confidence scores.
1011
1010
1012
- name: The method name (str).
1011
+ name: The measure name (str).
1013
1012
Supported values:
1014
1013
- 'max_prob' for using the maximum token probability as a confidence.
1015
1014
- 'entropy' for using a normalized entropy of a log-likelihood vector.
1016
1015
1017
1016
entropy_type: Which type of entropy to use (str).
1018
- Used if confidence_method_cfg .name is set to `entropy`.
1017
+ Used if confidence_measure_cfg.name is set to `entropy`.
1019
1018
Supported values:
1020
- - 'gibbs' for the (standard) Gibbs entropy. If the temperature α is provided,
1019
+ - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided,
1021
1020
the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)).
1022
- Note that for this entropy, the temperature should comply the following inequality:
1023
- 1/log(V) <= α <= -1/log(1-1/V) where V is the model vocabulary size.
1021
+ Note that for this entropy, the alpha should comply with the following inequality:
1022
+ (log(V)+2-sqrt(log^2(V)+4))/(2*log(V)) <= α <= (1+log(V-1))/log(V-1)
1023
+ where V is the model vocabulary size.
1024
1024
- 'tsallis' for the Tsallis entropy with the Boltzmann constant one.
1025
1025
Tsallis entropy formula is the following: H_α = 1/(α-1)*(1-sum_i(p^α_i)),
1026
1026
where α is a parameter. When α == 1, it works like the Gibbs entropy.
1027
1027
More: https://en.wikipedia.org/wiki/Tsallis_entropy
1028
- - 'renui ' for the Rényi entropy.
1028
+ - 'renyi' for the Rényi entropy.
1029
1029
Rényi entropy formula is the following: H_α = 1/(1-α)*log_2(sum_i(p^α_i)),
1030
1030
where α is a parameter. When α == 1, it works like the Gibbs entropy.
1031
1031
More: https://en.wikipedia.org/wiki/R%C3%A9nyi_entropy
1032
1032
1033
- temperature: Temperature scale for logsoftmax (α for entropies). Here we restrict it to be > 0.
1034
- When the temperature equals one, scaling is not applied to 'max_prob',
1033
+ alpha: Power scale for logsoftmax (α for entropies). Here we restrict it to be > 0.
1034
+ When the alpha equals one, scaling is not applied to 'max_prob',
1035
1035
and any entropy type behaves like the Shannon entropy: H = -sum_i(p_i*log(p_i))
1036
1036
1037
1037
entropy_norm: A mapping of the entropy value to the interval [0,1].
@@ -1047,7 +1047,7 @@ class RNNTDecoding(AbstractRNNTDecoding):
1047
1047
1048
1048
preserve_frame_confidence: Same as above, overrides above value.
1049
1049
1050
- confidence_method : Same as above, overrides confidence_cfg.method .
1050
+ confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg.
1051
1051
1052
1052
"beam":
1053
1053
beam_size: int, defining the beam size for beam search. Must be >= 1.
0 commit comments