139
139
//!
140
140
//! Given these invariants, we argue why each optimization preserves execution
141
141
//! semantics below (grep for "Preserves execution semantics").
142
+ //!
143
+ //! # Avoiding Quadratic Behavior
144
+ //!
145
+ //! There are two cases where we've had to take some care to avoid
146
+ //! quadratic worst-case behavior:
147
+ //!
148
+ //! - The "labels at this branch" list can grow unboundedly if the
149
+ //! code generator binds many labels at one location. If the count
150
+ //! gets too high (defined by the `LABEL_LIST_THRESHOLD` constant), we
151
+ //! simply abort an optimization early in a way that is always correct
152
+ //! but is conservative.
153
+ //!
154
+ //! - The fixup list can interact with island emission to create
155
+ //! "quadratic island behavior". In a little more detail, one can hit
156
+ //! this behavior by having some pending fixups (forward label
157
+ //! references) with long-range label-use kinds, and some others
158
+ //! with shorter-range references that nonetheless still are pending
159
+ //! long enough to trigger island generation. In such a case, we
160
+ //! process the fixup list, generate veneers to extend some forward
161
+ //! references' ranges, but leave the other (longer-range) ones
162
+ //! alone. The way this was implemented put them back on a list and
163
+ //! resulted in quadratic behavior.
164
+ //!
165
+ //! To avoid this, we could use a better data structure that allows
166
+ //! us to query for fixups with deadlines "coming soon" and generate
167
+ //! veneers for only those fixups. However, there is some
168
+ //! interaction with the branch peephole optimizations: the
169
+ //! invariant there is that branches in the "most recent branches
170
+ //! contiguous with end of buffer" list have corresponding fixups in
171
+ //! order (so that when we chomp the branch, we can chomp its fixup
172
+ //! too).
173
+ //!
174
+ //! So instead, when we generate an island, for now we create
175
+ //! veneers for *all* pending fixups, then if upgraded to a kind
176
+ //! that no longer supports veneers (is at "max range"), kick the
177
+ //! fixups off to a list that is *not* processed at islands except
178
+ //! for one last pass after emission. This allows us to skip the
179
+ //! work and avoids the quadratic behavior. We expect that this is
180
+ //! fine-ish for now: islands are relatively rare, and if they do
181
+ //! happen and generate unnecessary veneers (as will now happen for
182
+ //! the case above) we'll only get one unnecessary veneer per
183
+ //! branch (then they are at max range already).
184
+ //!
185
+ //! Longer-term, we could use a data structure that allows querying
186
+ //! by deadline, as long as we can properly chomp just-added fixups
187
+ //! when chomping branches.
142
188
143
189
use crate :: binemit:: { Addend , CodeOffset , Reloc , StackMap } ;
144
190
use crate :: ir:: { ExternalName , Opcode , RelSourceLoc , SourceLoc , TrapCode } ;
@@ -150,7 +196,7 @@ use crate::timing;
150
196
use crate :: trace;
151
197
use cranelift_control:: ControlPlane ;
152
198
use cranelift_entity:: { entity_impl, PrimaryMap } ;
153
- use smallvec:: SmallVec ;
199
+ use smallvec:: { smallvec , SmallVec } ;
154
200
use std:: convert:: TryFrom ;
155
201
use std:: mem;
156
202
use std:: string:: String ;
@@ -190,6 +236,18 @@ impl CompilePhase for Final {
190
236
type SourceLocType = SourceLoc ;
191
237
}
192
238
239
+ #[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
240
+ enum ForceVeneers {
241
+ Yes ,
242
+ No ,
243
+ }
244
+
245
+ #[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
246
+ enum IsLastIsland {
247
+ Yes ,
248
+ No ,
249
+ }
250
+
193
251
/// A buffer of output to be produced, fixed up, and then emitted to a CodeSink
194
252
/// in bulk.
195
253
///
@@ -234,6 +292,10 @@ pub struct MachBuffer<I: VCodeInst> {
234
292
pending_traps : SmallVec < [ MachLabelTrap ; 16 ] > ,
235
293
/// Fixups that must be performed after all code is emitted.
236
294
fixup_records : SmallVec < [ MachLabelFixup < I > ; 16 ] > ,
295
+ /// Fixups whose labels are at maximum range already: these need
296
+ /// not be considered in island emission until we're done
297
+ /// emitting.
298
+ fixup_records_max_range : SmallVec < [ MachLabelFixup < I > ; 16 ] > ,
237
299
/// Current deadline at which all constants are flushed and all code labels
238
300
/// are extended by emitting long-range jumps in an island. This flush
239
301
/// should be rare (e.g., on AArch64, the shortest-range PC-rel references
@@ -389,6 +451,7 @@ impl<I: VCodeInst> MachBuffer<I> {
389
451
pending_constants : SmallVec :: new ( ) ,
390
452
pending_traps : SmallVec :: new ( ) ,
391
453
fixup_records : SmallVec :: new ( ) ,
454
+ fixup_records_max_range : SmallVec :: new ( ) ,
392
455
island_deadline : UNKNOWN_LABEL_OFFSET ,
393
456
island_worst_case_size : 0 ,
394
457
latest_branches : SmallVec :: new ( ) ,
@@ -1157,27 +1220,24 @@ impl<I: VCodeInst> MachBuffer<I> {
1157
1220
/// Should only be called if `island_needed()` returns true, i.e., if we
1158
1221
/// actually reach a deadline. It's not necessarily a problem to do so
1159
1222
/// otherwise but it may result in unnecessary work during emission.
1160
- pub fn emit_island ( & mut self , distance : CodeOffset , ctrl_plane : & mut ControlPlane ) {
1161
- self . emit_island_maybe_forced ( false , distance , ctrl_plane) ;
1223
+ pub fn emit_island ( & mut self , ctrl_plane : & mut ControlPlane ) {
1224
+ self . emit_island_maybe_forced ( ForceVeneers :: No , IsLastIsland :: No , ctrl_plane) ;
1162
1225
}
1163
1226
1164
1227
/// Same as `emit_island`, but an internal API with a `force_veneers`
1165
1228
/// argument to force all veneers to always get emitted for debugging.
1166
1229
fn emit_island_maybe_forced (
1167
1230
& mut self ,
1168
- force_veneers : bool ,
1169
- distance : CodeOffset ,
1231
+ force_veneers : ForceVeneers ,
1232
+ last_island : IsLastIsland ,
1170
1233
ctrl_plane : & mut ControlPlane ,
1171
1234
) {
1172
1235
// We're going to purge fixups, so no latest-branch editing can happen
1173
1236
// anymore.
1174
1237
self . latest_branches . clear ( ) ;
1175
1238
1176
1239
// Reset internal calculations about islands since we're going to
1177
- // change the calculus as we apply fixups. The `forced_threshold` is
1178
- // used here to determine whether jumps to unknown labels will require
1179
- // a veneer or not.
1180
- let forced_threshold = self . worst_case_end_of_island ( distance) ;
1240
+ // change the calculus as we apply fixups.
1181
1241
self . island_deadline = UNKNOWN_LABEL_OFFSET ;
1182
1242
self . island_worst_case_size = 0 ;
1183
1243
@@ -1232,7 +1292,14 @@ impl<I: VCodeInst> MachBuffer<I> {
1232
1292
self . get_appended_space ( size) ;
1233
1293
}
1234
1294
1235
- for fixup in mem:: take ( & mut self . fixup_records ) {
1295
+ let last_island_fixups = match last_island {
1296
+ IsLastIsland :: Yes => mem:: take ( & mut self . fixup_records_max_range ) ,
1297
+ IsLastIsland :: No => smallvec ! [ ] ,
1298
+ } ;
1299
+ for fixup in mem:: take ( & mut self . fixup_records )
1300
+ . into_iter ( )
1301
+ . chain ( last_island_fixups. into_iter ( ) )
1302
+ {
1236
1303
trace ! ( "emit_island: fixup {:?}" , fixup) ;
1237
1304
let MachLabelFixup {
1238
1305
label,
@@ -1275,7 +1342,8 @@ impl<I: VCodeInst> MachBuffer<I> {
1275
1342
kind. max_neg_range( )
1276
1343
) ;
1277
1344
1278
- if ( force_veneers && kind. supports_veneer ( ) ) || veneer_required {
1345
+ if ( force_veneers == ForceVeneers :: Yes && kind. supports_veneer ( ) ) || veneer_required
1346
+ {
1279
1347
self . emit_veneer ( label, offset, kind) ;
1280
1348
} else {
1281
1349
let slice = & mut self . data [ start..end] ;
@@ -1284,21 +1352,43 @@ impl<I: VCodeInst> MachBuffer<I> {
1284
1352
}
1285
1353
} else {
1286
1354
// If the offset of this label is not known at this time then
1287
- // there's one of two possibilities:
1355
+ // there are three possibilities:
1288
1356
//
1289
- // * First we may be about to exceed the maximum jump range of
1290
- // this fixup. In that case a veneer is inserted to buy some
1291
- // more budget for the forward-jump. It's guaranteed that the
1292
- // label will eventually come after where we're at, so we know
1293
- // that the forward jump is necessary.
1357
+ // 1. It's possible that the label is already a "max
1358
+ // range" label: a veneer would not help us any,
1359
+ // and so we need not consider the label during
1360
+ // island emission any more until the very end (the
1361
+ // last "island" pass). In this case we kick the
1362
+ // label into a separate list to process once at
1363
+ // the end, to avoid quadratic behavior (see
1364
+ // "quadratic island behavior" above, and issue
1365
+ // #6798).
1294
1366
//
1295
- // * Otherwise we're still within range of the forward jump but
1296
- // the precise target isn't known yet. In that case we
1297
- // enqueue the fixup to get processed later.
1298
- if forced_threshold - offset > kind. max_pos_range ( ) {
1299
- self . emit_veneer ( label, offset, kind) ;
1367
+ // 2. Or, we may be about to exceed the maximum jump range of
1368
+ // this fixup. In that case a veneer is inserted to buy some
1369
+ // more budget for the forward-jump. It's guaranteed that the
1370
+ // label will eventually come after where we're at, so we know
1371
+ // that the forward jump is necessary.
1372
+ //
1373
+ // 3. Otherwise, we're still within range of the
1374
+ // forward jump but the precise target isn't known
1375
+ // yet. In that case, to avoid quadratic behavior
1376
+ // (again, see above), we emit a veneer and if the
1377
+ // resulting label-use fixup is then max-range, we
1378
+ // put it in the max-range list. We could enqueue
1379
+ // the fixup for processing later, and this would
1380
+ // enable slightly fewer veneers, but islands are
1381
+ // relatively rare and the cost of "upgrading" all
1382
+ // forward label refs that cross an island should
1383
+ // be relatively low.
1384
+ if !kind. supports_veneer ( ) {
1385
+ self . fixup_records_max_range . push ( MachLabelFixup {
1386
+ label,
1387
+ offset,
1388
+ kind,
1389
+ } ) ;
1300
1390
} else {
1301
- self . use_label_at_offset ( offset , label , kind) ;
1391
+ self . emit_veneer ( label , offset , kind) ;
1302
1392
}
1303
1393
}
1304
1394
}
@@ -1346,25 +1436,36 @@ impl<I: VCodeInst> MachBuffer<I> {
1346
1436
veneer_fixup_off,
1347
1437
veneer_label_use
1348
1438
) ;
1349
- // Register a new use of `label` with our new veneer fixup and offset.
1350
- // This'll recalculate deadlines accordingly and enqueue this fixup to
1351
- // get processed at some later time.
1352
- self . use_label_at_offset ( veneer_fixup_off, label, veneer_label_use) ;
1439
+ // Register a new use of `label` with our new veneer fixup and
1440
+ // offset. This'll recalculate deadlines accordingly and
1441
+ // enqueue this fixup to get processed at some later
1442
+ // time. Note that if we now have a max-range, we instead skip
1443
+ // the usual fixup list to avoid quadratic behavior.
1444
+ if veneer_label_use. supports_veneer ( ) {
1445
+ self . use_label_at_offset ( veneer_fixup_off, label, veneer_label_use) ;
1446
+ } else {
1447
+ self . fixup_records_max_range . push ( MachLabelFixup {
1448
+ label,
1449
+ offset : veneer_fixup_off,
1450
+ kind : veneer_label_use,
1451
+ } ) ;
1452
+ }
1353
1453
}
1354
1454
1355
1455
fn finish_emission_maybe_forcing_veneers (
1356
1456
& mut self ,
1357
- force_veneers : bool ,
1457
+ force_veneers : ForceVeneers ,
1358
1458
ctrl_plane : & mut ControlPlane ,
1359
1459
) {
1360
1460
while !self . pending_constants . is_empty ( )
1361
1461
|| !self . pending_traps . is_empty ( )
1362
1462
|| !self . fixup_records . is_empty ( )
1463
+ || !self . fixup_records_max_range . is_empty ( )
1363
1464
{
1364
1465
// `emit_island()` will emit any pending veneers and constants, and
1365
1466
// as a side-effect, will also take care of any fixups with resolved
1366
1467
// labels eagerly.
1367
- self . emit_island_maybe_forced ( force_veneers, u32 :: MAX , ctrl_plane) ;
1468
+ self . emit_island_maybe_forced ( force_veneers, IsLastIsland :: Yes , ctrl_plane) ;
1368
1469
}
1369
1470
1370
1471
// Ensure that all labels have been fixed up after the last island is emitted. This is a
@@ -1385,7 +1486,7 @@ impl<I: VCodeInst> MachBuffer<I> {
1385
1486
// had bound one last label.
1386
1487
self . optimize_branches ( ctrl_plane) ;
1387
1488
1388
- self . finish_emission_maybe_forcing_veneers ( false , ctrl_plane) ;
1489
+ self . finish_emission_maybe_forcing_veneers ( ForceVeneers :: No , ctrl_plane) ;
1389
1490
1390
1491
let alignment = self . finish_constants ( constants) ;
1391
1492
@@ -1734,7 +1835,7 @@ impl MachBranch {
1734
1835
pub struct MachTextSectionBuilder < I : VCodeInst > {
1735
1836
buf : MachBuffer < I > ,
1736
1837
next_func : usize ,
1737
- force_veneers : bool ,
1838
+ force_veneers : ForceVeneers ,
1738
1839
}
1739
1840
1740
1841
impl < I : VCodeInst > MachTextSectionBuilder < I > {
@@ -1746,7 +1847,7 @@ impl<I: VCodeInst> MachTextSectionBuilder<I> {
1746
1847
MachTextSectionBuilder {
1747
1848
buf,
1748
1849
next_func : 0 ,
1749
- force_veneers : false ,
1850
+ force_veneers : ForceVeneers :: No ,
1750
1851
}
1751
1852
}
1752
1853
}
@@ -1762,9 +1863,9 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
1762
1863
// Conditionally emit an island if it's necessary to resolve jumps
1763
1864
// between functions which are too far away.
1764
1865
let size = func. len ( ) as u32 ;
1765
- if self . force_veneers || self . buf . island_needed ( size) {
1866
+ if self . force_veneers == ForceVeneers :: Yes || self . buf . island_needed ( size) {
1766
1867
self . buf
1767
- . emit_island_maybe_forced ( self . force_veneers , size , ctrl_plane) ;
1868
+ . emit_island_maybe_forced ( self . force_veneers , IsLastIsland :: No , ctrl_plane) ;
1768
1869
}
1769
1870
1770
1871
self . buf . align_to ( align) ;
@@ -1796,7 +1897,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
1796
1897
}
1797
1898
1798
1899
fn force_veneers ( & mut self ) {
1799
- self . force_veneers = true ;
1900
+ self . force_veneers = ForceVeneers :: Yes ;
1800
1901
}
1801
1902
1802
1903
fn finish ( & mut self , ctrl_plane : & mut ControlPlane ) -> Vec < u8 > {
@@ -1946,7 +2047,7 @@ mod test {
1946
2047
buf. bind_label ( label ( 1 ) , state. ctrl_plane_mut ( ) ) ;
1947
2048
while buf. cur_offset ( ) < 2000000 {
1948
2049
if buf. island_needed ( 0 ) {
1949
- buf. emit_island ( 0 , state. ctrl_plane_mut ( ) ) ;
2050
+ buf. emit_island ( state. ctrl_plane_mut ( ) ) ;
1950
2051
}
1951
2052
let inst = Inst :: Nop4 ;
1952
2053
inst. emit ( & [ ] , & mut buf, & info, & mut state) ;
@@ -1983,9 +2084,15 @@ mod test {
1983
2084
// before the deadline.
1984
2085
taken : BranchTarget :: ResolvedOffset ( ( 1 << 20 ) - 4 - 20 ) ,
1985
2086
1986
- // This branch is in-range so no veneers should be needed, it should
1987
- // go directly to the target.
1988
- not_taken : BranchTarget :: ResolvedOffset ( 2000000 + 4 - 4 ) ,
2087
+ // This branch is in-range so no veneers are technically
2088
+ // needed; however, because we resolve *all* pending
2089
+ // fixups that cross an island when that island occurs, it
2090
+ // will have a veneer as well. This veneer comes just
2091
+ // after the one above. (Note that because the CondBr has
2092
+ // two instructions, the conditional and unconditional,
2093
+ // this offset is the same, though the veneer is four
2094
+ // bytes later.)
2095
+ not_taken : BranchTarget :: ResolvedOffset ( ( 1 << 20 ) - 4 - 20 ) ,
1989
2096
} ;
1990
2097
inst. emit ( & [ ] , & mut buf2, & info, & mut state) ;
1991
2098
0 commit comments