@@ -215,7 +215,6 @@ class StructurizeCFG : public RegionPass {
215215 void orderNodes ();
216216
217217 Loop *getAdjustedLoop (RegionNode *RN);
218- unsigned getAdjustedLoopDepth (RegionNode *RN);
219218
220219 void analyzeLoops (RegionNode *N);
221220
@@ -324,65 +323,108 @@ Loop *StructurizeCFG::getAdjustedLoop(RegionNode *RN) {
324323 return LI->getLoopFor (RN->getEntry ());
325324}
326325
327- // / Use the exit block to determine the loop depth if RN is a SubRegion.
328- unsigned StructurizeCFG::getAdjustedLoopDepth (RegionNode *RN) {
329- if (RN->isSubRegion ()) {
330- Region *SubR = RN->getNodeAs <Region>();
331- return LI->getLoopDepth (SubR->getExit ());
332- }
333-
334- return LI->getLoopDepth (RN->getEntry ());
335- }
336-
337- // / Build up the general order of nodes
326+ // / Build up the general order of nodes, by performing a topology sort of the
327+ // / parent region's nodes, while ensuring that there is no outer loop node
328+ // / between any two inner loop nodes.
338329void StructurizeCFG::orderNodes () {
339- ReversePostOrderTraversal<Region*> RPOT (ParentRegion);
340- SmallDenseMap<Loop*, unsigned , 8 > LoopBlocks;
330+ SmallVector<RegionNode *, 32 > POT;
331+ SmallDenseMap<Loop *, unsigned , 8 > LoopSizes;
332+ for (RegionNode *RN : post_order (ParentRegion)) {
333+ POT.push_back (RN);
341334
342- // The reverse post-order traversal of the list gives us an ordering close
343- // to what we want. The only problem with it is that sometimes backedges
344- // for outer loops will be visited before backedges for inner loops.
345- for (RegionNode *RN : RPOT) {
335+ // Accumulate the number of nodes inside the region that belong to a loop.
346336 Loop *Loop = getAdjustedLoop (RN);
347- ++LoopBlocks [Loop];
337+ ++LoopSizes [Loop];
348338 }
349-
350- unsigned CurrentLoopDepth = 0 ;
339+ // A quick exit for the case where all nodes belong to the same loop (or no
340+ // loop at all).
341+ if (LoopSizes.size () <= 1U ) {
342+ Order.assign (POT.begin (), POT.end ());
343+ return ;
344+ }
345+ Order.resize (POT.size ());
346+
347+ // The post-order traversal of the list gives us an ordering close to what we
348+ // want. The only problem with it is that sometimes backedges for outer loops
349+ // will be visited before backedges for inner loops. So now we fix that by
350+ // inserting the nodes in order, while making sure that encountered inner loop
351+ // are complete before their parents (outer loops).
352+
353+ SmallVector<Loop *, 8 > WorkList;
354+ // Get the size of the outermost region (the nodes that don't belong to any
355+ // loop inside ParentRegion).
356+ unsigned ZeroCurrentLoopSize = 0U ;
357+ auto LSI = LoopSizes.find (nullptr );
358+ unsigned *CurrentLoopSize =
359+ LSI != LoopSizes.end () ? &LSI->second : &ZeroCurrentLoopSize;
351360 Loop *CurrentLoop = nullptr ;
352- for (auto I = RPOT.begin (), E = RPOT.end (); I != E; ++I) {
353- RegionNode *RN = cast<RegionNode>(*I);
354- unsigned LoopDepth = getAdjustedLoopDepth (RN);
355361
356- if (is_contained (Order, *I))
357- continue ;
362+ // The "skipped" list is actually located at the (reversed) beginning of the
363+ // POT. This saves us the use of an intermediate container.
364+ // Note that there is always enough room, for the skipped nodes, before the
365+ // current location, as we have just passed at least that amount of nodes.
366+
367+ auto Begin = POT.rbegin ();
368+ auto I = Begin, SkippedEnd = Begin;
369+ auto O = Order.rbegin (), OE = Order.rend ();
370+ while (O != OE) {
371+ // If we have any skipped nodes, then erase the gap between the end of the
372+ // "skipped" list, and the current location.
373+ if (SkippedEnd != Begin) {
374+ POT.erase (I.base (), SkippedEnd.base ());
375+ I = SkippedEnd = Begin = POT.rbegin ();
376+ }
358377
359- if (LoopDepth < CurrentLoopDepth) {
360- // Make sure we have visited all blocks in this loop before moving back to
361- // the outer loop.
378+ // Keep processing outer loops, in order (from inner most, to outer).
379+ if (!WorkList.empty ()) {
380+ CurrentLoop = WorkList.pop_back_val ();
381+ CurrentLoopSize = &LoopSizes.find (CurrentLoop)->second ;
382+ }
362383
363- auto LoopI = I;
364- while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
365- LoopI++;
366- if (getAdjustedLoop (cast<RegionNode>(*LoopI)) == CurrentLoop) {
367- --BlockCount;
368- Order.push_back (*LoopI);
384+ // Keep processing loops while only going deeper (into inner loops).
385+ do {
386+ assert (I != POT.rend ());
387+ RegionNode *RN = *I++;
388+
389+ Loop *L = getAdjustedLoop (RN);
390+ if (L != CurrentLoop) {
391+ // If L is a loop inside CurrentLoop, then CurrentLoop must be the
392+ // parent of L.
393+ // To prove this, we will contradict the opposite:
394+ // Let P be the parent of L. If CurrentLoop is the parent of P, then
395+ // the header of P must have been processed already, as it must
396+ // dominate the other blocks of P (otherwise P is an irreducible loop,
397+ // and won't be recorded in the LoopInfo), especially L (inside). But
398+ // then CurrentLoop must have been updated to P at the time of
399+ // processing the header of P, which conflicts with the assumption
400+ // that CurrentLoop is not P.
401+
402+ // If L is not a loop inside CurrentLoop, then skip RN.
403+ if (!L || L->getParentLoop () != CurrentLoop) {
404+ // Skip the node by pushing it to the end of the "skipped" list.
405+ *SkippedEnd++ = RN;
406+ continue ;
369407 }
408+
409+ // If we still haven't processed all the nodes that belong to
410+ // CurrentLoop, then make sure we come back later, to finish the job, by
411+ // pushing it to the WorkList.
412+ if (*CurrentLoopSize)
413+ WorkList.push_back (CurrentLoop);
414+
415+ CurrentLoop = L;
416+ CurrentLoopSize = &LoopSizes.find (CurrentLoop)->second ;
370417 }
371- }
372418
373- CurrentLoop = getAdjustedLoop (RN);
374- if (CurrentLoop)
375- LoopBlocks[CurrentLoop]--;
419+ assert (O != OE);
420+ *O++ = RN;
376421
377- CurrentLoopDepth = LoopDepth;
378- Order.push_back (*I);
422+ // If we have finished processing the current loop, then we are done here.
423+ --*CurrentLoopSize;
424+ } while (*CurrentLoopSize);
379425 }
380-
381- // This pass originally used a post-order traversal and then operated on
382- // the list in reverse. Now that we are using a reverse post-order traversal
383- // rather than re-working the whole pass to operate on the list in order,
384- // we just reverse the list and continue to operate on it in reverse.
385- std::reverse (Order.begin (), Order.end ());
426+ assert (WorkList.empty ());
427+ assert (SkippedEnd == Begin);
386428}
387429
388430// / Determine the end of the loops
0 commit comments