From f6d6b0c96d86541e8fb69d133ff6222a038e5a53 Mon Sep 17 00:00:00 2001
From: Tyson Nottingham <tgnottingham@gmail.com>
Date: Thu, 26 Nov 2020 01:10:43 -0800
Subject: [PATCH 1/8] rustc_query_system: share previous graph data with
 current graph

Reduce memory consumption by taking advantage of red/green algorithm
properties to share the previous dependency graph's node data with the
current graph instead of storing node data redundantly. Red nodes can
share the `DepNode`, and green nodes can share the `DepNode` and
`Fingerprint`. Edges will be shared when possible in a later change.
---
 .../rustc_query_system/src/dep_graph/graph.rs | 707 ++++++++++++------
 .../rustc_query_system/src/dep_graph/query.rs |   8 +-
 2 files changed, 499 insertions(+), 216 deletions(-)
diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index d2f0e39ea6bbd..3bc93f63905bd 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -68,7 +68,7 @@ struct DepGraphData<K: DepKind> {
     /// The new encoding of the dependency graph, optimized for red/green
     /// tracking. The `current` field is the dependency graph of only the
     /// current compilation session: We don't merge the previous dep-graph into
-    /// current one anymore.
+    /// current one anymore, but we do reference shared data to save space.
     current: CurrentDepGraph<K>,
 
     /// The dep-graph from the previous compilation session. It contains all
@@ -134,16 +134,44 @@ impl<K: DepKind> DepGraph<K> {
     }
 
     pub fn query(&self) -> DepGraphQuery<K> {
-        let data = self.data.as_ref().unwrap().current.data.lock();
-        let nodes: Vec<_> = data.iter().map(|n| n.node).collect();
-        let mut edges = Vec::new();
-        for (from, edge_targets) in data.iter().map(|d| (d.node, &d.edges)) {
-            for &edge_target in edge_targets.iter() {
-                let to = data[edge_target].node;
-                edges.push((from, to));
+        let data = self.data.as_ref().unwrap();
+        let previous = &data.previous;
+        let data = data.current.data.lock();
+
+        let node_count = data.hybrid_indices.len();
+
+        let edge_count = data.new.edges.iter().map(|e| e.len()).sum::<usize>()
+            + data.red.edges.iter().map(|e| e.len()).sum::<usize>()
+            + data.green.edges.iter().map(|e| e.len()).sum::<usize>();
+
+        let mut nodes = Vec::with_capacity(node_count);
+        let mut edges = Vec::with_capacity(edge_count);
+
+        for (index, &hybrid_index) in data.hybrid_indices.iter_enumerated() {
+            let src = index.index();
+
+            match hybrid_index.into() {
+                HybridIndex::New(new_index) => {
+                    let new = &data.new;
+                    nodes.push(new.nodes[new_index]);
+                    edges.extend(new.edges[new_index].iter().map(|dst| (src, dst.index())));
+                }
+                HybridIndex::Red(red_index) => {
+                    let red = &data.red;
+                    nodes.push(previous.index_to_node(red.node_indices[red_index]));
+                    edges.extend(red.edges[red_index].iter().map(|dst| (src, dst.index())));
+                }
+                HybridIndex::Green(green_index) => {
+                    let green = &data.green;
+                    nodes.push(previous.index_to_node(green.node_indices[green_index]));
+                    edges.extend(green.edges[green_index].iter().map(|dst| (src, dst.index())));
+                }
             }
         }
 
+        debug_assert_eq!(nodes.len(), node_count);
+        debug_assert_eq!(edges.len(), edge_count);
+
         DepGraphQuery::new(&nodes[..], &edges[..])
     }
 
@@ -212,7 +240,6 @@ impl<K: DepKind> DepGraph<K> {
                     phantom_data: PhantomData,
                 })
             },
-            |data, key, fingerprint, task| data.complete_task(key, task.unwrap(), fingerprint),
             hash_result,
         )
     }
@@ -225,12 +252,6 @@ impl<K: DepKind> DepGraph<K> {
         no_tcx: bool,
         task: fn(Ctxt, A) -> R,
         create_task: fn(DepNode<K>) -> Option<TaskDeps<K>>,
-        finish_task_and_alloc_depnode: fn(
-            &CurrentDepGraph<K>,
-            DepNode<K>,
-            Fingerprint,
-            Option<TaskDeps<K>>,
-        ) -> DepNodeIndex,
         hash_result: impl FnOnce(&mut Ctxt::StableHashingContext, &R) -> Option<Fingerprint>,
     ) -> (R, DepNodeIndex) {
         if let Some(ref data) = self.data {
@@ -249,39 +270,54 @@ impl<K: DepKind> DepGraph<K> {
                 K::with_deps(task_deps.as_ref(), || task(cx, arg))
             };
 
-            let current_fingerprint = hash_result(&mut hcx, &result);
+            let edges = task_deps.map_or_else(|| smallvec![], |lock| lock.into_inner().reads);
 
-            let dep_node_index = finish_task_and_alloc_depnode(
-                &data.current,
-                key,
-                current_fingerprint.unwrap_or(Fingerprint::ZERO),
-                task_deps.map(|lock| lock.into_inner()),
-            );
+            let current_fingerprint = hash_result(&mut hcx, &result);
 
             let print_status = cfg!(debug_assertions) && cx.debug_dep_tasks();
 
-            // Determine the color of the new DepNode.
-            if let Some(prev_index) = data.previous.node_to_index_opt(&key) {
-                let prev_fingerprint = data.previous.fingerprint_by_index(prev_index);
-
-                let color = if let Some(current_fingerprint) = current_fingerprint {
-                    if current_fingerprint == prev_fingerprint {
+            // Intern the new `DepNode`.
+            let dep_node_index = if let Some(prev_index) = data.previous.node_to_index_opt(&key) {
+                // Determine the color and index of the new `DepNode`.
+                let (color, dep_node_index) = if let Some(current_fingerprint) = current_fingerprint
+                {
+                    if current_fingerprint == data.previous.fingerprint_by_index(prev_index) {
                         if print_status {
                             eprintln!("[task::green] {:?}", key);
                         }
-                        DepNodeColor::Green(dep_node_index)
+
+                        let dep_node_index =
+                            data.current.intern_green_node(&data.previous, prev_index, edges);
+
+                        (DepNodeColor::Green(dep_node_index), dep_node_index)
                     } else {
                         if print_status {
                             eprintln!("[task::red] {:?}", key);
                         }
-                        DepNodeColor::Red
+
+                        let dep_node_index = data.current.intern_red_node(
+                            &data.previous,
+                            prev_index,
+                            edges,
+                            current_fingerprint,
+                        );
+
+                        (DepNodeColor::Red, dep_node_index)
                     }
                 } else {
                     if print_status {
                         eprintln!("[task::unknown] {:?}", key);
                     }
+
+                    let dep_node_index = data.current.intern_red_node(
+                        &data.previous,
+                        prev_index,
+                        edges,
+                        Fingerprint::ZERO,
+                    );
+
                     // Mark the node as Red if we can't hash the result
-                    DepNodeColor::Red
+                    (DepNodeColor::Red, dep_node_index)
                 };
 
                 debug_assert!(
@@ -292,9 +328,19 @@ impl<K: DepKind> DepGraph<K> {
                 );
 
                 data.colors.insert(prev_index, color);
-            } else if print_status {
-                eprintln!("[task::new] {:?}", key);
-            }
+                dep_node_index
+            } else {
+                if print_status {
+                    eprintln!("[task::new] {:?}", key);
+                }
+
+                data.current.intern_node(
+                    &data.previous,
+                    key,
+                    edges,
+                    current_fingerprint.unwrap_or(Fingerprint::ZERO),
+                )
+            };
 
             (result, dep_node_index)
         } else {
@@ -308,13 +354,36 @@ impl<K: DepKind> DepGraph<K> {
     where
         OP: FnOnce() -> R,
     {
+        debug_assert!(!dep_kind.is_eval_always());
+
         if let Some(ref data) = self.data {
             let task_deps = Lock::new(TaskDeps::default());
-
             let result = K::with_deps(Some(&task_deps), op);
             let task_deps = task_deps.into_inner();
 
-            let dep_node_index = data.current.complete_anon_task(dep_kind, task_deps);
+            // The dep node indices are hashed here instead of hashing the dep nodes of the
+            // dependencies. These indices may refer to different nodes per session, but this isn't
+            // a problem here because we that ensure the final dep node hash is per session only by
+            // combining it with the per session random number `anon_id_seed`. This hash only need
+            // to map the dependencies to a single value on a per session basis.
+            let mut hasher = StableHasher::new();
+            task_deps.reads.hash(&mut hasher);
+
+            let target_dep_node = DepNode {
+                kind: dep_kind,
+                // Fingerprint::combine() is faster than sending Fingerprint
+                // through the StableHasher (at least as long as StableHasher
+                // is so slow).
+                hash: data.current.anon_id_seed.combine(hasher.finish()).into(),
+            };
+
+            let dep_node_index = data.current.intern_node(
+                &data.previous,
+                target_dep_node,
+                task_deps.reads,
+                Fingerprint::ZERO,
+            );
+
             (result, dep_node_index)
         } else {
             (op(), self.next_virtual_depnode_index())
@@ -331,69 +400,104 @@ impl<K: DepKind> DepGraph<K> {
         task: fn(Ctxt, A) -> R,
         hash_result: impl FnOnce(&mut Ctxt::StableHashingContext, &R) -> Option<Fingerprint>,
     ) -> (R, DepNodeIndex) {
-        self.with_task_impl(
-            key,
-            cx,
-            arg,
-            false,
-            task,
-            |_| None,
-            |data, key, fingerprint, _| data.alloc_node(key, smallvec![], fingerprint),
-            hash_result,
-        )
+        self.with_task_impl(key, cx, arg, false, task, |_| None, hash_result)
     }
 
     #[inline]
-    pub fn read(&self, v: DepNode<K>) {
+    pub fn read_index(&self, dep_node_index: DepNodeIndex) {
         if let Some(ref data) = self.data {
-            let map = data.current.node_to_node_index.get_shard_by_value(&v).lock();
-            if let Some(dep_node_index) = map.get(&v).copied() {
-                std::mem::drop(map);
-                data.read_index(dep_node_index);
-            } else {
-                panic!("DepKind {:?} should be pre-allocated but isn't.", v.kind)
-            }
+            K::read_deps(|task_deps| {
+                if let Some(task_deps) = task_deps {
+                    let mut task_deps = task_deps.lock();
+                    let task_deps = &mut *task_deps;
+                    if cfg!(debug_assertions) {
+                        data.current.total_read_count.fetch_add(1, Relaxed);
+                    }
+
+                    // As long as we only have a low number of reads we can avoid doing a hash
+                    // insert and potentially allocating/reallocating the hashmap
+                    let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP {
+                        task_deps.reads.iter().all(|other| *other != dep_node_index)
+                    } else {
+                        task_deps.read_set.insert(dep_node_index)
+                    };
+                    if new_read {
+                        task_deps.reads.push(dep_node_index);
+                        if task_deps.reads.len() == TASK_DEPS_READS_CAP {
+                            // Fill `read_set` with what we have so far so we can use the hashset
+                            // next time
+                            task_deps.read_set.extend(task_deps.reads.iter().copied());
+                        }
+
+                        #[cfg(debug_assertions)]
+                        {
+                            if let Some(target) = task_deps.node {
+                                if let Some(ref forbidden_edge) = data.current.forbidden_edge {
+                                    let src = self.dep_node_of(dep_node_index);
+                                    if forbidden_edge.test(&src, &target) {
+                                        panic!("forbidden edge {:?} -> {:?} created", src, target)
+                                    }
+                                }
+                            }
+                        }
+                    } else if cfg!(debug_assertions) {
+                        data.current.total_duplicate_read_count.fetch_add(1, Relaxed);
+                    }
+                }
+            })
         }
     }
 
     #[inline]
-    pub fn read_index(&self, dep_node_index: DepNodeIndex) {
-        if let Some(ref data) = self.data {
-            data.read_index(dep_node_index);
-        }
+    pub fn dep_node_index_of(&self, dep_node: &DepNode<K>) -> DepNodeIndex {
+        self.dep_node_index_of_opt(dep_node).unwrap()
     }
 
     #[inline]
-    pub fn dep_node_index_of(&self, dep_node: &DepNode<K>) -> DepNodeIndex {
-        self.data
-            .as_ref()
-            .unwrap()
-            .current
-            .node_to_node_index
-            .get_shard_by_value(dep_node)
-            .lock()
-            .get(dep_node)
-            .cloned()
-            .unwrap()
+    pub fn dep_node_index_of_opt(&self, dep_node: &DepNode<K>) -> Option<DepNodeIndex> {
+        let data = self.data.as_ref().unwrap();
+        let current = &data.current;
+
+        if let Some(prev_index) = data.previous.node_to_index_opt(dep_node) {
+            current.prev_index_to_index.lock()[prev_index]
+        } else {
+            current.new_node_to_index.get_shard_by_value(dep_node).lock().get(dep_node).copied()
+        }
     }
 
     #[inline]
     pub fn dep_node_exists(&self, dep_node: &DepNode<K>) -> bool {
-        if let Some(ref data) = self.data {
-            data.current
-                .node_to_node_index
-                .get_shard_by_value(&dep_node)
-                .lock()
-                .contains_key(dep_node)
-        } else {
-            false
+        self.data.is_some() && self.dep_node_index_of_opt(dep_node).is_some()
+    }
+
+    #[inline]
+    pub fn dep_node_of(&self, dep_node_index: DepNodeIndex) -> DepNode<K> {
+        let data = self.data.as_ref().unwrap();
+        let previous = &data.previous;
+        let data = data.current.data.lock();
+
+        match data.hybrid_indices[dep_node_index].into() {
+            HybridIndex::New(new_index) => data.new.nodes[new_index],
+            HybridIndex::Red(red_index) => previous.index_to_node(data.red.node_indices[red_index]),
+            HybridIndex::Green(green_index) => {
+                previous.index_to_node(data.green.node_indices[green_index])
+            }
         }
     }
 
     #[inline]
     pub fn fingerprint_of(&self, dep_node_index: DepNodeIndex) -> Fingerprint {
-        let data = self.data.as_ref().expect("dep graph enabled").current.data.lock();
-        data[dep_node_index].fingerprint
+        let data = self.data.as_ref().unwrap();
+        let previous = &data.previous;
+        let data = data.current.data.lock();
+
+        match data.hybrid_indices[dep_node_index].into() {
+            HybridIndex::New(new_index) => data.new.fingerprints[new_index],
+            HybridIndex::Red(red_index) => data.red.fingerprints[red_index],
+            HybridIndex::Green(green_index) => {
+                previous.fingerprint_by_index(data.green.node_indices[green_index])
+            }
+        }
     }
 
     pub fn prev_fingerprint_of(&self, dep_node: &DepNode<K>) -> Option<Fingerprint> {
@@ -444,29 +548,62 @@ impl<K: DepKind> DepGraph<K> {
     }
 
     pub fn serialize(&self) -> SerializedDepGraph<K> {
-        let data = self.data.as_ref().unwrap().current.data.lock();
+        type SDNI = SerializedDepNodeIndex;
 
-        let fingerprints: IndexVec<SerializedDepNodeIndex, _> =
-            data.iter().map(|d| d.fingerprint).collect();
-        let nodes: IndexVec<SerializedDepNodeIndex, _> = data.iter().map(|d| d.node).collect();
+        let data = self.data.as_ref().unwrap();
+        let previous = &data.previous;
+        let data = data.current.data.lock();
 
-        let total_edge_count: usize = data.iter().map(|d| d.edges.len()).sum();
+        let node_count = data.hybrid_indices.len();
 
-        let mut edge_list_indices = IndexVec::with_capacity(nodes.len());
-        let mut edge_list_data = Vec::with_capacity(total_edge_count);
+        let edge_count = data.new.edges.iter().map(|e| e.len()).sum::<usize>()
+            + data.red.edges.iter().map(|e| e.len()).sum::<usize>()
+            + data.green.edges.iter().map(|e| e.len()).sum::<usize>();
 
-        for (current_dep_node_index, edges) in data.iter_enumerated().map(|(i, d)| (i, &d.edges)) {
+        let mut nodes = IndexVec::with_capacity(node_count);
+        let mut fingerprints = IndexVec::with_capacity(node_count);
+        let mut edge_list_indices = IndexVec::with_capacity(node_count);
+        let mut edge_list_data = Vec::with_capacity(edge_count);
+
+        fn add_edges<'a, I: Iterator<Item = &'a DepNodeIndex>>(
+            edge_list_indices: &mut IndexVec<SerializedDepNodeIndex, (u32, u32)>,
+            edge_list_data: &mut Vec<SerializedDepNodeIndex>,
+            iter: I,
+        ) {
             let start = edge_list_data.len() as u32;
-            // This should really just be a memcpy :/
-            edge_list_data.extend(edges.iter().map(|i| SerializedDepNodeIndex::new(i.index())));
+            edge_list_data.extend(iter.map(|i| SDNI::new(i.index())));
             let end = edge_list_data.len() as u32;
-
-            debug_assert_eq!(current_dep_node_index.index(), edge_list_indices.len());
             edge_list_indices.push((start, end));
+        };
+
+        for &hybrid_index in data.hybrid_indices.iter() {
+            match hybrid_index.into() {
+                HybridIndex::New(i) => {
+                    let new = &data.new;
+                    nodes.push(new.nodes[i]);
+                    fingerprints.push(new.fingerprints[i]);
+                    add_edges(&mut edge_list_indices, &mut edge_list_data, new.edges[i].iter());
+                }
+                HybridIndex::Red(i) => {
+                    let red = &data.red;
+                    nodes.push(previous.index_to_node(red.node_indices[i]));
+                    fingerprints.push(red.fingerprints[i]);
+                    add_edges(&mut edge_list_indices, &mut edge_list_data, red.edges[i].iter());
+                }
+                HybridIndex::Green(i) => {
+                    let green = &data.green;
+                    nodes.push(previous.index_to_node(green.node_indices[i]));
+                    fingerprints.push(previous.fingerprint_by_index(green.node_indices[i]));
+                    add_edges(&mut edge_list_indices, &mut edge_list_data, green.edges[i].iter());
+                }
+            }
         }
 
+        debug_assert_eq!(nodes.len(), node_count);
+        debug_assert_eq!(fingerprints.len(), node_count);
+        debug_assert_eq!(edge_list_indices.len(), node_count);
+        debug_assert_eq!(edge_list_data.len(), edge_count);
         debug_assert!(edge_list_data.len() <= u32::MAX as usize);
-        debug_assert_eq!(edge_list_data.len(), total_edge_count);
 
         SerializedDepGraph { nodes, fingerprints, edge_list_indices, edge_list_data }
     }
@@ -540,14 +677,7 @@ impl<K: DepKind> DepGraph<K> {
 
         #[cfg(not(parallel_compiler))]
         {
-            debug_assert!(
-                !data
-                    .current
-                    .node_to_node_index
-                    .get_shard_by_value(dep_node)
-                    .lock()
-                    .contains_key(dep_node)
-            );
+            debug_assert!(!self.dep_node_exists(dep_node));
             debug_assert!(data.colors.get(prev_dep_node_index).is_none());
         }
 
@@ -690,13 +820,9 @@ impl<K: DepKind> DepGraph<K> {
         // There may be multiple threads trying to mark the same dep node green concurrently
 
         let dep_node_index = {
-            // Copy the fingerprint from the previous graph,
-            // so we don't have to recompute it
-            let fingerprint = data.previous.fingerprint_by_index(prev_dep_node_index);
-
             // We allocating an entry for the node in the current dependency graph and
             // adding all the appropriate edges imported from the previous graph
-            data.current.intern_node(*dep_node, current_deps, fingerprint)
+            data.current.intern_green_node(&data.previous, prev_dep_node_index, current_deps)
         };
 
         // ... emitting any stored diagnostic ...
@@ -871,31 +997,181 @@ pub struct WorkProduct {
     pub saved_file: Option<String>,
 }
 
-#[derive(Clone)]
+// The maximum value of the follow index types leaves the upper two bits unused
+// so that we can store multiple index types in `CompressedHybridIndex`, and use
+// those bits to encode which index type it contains.
+
+// Index type for `NewDepNodeData`.
+rustc_index::newtype_index! {
+    struct NewDepNodeIndex {
+        MAX = 0x7FFF_FFFF
+    }
+}
+
+// Index type for `RedDepNodeData`.
+rustc_index::newtype_index! {
+    struct RedDepNodeIndex {
+        MAX = 0x7FFF_FFFF
+    }
+}
+
+// Index type for `GreenDepNodeData`.
+rustc_index::newtype_index! {
+    struct GreenDepNodeIndex {
+        MAX = 0x7FFF_FFFF
+    }
+}
+
+/// Compressed representation of `HybridIndex` enum. Bits unused by the
+/// contained index types are used to encode which index type it contains.
+#[derive(Copy, Clone)]
+struct CompressedHybridIndex(u32);
+
+impl CompressedHybridIndex {
+    const NEW_TAG: u32 = 0b0000_0000_0000_0000_0000_0000_0000_0000;
+    const RED_TAG: u32 = 0b0100_0000_0000_0000_0000_0000_0000_0000;
+    const GREEN_TAG: u32 = 0b1000_0000_0000_0000_0000_0000_0000_0000;
+
+    const TAG_MASK: u32 = 0b1100_0000_0000_0000_0000_0000_0000_0000;
+    const INDEX_MASK: u32 = !Self::TAG_MASK;
+}
+
+impl From<NewDepNodeIndex> for CompressedHybridIndex {
+    #[inline]
+    fn from(index: NewDepNodeIndex) -> Self {
+        CompressedHybridIndex(Self::NEW_TAG | index.as_u32())
+    }
+}
+
+impl From<RedDepNodeIndex> for CompressedHybridIndex {
+    #[inline]
+    fn from(index: RedDepNodeIndex) -> Self {
+        CompressedHybridIndex(Self::RED_TAG | index.as_u32())
+    }
+}
+
+impl From<GreenDepNodeIndex> for CompressedHybridIndex {
+    #[inline]
+    fn from(index: GreenDepNodeIndex) -> Self {
+        CompressedHybridIndex(Self::GREEN_TAG | index.as_u32())
+    }
+}
+
+/// Contains an index into one of several node data collections. Elsewhere, we
+/// store `CompressedHyridIndex` instead of this to save space, but convert to
+/// this type during processing to take advantage of the enum match ergonomics.
+enum HybridIndex {
+    New(NewDepNodeIndex),
+    Red(RedDepNodeIndex),
+    Green(GreenDepNodeIndex),
+}
+
+impl From<CompressedHybridIndex> for HybridIndex {
+    #[inline]
+    fn from(hybrid_index: CompressedHybridIndex) -> Self {
+        let index = hybrid_index.0 & CompressedHybridIndex::INDEX_MASK;
+
+        match hybrid_index.0 & CompressedHybridIndex::TAG_MASK {
+            CompressedHybridIndex::NEW_TAG => HybridIndex::New(NewDepNodeIndex::from_u32(index)),
+            CompressedHybridIndex::RED_TAG => HybridIndex::Red(RedDepNodeIndex::from_u32(index)),
+            CompressedHybridIndex::GREEN_TAG => {
+                HybridIndex::Green(GreenDepNodeIndex::from_u32(index))
+            }
+            _ => unreachable!(),
+        }
+    }
+}
+
+/// Data for nodes in the current graph, divided into different collections
+/// based on their presence in the previous graph, and if present, their color.
+/// We divide nodes this way because different types of nodes are able to share
+/// more or less data with the previous graph.
+///
+/// Node data is stored in parallel vectors to eliminate the padding between
+/// elements that would be needed to satisfy alignment requirements of the
+/// structure that would contain all of a node's data. We could group tightly
+/// packing subsets of node data together and use fewer vectors, but for
+/// consistency's sake, we use separate vectors for each piece of data.
 struct DepNodeData<K> {
-    node: DepNode<K>,
-    edges: EdgesVec,
-    fingerprint: Fingerprint,
+    /// Data for nodes not in previous graph.
+    new: NewDepNodeData<K>,
+
+    /// Data for nodes in previous graph that have been marked red.
+    red: RedDepNodeData,
+
+    /// Data for nodes in previous graph that have been marked green.
+    green: GreenDepNodeData,
+
+    /// Mapping from `DepNodeIndex` to an index into a collection above.
+    /// Indicates which of the above collections contains a node's data.
+    ///
+    /// This collection is wasteful in time and space during incr-full builds,
+    /// because for those, all nodes are new. However, the waste is relatively
+    /// small, and the maintenance cost of avoiding using this for incr-full
+    /// builds is somewhat high and prone to bugginess. It does not seem worth
+    /// it at the time of this writing, but we may want to revisit the idea.
+    hybrid_indices: IndexVec<DepNodeIndex, CompressedHybridIndex>,
+}
+
+/// Data for nodes not in previous graph. Since we cannot share any data with
+/// the previous graph, so we must store all of such a node's data here.
+struct NewDepNodeData<K> {
+    nodes: IndexVec<NewDepNodeIndex, DepNode<K>>,
+    edges: IndexVec<NewDepNodeIndex, EdgesVec>,
+    fingerprints: IndexVec<NewDepNodeIndex, Fingerprint>,
+}
+
+/// Data for nodes in previous graph that have been marked red. We can share the
+/// dep node with the previous graph, but the edges may be different, and the
+/// fingerprint is known to be different, so we store the latter two directly.
+struct RedDepNodeData {
+    node_indices: IndexVec<RedDepNodeIndex, SerializedDepNodeIndex>,
+    edges: IndexVec<RedDepNodeIndex, EdgesVec>,
+    fingerprints: IndexVec<RedDepNodeIndex, Fingerprint>,
+}
+
+/// Data for nodes in previous graph that have been marked green. We can share
+/// both the dep node and the fingerprint with previous graph, but the edges may
+/// be different, so we store the latter directly.
+struct GreenDepNodeData {
+    node_indices: IndexVec<GreenDepNodeIndex, SerializedDepNodeIndex>,
+    edges: IndexVec<GreenDepNodeIndex, EdgesVec>,
 }
 
-/// `CurrentDepGraph` stores the dependency graph for the current session.
-/// It will be populated as we run queries or tasks.
+/// `CurrentDepGraph` stores the dependency graph for the current session. It
+/// will be populated as we run queries or tasks. We never remove nodes from the
+/// graph: they are only added.
 ///
-/// The nodes in it are identified by an index (`DepNodeIndex`).
-/// The data for each node is stored in its `DepNodeData`, found in the `data` field.
+/// The nodes in it are identified by a `DepNodeIndex`. Internally, this maps to
+/// a `HybridIndex`, which identifies which collection in the `data` field
+/// contains a node's data. Which collection is used for a node depends on
+/// whether the node was present in the `PreviousDepGraph`, and if so, the color
+/// of the node. Each type of node can share more or less data with the previous
+/// graph. When possible, we can store just the index of the node in the
+/// previous graph, rather than duplicating its data in our own collections.
+/// This is important, because these graph structures are some of the largest in
+/// the compiler.
 ///
-/// We never remove nodes from the graph: they are only added.
+/// For the same reason, we also avoid storing `DepNode`s more than once as map
+/// keys. The `new_node_to_index` map only contains nodes not in the previous
+/// graph, and we map nodes in the previous graph to indices via a two-step
+/// mapping. `PreviousDepGraph` maps from `DepNode` to `SerializedDepNodeIndex`,
+/// and the `prev_index_to_index` vector (which is more compact and faster than
+/// using a map) maps from `SerializedDepNodeIndex` to `DepNodeIndex`.
 ///
-/// This struct uses two locks internally. The `data` and `node_to_node_index` fields are
-/// locked separately. Operations that take a `DepNodeIndex` typically just access
-/// the data field.
+/// This struct uses three locks internally. The `data`, `new_node_to_index`,
+/// and `prev_index_to_index` fields are locked separately. Operations that take
+/// a `DepNodeIndex` typically just access the `data` field.
 ///
-/// The only operation that must manipulate both locks is adding new nodes, in which case
-/// we first acquire the `node_to_node_index` lock and then, once a new node is to be inserted,
-/// acquire the lock on `data.`
+/// We only need to manipulate at most two locks simultaneously:
+/// `new_node_to_index` and `data`, or `prev_index_to_index` and `data`. The
+/// only operation that must manipulate both locks is adding new nodes, in which
+/// case we first acquire the `new_node_to_index` or `prev_index_to_index` lock
+/// and then, once a new node is to be inserted, acquire the lock on `data`.
 pub(super) struct CurrentDepGraph<K> {
-    data: Lock<IndexVec<DepNodeIndex, DepNodeData<K>>>,
-    node_to_node_index: Sharded<FxHashMap<DepNode<K>, DepNodeIndex>>,
+    data: Lock<DepNodeData<K>>,
+    new_node_to_index: Sharded<FxHashMap<DepNode<K>, DepNodeIndex>>,
+    prev_index_to_index: Lock<IndexVec<SerializedDepNodeIndex, Option<DepNodeIndex>>>,
 
     /// Used to trap when a specific edge is added to the graph.
     /// This is used for debug purposes and is only active with `debug_assertions`.
@@ -944,18 +1220,46 @@ impl<K: DepKind> CurrentDepGraph<K> {
 
         // Pre-allocate the dep node structures. We over-allocate a little so
         // that we hopefully don't have to re-allocate during this compilation
-        // session. The over-allocation is 2% plus a small constant to account
-        // for the fact that in very small crates 2% might not be enough.
-        let new_node_count_estimate = (prev_graph_node_count * 102) / 100 + 200;
+        // session. The over-allocation for new nodes is 2% plus a small
+        // constant to account for the fact that in very small crates 2% might
+        // not be enough. The allocation for red and green node data doesn't
+        // include a constant, as we don't want to allocate anything for these
+        // structures during full incremental builds, where they aren't used.
+        let new_node_count_estimate = (prev_graph_node_count * 2) / 100 + 200;
+        let red_node_count_estimate = (prev_graph_node_count * 3) / 100;
+        let green_node_count_estimate = (prev_graph_node_count * 95) / 100;
+        let total_node_count_estimate = prev_graph_node_count + new_node_count_estimate;
+
+        // We store a large collection of these in `prev_index_to_index` during
+        // non-full incremental builds, and want to ensure that the element size
+        // doesn't inadvertently increase.
+        static_assert_size!(Option<DepNodeIndex>, 4);
 
         CurrentDepGraph {
-            data: Lock::new(IndexVec::with_capacity(new_node_count_estimate)),
-            node_to_node_index: Sharded::new(|| {
+            data: Lock::new(DepNodeData {
+                new: NewDepNodeData {
+                    nodes: IndexVec::with_capacity(new_node_count_estimate),
+                    edges: IndexVec::with_capacity(new_node_count_estimate),
+                    fingerprints: IndexVec::with_capacity(new_node_count_estimate),
+                },
+                red: RedDepNodeData {
+                    node_indices: IndexVec::with_capacity(red_node_count_estimate),
+                    edges: IndexVec::with_capacity(red_node_count_estimate),
+                    fingerprints: IndexVec::with_capacity(red_node_count_estimate),
+                },
+                green: GreenDepNodeData {
+                    node_indices: IndexVec::with_capacity(green_node_count_estimate),
+                    edges: IndexVec::with_capacity(green_node_count_estimate),
+                },
+                hybrid_indices: IndexVec::with_capacity(total_node_count_estimate),
+            }),
+            new_node_to_index: Sharded::new(|| {
                 FxHashMap::with_capacity_and_hasher(
                     new_node_count_estimate / sharded::SHARDS,
                     Default::default(),
                 )
             }),
+            prev_index_to_index: Lock::new(IndexVec::from_elem_n(None, prev_graph_node_count)),
             anon_id_seed: stable_hasher.finish(),
             forbidden_edge,
             total_read_count: AtomicU64::new(0),
@@ -963,113 +1267,92 @@ impl<K: DepKind> CurrentDepGraph<K> {
         }
     }
 
-    fn complete_task(
-        &self,
-        node: DepNode<K>,
-        task_deps: TaskDeps<K>,
-        fingerprint: Fingerprint,
-    ) -> DepNodeIndex {
-        self.alloc_node(node, task_deps.reads, fingerprint)
-    }
-
-    fn complete_anon_task(&self, kind: K, task_deps: TaskDeps<K>) -> DepNodeIndex {
-        debug_assert!(!kind.is_eval_always());
-
-        let mut hasher = StableHasher::new();
-
-        // The dep node indices are hashed here instead of hashing the dep nodes of the
-        // dependencies. These indices may refer to different nodes per session, but this isn't
-        // a problem here because we that ensure the final dep node hash is per session only by
-        // combining it with the per session random number `anon_id_seed`. This hash only need
-        // to map the dependencies to a single value on a per session basis.
-        task_deps.reads.hash(&mut hasher);
-
-        let target_dep_node = DepNode {
-            kind,
-
-            // Fingerprint::combine() is faster than sending Fingerprint
-            // through the StableHasher (at least as long as StableHasher
-            // is so slow).
-            hash: self.anon_id_seed.combine(hasher.finish()).into(),
-        };
-
-        self.intern_node(target_dep_node, task_deps.reads, Fingerprint::ZERO)
-    }
-
-    fn alloc_node(
+    fn intern_node(
         &self,
+        prev_graph: &PreviousDepGraph<K>,
         dep_node: DepNode<K>,
         edges: EdgesVec,
         fingerprint: Fingerprint,
     ) -> DepNodeIndex {
         debug_assert!(
-            !self.node_to_node_index.get_shard_by_value(&dep_node).lock().contains_key(&dep_node)
+            prev_graph.node_to_index_opt(&dep_node).is_none(),
+            "node in previous graph should be interned using \
+            `intern_red_node` or `intern_green_node`"
         );
-        self.intern_node(dep_node, edges, fingerprint)
+
+        match self.new_node_to_index.get_shard_by_value(&dep_node).lock().entry(dep_node) {
+            Entry::Occupied(entry) => *entry.get(),
+            Entry::Vacant(entry) => {
+                let mut data = self.data.lock();
+                let new_index = data.new.nodes.push(dep_node);
+                data.new.edges.push(edges);
+                data.new.fingerprints.push(fingerprint);
+                let dep_node_index = data.hybrid_indices.push(new_index.into());
+                entry.insert(dep_node_index);
+                dep_node_index
+            }
+        }
     }
 
-    fn intern_node(
+    fn intern_red_node(
         &self,
-        dep_node: DepNode<K>,
+        prev_graph: &PreviousDepGraph<K>,
+        prev_index: SerializedDepNodeIndex,
         edges: EdgesVec,
         fingerprint: Fingerprint,
     ) -> DepNodeIndex {
-        match self.node_to_node_index.get_shard_by_value(&dep_node).lock().entry(dep_node) {
-            Entry::Occupied(entry) => *entry.get(),
-            Entry::Vacant(entry) => {
+        self.debug_assert_not_in_new_nodes(prev_graph, prev_index);
+
+        let mut prev_index_to_index = self.prev_index_to_index.lock();
+
+        match prev_index_to_index[prev_index] {
+            Some(dep_node_index) => dep_node_index,
+            None => {
                 let mut data = self.data.lock();
-                let dep_node_index = DepNodeIndex::new(data.len());
-                data.push(DepNodeData { node: dep_node, edges, fingerprint });
-                entry.insert(dep_node_index);
+                let red_index = data.red.node_indices.push(prev_index);
+                data.red.edges.push(edges);
+                data.red.fingerprints.push(fingerprint);
+                let dep_node_index = data.hybrid_indices.push(red_index.into());
+                prev_index_to_index[prev_index] = Some(dep_node_index);
                 dep_node_index
             }
         }
     }
-}
 
-impl<K: DepKind> DepGraphData<K> {
-    #[inline(never)]
-    fn read_index(&self, source: DepNodeIndex) {
-        K::read_deps(|task_deps| {
-            if let Some(task_deps) = task_deps {
-                let mut task_deps = task_deps.lock();
-                let task_deps = &mut *task_deps;
-                if cfg!(debug_assertions) {
-                    self.current.total_read_count.fetch_add(1, Relaxed);
-                }
+    fn intern_green_node(
+        &self,
+        prev_graph: &PreviousDepGraph<K>,
+        prev_index: SerializedDepNodeIndex,
+        edges: EdgesVec,
+    ) -> DepNodeIndex {
+        self.debug_assert_not_in_new_nodes(prev_graph, prev_index);
 
-                // As long as we only have a low number of reads we can avoid doing a hash
-                // insert and potentially allocating/reallocating the hashmap
-                let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP {
-                    task_deps.reads.iter().all(|other| *other != source)
-                } else {
-                    task_deps.read_set.insert(source)
-                };
-                if new_read {
-                    task_deps.reads.push(source);
-                    if task_deps.reads.len() == TASK_DEPS_READS_CAP {
-                        // Fill `read_set` with what we have so far so we can use the hashset next
-                        // time
-                        task_deps.read_set.extend(task_deps.reads.iter().copied());
-                    }
+        let mut prev_index_to_index = self.prev_index_to_index.lock();
 
-                    #[cfg(debug_assertions)]
-                    {
-                        if let Some(target) = task_deps.node {
-                            let data = self.current.data.lock();
-                            if let Some(ref forbidden_edge) = self.current.forbidden_edge {
-                                let source = data[source].node;
-                                if forbidden_edge.test(&source, &target) {
-                                    panic!("forbidden edge {:?} -> {:?} created", source, target)
-                                }
-                            }
-                        }
-                    }
-                } else if cfg!(debug_assertions) {
-                    self.current.total_duplicate_read_count.fetch_add(1, Relaxed);
-                }
+        match prev_index_to_index[prev_index] {
+            Some(dep_node_index) => dep_node_index,
+            None => {
+                let mut data = self.data.lock();
+                let green_index = data.green.node_indices.push(prev_index);
+                data.green.edges.push(edges);
+                let dep_node_index = data.hybrid_indices.push(green_index.into());
+                prev_index_to_index[prev_index] = Some(dep_node_index);
+                dep_node_index
             }
-        })
+        }
+    }
+
+    #[inline]
+    fn debug_assert_not_in_new_nodes(
+        &self,
+        prev_graph: &PreviousDepGraph<K>,
+        prev_index: SerializedDepNodeIndex,
+    ) {
+        let node = &prev_graph.index_to_node(prev_index);
+        debug_assert!(
+            !self.new_node_to_index.get_shard_by_value(node).lock().contains_key(node),
+            "node from previous graph present in new node collection"
+        );
     }
 }
 
diff --git a/compiler/rustc_query_system/src/dep_graph/query.rs b/compiler/rustc_query_system/src/dep_graph/query.rs
index a27b716b95aee..82e99d64afeb6 100644
--- a/compiler/rustc_query_system/src/dep_graph/query.rs
+++ b/compiler/rustc_query_system/src/dep_graph/query.rs
@@ -9,16 +9,16 @@ pub struct DepGraphQuery<K> {
 }
 
 impl<K: DepKind> DepGraphQuery<K> {
-    pub fn new(nodes: &[DepNode<K>], edges: &[(DepNode<K>, DepNode<K>)]) -> DepGraphQuery<K> {
+    pub fn new(nodes: &[DepNode<K>], edges: &[(usize, usize)]) -> DepGraphQuery<K> {
         let mut graph = Graph::with_capacity(nodes.len(), edges.len());
         let mut indices = FxHashMap::default();
         for node in nodes {
             indices.insert(*node, graph.add_node(*node));
         }
 
-        for &(ref source, ref target) in edges {
-            let source = indices[source];
-            let target = indices[target];
+        for &(source, target) in edges {
+            let source = indices[&nodes[source]];
+            let target = indices[&nodes[target]];
             graph.add_edge(source, target, ());
         }
 

From ea47269f5f50cd7804141ba00811b79be1e156c6 Mon Sep 17 00:00:00 2001
From: Tyson Nottingham <tgnottingham@gmail.com>
Date: Sat, 28 Nov 2020 17:42:41 -0800
Subject: [PATCH 2/8] rustc_query_system: share previous graph edges with
 current graph

Reduce memory consumption by sharing the previous dependency graph's
edges with the current graph when it is known to be valid to do so. It
is known to be valid whenever we mark a node green because all of its
dependencies were green. It is *not* known to be valid when we mark a
node green because we re-executed its query and its result was the same
as in the previous compilation session. In that case, the dependency set
might have changed (we don't try to determine whether or not it changed
and whether or not we can share).
---
 .../rustc_query_system/src/dep_graph/graph.rs | 217 ++++++++++++------
 .../src/dep_graph/serialized.rs               |   7 +-
 2 files changed, 159 insertions(+), 65 deletions(-)

diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index 3bc93f63905bd..24508a5237369 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -134,16 +134,14 @@ impl<K: DepKind> DepGraph<K> {
     }
 
     pub fn query(&self) -> DepGraphQuery<K> {
+        // We call this before acquiring locks, since it also acquires them.
+        let edge_count = self.edge_count();
         let data = self.data.as_ref().unwrap();
         let previous = &data.previous;
+        let prev_index_to_index = data.current.prev_index_to_index.lock();
         let data = data.current.data.lock();
-
         let node_count = data.hybrid_indices.len();
 
-        let edge_count = data.new.edges.iter().map(|e| e.len()).sum::<usize>()
-            + data.red.edges.iter().map(|e| e.len()).sum::<usize>()
-            + data.green.edges.iter().map(|e| e.len()).sum::<usize>();
-
         let mut nodes = Vec::with_capacity(node_count);
         let mut edges = Vec::with_capacity(edge_count);
 
@@ -161,10 +159,18 @@ impl<K: DepKind> DepGraph<K> {
                     nodes.push(previous.index_to_node(red.node_indices[red_index]));
                     edges.extend(red.edges[red_index].iter().map(|dst| (src, dst.index())));
                 }
-                HybridIndex::Green(green_index) => {
-                    let green = &data.green;
-                    nodes.push(previous.index_to_node(green.node_indices[green_index]));
-                    edges.extend(green.edges[green_index].iter().map(|dst| (src, dst.index())));
+                HybridIndex::LightGreen(lg_index) => {
+                    let lg = &data.light_green;
+                    nodes.push(previous.index_to_node(lg.node_indices[lg_index]));
+                    edges.extend(lg.edges[lg_index].iter().map(|dst| (src, dst.index())));
+                }
+                HybridIndex::DarkGreen(prev_index) => {
+                    nodes.push(previous.index_to_node(prev_index));
+                    let edges_iter = previous
+                        .edge_targets_from(prev_index)
+                        .iter()
+                        .map(|&dst| (src, prev_index_to_index[dst].unwrap().index()));
+                    edges.extend(edges_iter);
                 }
             }
         }
@@ -287,7 +293,7 @@ impl<K: DepKind> DepGraph<K> {
                         }
 
                         let dep_node_index =
-                            data.current.intern_green_node(&data.previous, prev_index, edges);
+                            data.current.intern_light_green_node(&data.previous, prev_index, edges);
 
                         (DepNodeColor::Green(dep_node_index), dep_node_index)
                     } else {
@@ -479,9 +485,10 @@ impl<K: DepKind> DepGraph<K> {
         match data.hybrid_indices[dep_node_index].into() {
             HybridIndex::New(new_index) => data.new.nodes[new_index],
             HybridIndex::Red(red_index) => previous.index_to_node(data.red.node_indices[red_index]),
-            HybridIndex::Green(green_index) => {
-                previous.index_to_node(data.green.node_indices[green_index])
+            HybridIndex::LightGreen(light_green_index) => {
+                previous.index_to_node(data.light_green.node_indices[light_green_index])
             }
+            HybridIndex::DarkGreen(prev_index) => previous.index_to_node(prev_index),
         }
     }
 
@@ -494,9 +501,10 @@ impl<K: DepKind> DepGraph<K> {
         match data.hybrid_indices[dep_node_index].into() {
             HybridIndex::New(new_index) => data.new.fingerprints[new_index],
             HybridIndex::Red(red_index) => data.red.fingerprints[red_index],
-            HybridIndex::Green(green_index) => {
-                previous.fingerprint_by_index(data.green.node_indices[green_index])
+            HybridIndex::LightGreen(light_green_index) => {
+                previous.fingerprint_by_index(data.light_green.node_indices[light_green_index])
             }
+            HybridIndex::DarkGreen(prev_index) => previous.fingerprint_by_index(prev_index),
         }
     }
 
@@ -547,19 +555,38 @@ impl<K: DepKind> DepGraph<K> {
         }
     }
 
+    #[inline]
+    fn edge_count(&self) -> usize {
+        let data = self.data.as_ref().unwrap();
+        let previous = &data.previous;
+        let data = data.current.data.lock();
+
+        // Linearly scanning each collection is a bit faster than scanning
+        // `hybrid_indices` and bouncing around the different collections.
+        let mut edge_count = data.new.edges.iter().map(|e| e.len()).sum::<usize>()
+            + data.red.edges.iter().map(|e| e.len()).sum::<usize>()
+            + data.light_green.edges.iter().map(|e| e.len()).sum::<usize>();
+
+        for &hybrid_index in data.hybrid_indices.iter() {
+            if let HybridIndex::DarkGreen(prev_index) = hybrid_index.into() {
+                edge_count += previous.edge_targets_from(prev_index).len()
+            }
+        }
+
+        edge_count
+    }
+
     pub fn serialize(&self) -> SerializedDepGraph<K> {
         type SDNI = SerializedDepNodeIndex;
 
+        // We call this before acquiring locks, since it also acquires them.
+        let edge_count = self.edge_count();
         let data = self.data.as_ref().unwrap();
         let previous = &data.previous;
+        let prev_index_to_index = data.current.prev_index_to_index.lock();
         let data = data.current.data.lock();
-
         let node_count = data.hybrid_indices.len();
 
-        let edge_count = data.new.edges.iter().map(|e| e.len()).sum::<usize>()
-            + data.red.edges.iter().map(|e| e.len()).sum::<usize>()
-            + data.green.edges.iter().map(|e| e.len()).sum::<usize>();
-
         let mut nodes = IndexVec::with_capacity(node_count);
         let mut fingerprints = IndexVec::with_capacity(node_count);
         let mut edge_list_indices = IndexVec::with_capacity(node_count);
@@ -590,11 +617,20 @@ impl<K: DepKind> DepGraph<K> {
                     fingerprints.push(red.fingerprints[i]);
                     add_edges(&mut edge_list_indices, &mut edge_list_data, red.edges[i].iter());
                 }
-                HybridIndex::Green(i) => {
-                    let green = &data.green;
-                    nodes.push(previous.index_to_node(green.node_indices[i]));
-                    fingerprints.push(previous.fingerprint_by_index(green.node_indices[i]));
-                    add_edges(&mut edge_list_indices, &mut edge_list_data, green.edges[i].iter());
+                HybridIndex::LightGreen(i) => {
+                    let lg = &data.light_green;
+                    nodes.push(previous.index_to_node(lg.node_indices[i]));
+                    fingerprints.push(previous.fingerprint_by_index(lg.node_indices[i]));
+                    add_edges(&mut edge_list_indices, &mut edge_list_data, lg.edges[i].iter());
+                }
+                HybridIndex::DarkGreen(prev_index) => {
+                    nodes.push(previous.index_to_node(prev_index));
+                    fingerprints.push(previous.fingerprint_by_index(prev_index));
+                    let edges_iter = previous
+                        .edge_targets_from(prev_index)
+                        .iter()
+                        .map(|&dst| prev_index_to_index[dst].as_ref().unwrap());
+                    add_edges(&mut edge_list_indices, &mut edge_list_data, edges_iter);
                 }
             }
         }
@@ -688,13 +724,11 @@ impl<K: DepKind> DepGraph<K> {
 
         let prev_deps = data.previous.edge_targets_from(prev_dep_node_index);
 
-        let mut current_deps = SmallVec::new();
-
         for &dep_dep_node_index in prev_deps {
             let dep_dep_node_color = data.colors.get(dep_dep_node_index);
 
             match dep_dep_node_color {
-                Some(DepNodeColor::Green(node_index)) => {
+                Some(DepNodeColor::Green(_)) => {
                     // This dependency has been marked as green before, we are
                     // still fine and can continue with checking the other
                     // dependencies.
@@ -704,7 +738,6 @@ impl<K: DepKind> DepGraph<K> {
                         dep_node,
                         data.previous.index_to_node(dep_dep_node_index)
                     );
-                    current_deps.push(node_index);
                 }
                 Some(DepNodeColor::Red) => {
                     // We found a dependency the value of which has changed
@@ -737,13 +770,12 @@ impl<K: DepKind> DepGraph<K> {
                             dep_dep_node_index,
                             dep_dep_node,
                         );
-                        if let Some(node_index) = node_index {
+                        if node_index.is_some() {
                             debug!(
                                 "try_mark_previous_green({:?}) --- managed to MARK \
                                     dependency {:?} as green",
                                 dep_node, dep_dep_node
                             );
-                            current_deps.push(node_index);
                             continue;
                         }
                     }
@@ -758,13 +790,12 @@ impl<K: DepKind> DepGraph<K> {
                         let dep_dep_node_color = data.colors.get(dep_dep_node_index);
 
                         match dep_dep_node_color {
-                            Some(DepNodeColor::Green(node_index)) => {
+                            Some(DepNodeColor::Green(_)) => {
                                 debug!(
                                     "try_mark_previous_green({:?}) --- managed to \
                                         FORCE dependency {:?} to green",
                                     dep_node, dep_dep_node
                                 );
-                                current_deps.push(node_index);
                             }
                             Some(DepNodeColor::Red) => {
                                 debug!(
@@ -822,7 +853,7 @@ impl<K: DepKind> DepGraph<K> {
         let dep_node_index = {
             // We allocating an entry for the node in the current dependency graph and
             // adding all the appropriate edges imported from the previous graph
-            data.current.intern_green_node(&data.previous, prev_dep_node_index, current_deps)
+            data.current.intern_dark_green_node(&data.previous, prev_dep_node_index)
         };
 
         // ... emitting any stored diagnostic ...
@@ -1015,9 +1046,9 @@ rustc_index::newtype_index! {
     }
 }
 
-// Index type for `GreenDepNodeData`.
+// Index type for `LightGreenDepNodeData`.
 rustc_index::newtype_index! {
-    struct GreenDepNodeIndex {
+    struct LightGreenDepNodeIndex {
         MAX = 0x7FFF_FFFF
     }
 }
@@ -1030,7 +1061,8 @@ struct CompressedHybridIndex(u32);
 impl CompressedHybridIndex {
     const NEW_TAG: u32 = 0b0000_0000_0000_0000_0000_0000_0000_0000;
     const RED_TAG: u32 = 0b0100_0000_0000_0000_0000_0000_0000_0000;
-    const GREEN_TAG: u32 = 0b1000_0000_0000_0000_0000_0000_0000_0000;
+    const LIGHT_GREEN_TAG: u32 = 0b1000_0000_0000_0000_0000_0000_0000_0000;
+    const DARK_GREEN_TAG: u32 = 0b1100_0000_0000_0000_0000_0000_0000_0000;
 
     const TAG_MASK: u32 = 0b1100_0000_0000_0000_0000_0000_0000_0000;
     const INDEX_MASK: u32 = !Self::TAG_MASK;
@@ -1050,10 +1082,17 @@ impl From<RedDepNodeIndex> for CompressedHybridIndex {
     }
 }
 
-impl From<GreenDepNodeIndex> for CompressedHybridIndex {
+impl From<LightGreenDepNodeIndex> for CompressedHybridIndex {
+    #[inline]
+    fn from(index: LightGreenDepNodeIndex) -> Self {
+        CompressedHybridIndex(Self::LIGHT_GREEN_TAG | index.as_u32())
+    }
+}
+
+impl From<SerializedDepNodeIndex> for CompressedHybridIndex {
     #[inline]
-    fn from(index: GreenDepNodeIndex) -> Self {
-        CompressedHybridIndex(Self::GREEN_TAG | index.as_u32())
+    fn from(index: SerializedDepNodeIndex) -> Self {
+        CompressedHybridIndex(Self::DARK_GREEN_TAG | index.as_u32())
     }
 }
 
@@ -1063,7 +1102,8 @@ impl From<GreenDepNodeIndex> for CompressedHybridIndex {
 enum HybridIndex {
     New(NewDepNodeIndex),
     Red(RedDepNodeIndex),
-    Green(GreenDepNodeIndex),
+    LightGreen(LightGreenDepNodeIndex),
+    DarkGreen(SerializedDepNodeIndex),
 }
 
 impl From<CompressedHybridIndex> for HybridIndex {
@@ -1074,8 +1114,11 @@ impl From<CompressedHybridIndex> for HybridIndex {
         match hybrid_index.0 & CompressedHybridIndex::TAG_MASK {
             CompressedHybridIndex::NEW_TAG => HybridIndex::New(NewDepNodeIndex::from_u32(index)),
             CompressedHybridIndex::RED_TAG => HybridIndex::Red(RedDepNodeIndex::from_u32(index)),
-            CompressedHybridIndex::GREEN_TAG => {
-                HybridIndex::Green(GreenDepNodeIndex::from_u32(index))
+            CompressedHybridIndex::LIGHT_GREEN_TAG => {
+                HybridIndex::LightGreen(LightGreenDepNodeIndex::from_u32(index))
+            }
+            CompressedHybridIndex::DARK_GREEN_TAG => {
+                HybridIndex::DarkGreen(SerializedDepNodeIndex::from_u32(index))
             }
             _ => unreachable!(),
         }
@@ -1087,6 +1130,32 @@ impl From<CompressedHybridIndex> for HybridIndex {
 /// We divide nodes this way because different types of nodes are able to share
 /// more or less data with the previous graph.
 ///
+/// To enable more sharing, we distinguish between two kinds of green nodes.
+/// Light green nodes are nodes in the previous graph that have been marked
+/// green because we re-executed their queries and the results were the same as
+/// in the previous session. Dark green nodes are nodes in the previous graph
+/// that have been marked green because we were able to mark all of their
+/// dependencies green.
+///
+/// Both light and dark green nodes can share the dep node and fingerprint with
+/// the previous graph, but for light green nodes, we can't be sure that the
+/// edges may be shared without comparing them against the previous edges, so we
+/// store them directly (an approach in which we compare edges with the previous
+/// edges to see if they can be shared was evaluated, but was not found to be
+/// very profitable).
+///
+/// For dark green nodes, we can share everything with the previous graph, which
+/// is why the `HybridIndex::DarkGreen` enum variant contains the index of the
+/// node in the previous graph, and why we don't have a separate collection for
+/// dark green node data--the collection is the `PreviousDepGraph` itself.
+///
+/// (Note that for dark green nodes, the edges in the previous graph
+/// (`SerializedDepNodeIndex`s) must be converted to edges in the current graph
+/// (`DepNodeIndex`s). `CurrentDepGraph` contains `prev_index_to_index`, which
+/// can perform this conversion. It should always be possible, as by definition,
+/// a dark green node is one whose dependencies from the previous session have
+/// all been marked green--which means `prev_index_to_index` contains them.)
+///
 /// Node data is stored in parallel vectors to eliminate the padding between
 /// elements that would be needed to satisfy alignment requirements of the
 /// structure that would contain all of a node's data. We could group tightly
@@ -1099,8 +1168,8 @@ struct DepNodeData<K> {
     /// Data for nodes in previous graph that have been marked red.
     red: RedDepNodeData,
 
-    /// Data for nodes in previous graph that have been marked green.
-    green: GreenDepNodeData,
+    /// Data for nodes in previous graph that have been marked light green.
+    light_green: LightGreenDepNodeData,
 
     /// Mapping from `DepNodeIndex` to an index into a collection above.
     /// Indicates which of the above collections contains a node's data.
@@ -1130,12 +1199,13 @@ struct RedDepNodeData {
     fingerprints: IndexVec<RedDepNodeIndex, Fingerprint>,
 }
 
-/// Data for nodes in previous graph that have been marked green. We can share
-/// both the dep node and the fingerprint with previous graph, but the edges may
-/// be different, so we store the latter directly.
-struct GreenDepNodeData {
-    node_indices: IndexVec<GreenDepNodeIndex, SerializedDepNodeIndex>,
-    edges: IndexVec<GreenDepNodeIndex, EdgesVec>,
+/// Data for nodes in previous graph that have been marked green because we
+/// re-executed their queries and the results were the same as in the previous
+/// session. We can share the dep node and the fingerprint with the previous
+/// graph, but the edges may be different, so we store them directly.
+struct LightGreenDepNodeData {
+    node_indices: IndexVec<LightGreenDepNodeIndex, SerializedDepNodeIndex>,
+    edges: IndexVec<LightGreenDepNodeIndex, EdgesVec>,
 }
 
 /// `CurrentDepGraph` stores the dependency graph for the current session. It
@@ -1164,10 +1234,9 @@ struct GreenDepNodeData {
 /// a `DepNodeIndex` typically just access the `data` field.
 ///
 /// We only need to manipulate at most two locks simultaneously:
-/// `new_node_to_index` and `data`, or `prev_index_to_index` and `data`. The
-/// only operation that must manipulate both locks is adding new nodes, in which
-/// case we first acquire the `new_node_to_index` or `prev_index_to_index` lock
-/// and then, once a new node is to be inserted, acquire the lock on `data`.
+/// `new_node_to_index` and `data`, or `prev_index_to_index` and `data`. When
+/// manipulating both, we acquire `new_node_to_index` or `prev_index_to_index`
+/// first, and `data` second.
 pub(super) struct CurrentDepGraph<K> {
     data: Lock<DepNodeData<K>>,
     new_node_to_index: Sharded<FxHashMap<DepNode<K>, DepNodeIndex>>,
@@ -1227,7 +1296,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
         // structures during full incremental builds, where they aren't used.
         let new_node_count_estimate = (prev_graph_node_count * 2) / 100 + 200;
         let red_node_count_estimate = (prev_graph_node_count * 3) / 100;
-        let green_node_count_estimate = (prev_graph_node_count * 95) / 100;
+        let light_green_node_count_estimate = (prev_graph_node_count * 25) / 100;
         let total_node_count_estimate = prev_graph_node_count + new_node_count_estimate;
 
         // We store a large collection of these in `prev_index_to_index` during
@@ -1247,9 +1316,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
                     edges: IndexVec::with_capacity(red_node_count_estimate),
                     fingerprints: IndexVec::with_capacity(red_node_count_estimate),
                 },
-                green: GreenDepNodeData {
-                    node_indices: IndexVec::with_capacity(green_node_count_estimate),
-                    edges: IndexVec::with_capacity(green_node_count_estimate),
+                light_green: LightGreenDepNodeData {
+                    node_indices: IndexVec::with_capacity(light_green_node_count_estimate),
+                    edges: IndexVec::with_capacity(light_green_node_count_estimate),
                 },
                 hybrid_indices: IndexVec::with_capacity(total_node_count_estimate),
             }),
@@ -1276,8 +1345,8 @@ impl<K: DepKind> CurrentDepGraph<K> {
     ) -> DepNodeIndex {
         debug_assert!(
             prev_graph.node_to_index_opt(&dep_node).is_none(),
-            "node in previous graph should be interned using \
-            `intern_red_node` or `intern_green_node`"
+            "node in previous graph should be interned using one \
+            of `intern_red_node`, `intern_light_green_node`, etc."
         );
 
         match self.new_node_to_index.get_shard_by_value(&dep_node).lock().entry(dep_node) {
@@ -1319,7 +1388,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
         }
     }
 
-    fn intern_green_node(
+    fn intern_light_green_node(
         &self,
         prev_graph: &PreviousDepGraph<K>,
         prev_index: SerializedDepNodeIndex,
@@ -1333,9 +1402,29 @@ impl<K: DepKind> CurrentDepGraph<K> {
             Some(dep_node_index) => dep_node_index,
             None => {
                 let mut data = self.data.lock();
-                let green_index = data.green.node_indices.push(prev_index);
-                data.green.edges.push(edges);
-                let dep_node_index = data.hybrid_indices.push(green_index.into());
+                let light_green_index = data.light_green.node_indices.push(prev_index);
+                data.light_green.edges.push(edges);
+                let dep_node_index = data.hybrid_indices.push(light_green_index.into());
+                prev_index_to_index[prev_index] = Some(dep_node_index);
+                dep_node_index
+            }
+        }
+    }
+
+    fn intern_dark_green_node(
+        &self,
+        prev_graph: &PreviousDepGraph<K>,
+        prev_index: SerializedDepNodeIndex,
+    ) -> DepNodeIndex {
+        self.debug_assert_not_in_new_nodes(prev_graph, prev_index);
+
+        let mut prev_index_to_index = self.prev_index_to_index.lock();
+
+        match prev_index_to_index[prev_index] {
+            Some(dep_node_index) => dep_node_index,
+            None => {
+                let mut data = self.data.lock();
+                let dep_node_index = data.hybrid_indices.push(prev_index.into());
                 prev_index_to_index[prev_index] = Some(dep_node_index);
                 dep_node_index
             }
diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs
index 932c6d2a2f184..28e0740691852 100644
--- a/compiler/rustc_query_system/src/dep_graph/serialized.rs
+++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs
@@ -4,8 +4,13 @@ use super::{DepKind, DepNode};
 use rustc_data_structures::fingerprint::Fingerprint;
 use rustc_index::vec::IndexVec;
 
+// The maximum value of `SerializedDepNodeIndex` leaves the upper two bits
+// unused so that we can store multiple index types in `CompressedHybridIndex`,
+// and use those bits to encode which index type it contains.
 rustc_index::newtype_index! {
-    pub struct SerializedDepNodeIndex { .. }
+    pub struct SerializedDepNodeIndex {
+        MAX = 0x7FFF_FFFF
+    }
 }
 
 /// Data for use when recompiling the **current crate**.

From dd1ab840d279c96adf5d7778670f14a6398b595a Mon Sep 17 00:00:00 2001
From: Tyson Nottingham <tgnottingham@gmail.com>
Date: Mon, 30 Nov 2020 21:07:08 -0800
Subject: [PATCH 3/8] rustc_query_system: use more space-efficient edges
 representation

Use single vector of edges rather than per-node vector. There is a small
hit to instruction counts (< 0.5%), but the memory savings make up for it.
---
 .../rustc_query_system/src/dep_graph/graph.rs | 137 ++++++++++++------
 .../rustc_query_system/src/dep_graph/query.rs |  18 ++-
 2 files changed, 103 insertions(+), 52 deletions(-)

diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index 24508a5237369..076dcb4fed1fc 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -15,6 +15,7 @@ use std::env;
 use std::hash::Hash;
 use std::marker::PhantomData;
 use std::mem;
+use std::ops::Range;
 use std::sync::atomic::Ordering::Relaxed;
 
 use super::debug::EdgeFilter;
@@ -143,42 +144,48 @@ impl<K: DepKind> DepGraph<K> {
         let node_count = data.hybrid_indices.len();
 
         let mut nodes = Vec::with_capacity(node_count);
-        let mut edges = Vec::with_capacity(edge_count);
-
-        for (index, &hybrid_index) in data.hybrid_indices.iter_enumerated() {
-            let src = index.index();
+        let mut edge_list_indices = Vec::with_capacity(node_count);
+        let mut edge_list_data = Vec::with_capacity(edge_count);
+        edge_list_data.extend(data.unshared_edges.iter().map(|i| i.index()));
 
+        for &hybrid_index in data.hybrid_indices.iter() {
             match hybrid_index.into() {
                 HybridIndex::New(new_index) => {
-                    let new = &data.new;
-                    nodes.push(new.nodes[new_index]);
-                    edges.extend(new.edges[new_index].iter().map(|dst| (src, dst.index())));
+                    nodes.push(data.new.nodes[new_index]);
+                    let edges = &data.new.edges[new_index];
+                    edge_list_indices.push((edges.start.index(), edges.end.index()));
                 }
                 HybridIndex::Red(red_index) => {
-                    let red = &data.red;
-                    nodes.push(previous.index_to_node(red.node_indices[red_index]));
-                    edges.extend(red.edges[red_index].iter().map(|dst| (src, dst.index())));
+                    nodes.push(previous.index_to_node(data.red.node_indices[red_index]));
+                    let edges = &data.red.edges[red_index];
+                    edge_list_indices.push((edges.start.index(), edges.end.index()));
                 }
                 HybridIndex::LightGreen(lg_index) => {
-                    let lg = &data.light_green;
-                    nodes.push(previous.index_to_node(lg.node_indices[lg_index]));
-                    edges.extend(lg.edges[lg_index].iter().map(|dst| (src, dst.index())));
+                    nodes.push(previous.index_to_node(data.light_green.node_indices[lg_index]));
+                    let edges = &data.light_green.edges[lg_index];
+                    edge_list_indices.push((edges.start.index(), edges.end.index()));
                 }
                 HybridIndex::DarkGreen(prev_index) => {
                     nodes.push(previous.index_to_node(prev_index));
+
                     let edges_iter = previous
                         .edge_targets_from(prev_index)
                         .iter()
-                        .map(|&dst| (src, prev_index_to_index[dst].unwrap().index()));
-                    edges.extend(edges_iter);
+                        .map(|&dst| prev_index_to_index[dst].unwrap().index());
+
+                    let start = edge_list_data.len();
+                    edge_list_data.extend(edges_iter);
+                    let end = edge_list_data.len();
+                    edge_list_indices.push((start, end));
                 }
             }
         }
 
         debug_assert_eq!(nodes.len(), node_count);
-        debug_assert_eq!(edges.len(), edge_count);
+        debug_assert_eq!(edge_list_indices.len(), node_count);
+        debug_assert_eq!(edge_list_data.len(), edge_count);
 
-        DepGraphQuery::new(&nodes[..], &edges[..])
+        DepGraphQuery::new(&nodes[..], &edge_list_indices[..], &edge_list_data[..])
     }
 
     pub fn assert_ignored(&self) {
@@ -561,11 +568,7 @@ impl<K: DepKind> DepGraph<K> {
         let previous = &data.previous;
         let data = data.current.data.lock();
 
-        // Linearly scanning each collection is a bit faster than scanning
-        // `hybrid_indices` and bouncing around the different collections.
-        let mut edge_count = data.new.edges.iter().map(|e| e.len()).sum::<usize>()
-            + data.red.edges.iter().map(|e| e.len()).sum::<usize>()
-            + data.light_green.edges.iter().map(|e| e.len()).sum::<usize>();
+        let mut edge_count = data.unshared_edges.len();
 
         for &hybrid_index in data.hybrid_indices.iter() {
             if let HybridIndex::DarkGreen(prev_index) = hybrid_index.into() {
@@ -591,17 +594,7 @@ impl<K: DepKind> DepGraph<K> {
         let mut fingerprints = IndexVec::with_capacity(node_count);
         let mut edge_list_indices = IndexVec::with_capacity(node_count);
         let mut edge_list_data = Vec::with_capacity(edge_count);
-
-        fn add_edges<'a, I: Iterator<Item = &'a DepNodeIndex>>(
-            edge_list_indices: &mut IndexVec<SerializedDepNodeIndex, (u32, u32)>,
-            edge_list_data: &mut Vec<SerializedDepNodeIndex>,
-            iter: I,
-        ) {
-            let start = edge_list_data.len() as u32;
-            edge_list_data.extend(iter.map(|i| SDNI::new(i.index())));
-            let end = edge_list_data.len() as u32;
-            edge_list_indices.push((start, end));
-        };
+        edge_list_data.extend(data.unshared_edges.iter().map(|i| SDNI::new(i.index())));
 
         for &hybrid_index in data.hybrid_indices.iter() {
             match hybrid_index.into() {
@@ -609,28 +602,36 @@ impl<K: DepKind> DepGraph<K> {
                     let new = &data.new;
                     nodes.push(new.nodes[i]);
                     fingerprints.push(new.fingerprints[i]);
-                    add_edges(&mut edge_list_indices, &mut edge_list_data, new.edges[i].iter());
+                    let edges = &new.edges[i];
+                    edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32()));
                 }
                 HybridIndex::Red(i) => {
                     let red = &data.red;
                     nodes.push(previous.index_to_node(red.node_indices[i]));
                     fingerprints.push(red.fingerprints[i]);
-                    add_edges(&mut edge_list_indices, &mut edge_list_data, red.edges[i].iter());
+                    let edges = &red.edges[i];
+                    edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32()));
                 }
                 HybridIndex::LightGreen(i) => {
                     let lg = &data.light_green;
                     nodes.push(previous.index_to_node(lg.node_indices[i]));
                     fingerprints.push(previous.fingerprint_by_index(lg.node_indices[i]));
-                    add_edges(&mut edge_list_indices, &mut edge_list_data, lg.edges[i].iter());
+                    let edges = &lg.edges[i];
+                    edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32()));
                 }
                 HybridIndex::DarkGreen(prev_index) => {
                     nodes.push(previous.index_to_node(prev_index));
                     fingerprints.push(previous.fingerprint_by_index(prev_index));
+
                     let edges_iter = previous
                         .edge_targets_from(prev_index)
                         .iter()
                         .map(|&dst| prev_index_to_index[dst].as_ref().unwrap());
-                    add_edges(&mut edge_list_indices, &mut edge_list_data, edges_iter);
+
+                    let start = edge_list_data.len() as u32;
+                    edge_list_data.extend(edges_iter.map(|i| SDNI::new(i.index())));
+                    let end = edge_list_data.len() as u32;
+                    edge_list_indices.push((start, end));
                 }
             }
         }
@@ -1125,6 +1126,11 @@ impl From<CompressedHybridIndex> for HybridIndex {
     }
 }
 
+// Index type for `DepNodeData`'s edges.
+rustc_index::newtype_index! {
+    struct EdgeIndex { .. }
+}
+
 /// Data for nodes in the current graph, divided into different collections
 /// based on their presence in the previous graph, and if present, their color.
 /// We divide nodes this way because different types of nodes are able to share
@@ -1171,6 +1177,16 @@ struct DepNodeData<K> {
     /// Data for nodes in previous graph that have been marked light green.
     light_green: LightGreenDepNodeData,
 
+    // Edges for all nodes other than dark-green ones. Edges for each node
+    // occupy a contiguous region of this collection, which a node can reference
+    // using two indices. Storing edges this way rather than using an `EdgesVec`
+    // for each node reduces memory consumption by a not insignificant amount
+    // when compiling large crates. The downside is that we have to copy into
+    // this collection the edges from the `EdgesVec`s that are built up during
+    // query execution. But this is mostly balanced out by the more efficient
+    // implementation of `DepGraph::serialize` enabled by this representation.
+    unshared_edges: IndexVec<EdgeIndex, DepNodeIndex>,
+
     /// Mapping from `DepNodeIndex` to an index into a collection above.
     /// Indicates which of the above collections contains a node's data.
     ///
@@ -1186,7 +1202,7 @@ struct DepNodeData<K> {
 /// the previous graph, so we must store all of such a node's data here.
 struct NewDepNodeData<K> {
     nodes: IndexVec<NewDepNodeIndex, DepNode<K>>,
-    edges: IndexVec<NewDepNodeIndex, EdgesVec>,
+    edges: IndexVec<NewDepNodeIndex, Range<EdgeIndex>>,
     fingerprints: IndexVec<NewDepNodeIndex, Fingerprint>,
 }
 
@@ -1195,7 +1211,7 @@ struct NewDepNodeData<K> {
 /// fingerprint is known to be different, so we store the latter two directly.
 struct RedDepNodeData {
     node_indices: IndexVec<RedDepNodeIndex, SerializedDepNodeIndex>,
-    edges: IndexVec<RedDepNodeIndex, EdgesVec>,
+    edges: IndexVec<RedDepNodeIndex, Range<EdgeIndex>>,
     fingerprints: IndexVec<RedDepNodeIndex, Fingerprint>,
 }
 
@@ -1205,7 +1221,7 @@ struct RedDepNodeData {
 /// graph, but the edges may be different, so we store them directly.
 struct LightGreenDepNodeData {
     node_indices: IndexVec<LightGreenDepNodeIndex, SerializedDepNodeIndex>,
-    edges: IndexVec<LightGreenDepNodeIndex, EdgesVec>,
+    edges: IndexVec<LightGreenDepNodeIndex, Range<EdgeIndex>>,
 }
 
 /// `CurrentDepGraph` stores the dependency graph for the current session. It
@@ -1294,11 +1310,27 @@ impl<K: DepKind> CurrentDepGraph<K> {
         // not be enough. The allocation for red and green node data doesn't
         // include a constant, as we don't want to allocate anything for these
         // structures during full incremental builds, where they aren't used.
+        //
+        // These estimates are based on the distribution of node and edge counts
+        // seen in rustc-perf benchmarks, adjusted somewhat to account for the
+        // fact that these benchmarks aren't perfectly representative.
+        //
+        // FIXME Use a collection type that doesn't copy node and edge data and
+        // grow multiplicatively on reallocation. Without such a collection or
+        // solution having the same effect, there is a performance hazard here
+        // in both time and space, as growing these collections means copying a
+        // large amount of data and doubling already large buffer capacities. A
+        // solution for this will also mean that it's less important to get
+        // these estimates right.
         let new_node_count_estimate = (prev_graph_node_count * 2) / 100 + 200;
         let red_node_count_estimate = (prev_graph_node_count * 3) / 100;
         let light_green_node_count_estimate = (prev_graph_node_count * 25) / 100;
         let total_node_count_estimate = prev_graph_node_count + new_node_count_estimate;
 
+        let average_edges_per_node_estimate = 6;
+        let unshared_edge_count_estimate = average_edges_per_node_estimate
+            * (new_node_count_estimate + red_node_count_estimate + light_green_node_count_estimate);
+
         // We store a large collection of these in `prev_index_to_index` during
         // non-full incremental builds, and want to ensure that the element size
         // doesn't inadvertently increase.
@@ -1320,6 +1352,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
                     node_indices: IndexVec::with_capacity(light_green_node_count_estimate),
                     edges: IndexVec::with_capacity(light_green_node_count_estimate),
                 },
+                unshared_edges: IndexVec::with_capacity(unshared_edge_count_estimate),
                 hybrid_indices: IndexVec::with_capacity(total_node_count_estimate),
             }),
             new_node_to_index: Sharded::new(|| {
@@ -1352,9 +1385,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
         match self.new_node_to_index.get_shard_by_value(&dep_node).lock().entry(dep_node) {
             Entry::Occupied(entry) => *entry.get(),
             Entry::Vacant(entry) => {
-                let mut data = self.data.lock();
+                let data = &mut *self.data.lock();
                 let new_index = data.new.nodes.push(dep_node);
-                data.new.edges.push(edges);
+                add_edges(&mut data.unshared_edges, &mut data.new.edges, edges);
                 data.new.fingerprints.push(fingerprint);
                 let dep_node_index = data.hybrid_indices.push(new_index.into());
                 entry.insert(dep_node_index);
@@ -1377,9 +1410,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
         match prev_index_to_index[prev_index] {
             Some(dep_node_index) => dep_node_index,
             None => {
-                let mut data = self.data.lock();
+                let data = &mut *self.data.lock();
                 let red_index = data.red.node_indices.push(prev_index);
-                data.red.edges.push(edges);
+                add_edges(&mut data.unshared_edges, &mut data.red.edges, edges);
                 data.red.fingerprints.push(fingerprint);
                 let dep_node_index = data.hybrid_indices.push(red_index.into());
                 prev_index_to_index[prev_index] = Some(dep_node_index);
@@ -1401,9 +1434,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
         match prev_index_to_index[prev_index] {
             Some(dep_node_index) => dep_node_index,
             None => {
-                let mut data = self.data.lock();
+                let data = &mut *self.data.lock();
                 let light_green_index = data.light_green.node_indices.push(prev_index);
-                data.light_green.edges.push(edges);
+                add_edges(&mut data.unshared_edges, &mut data.light_green.edges, edges);
                 let dep_node_index = data.hybrid_indices.push(light_green_index.into());
                 prev_index_to_index[prev_index] = Some(dep_node_index);
                 dep_node_index
@@ -1445,6 +1478,18 @@ impl<K: DepKind> CurrentDepGraph<K> {
     }
 }
 
+#[inline]
+fn add_edges<I: Idx>(
+    edges: &mut IndexVec<EdgeIndex, DepNodeIndex>,
+    edge_indices: &mut IndexVec<I, Range<EdgeIndex>>,
+    new_edges: EdgesVec,
+) {
+    let start = edges.next_index();
+    edges.extend(new_edges);
+    let end = edges.next_index();
+    edge_indices.push(start..end);
+}
+
 /// The capacity of the `reads` field `SmallVec`
 const TASK_DEPS_READS_CAP: usize = 8;
 type EdgesVec = SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>;
diff --git a/compiler/rustc_query_system/src/dep_graph/query.rs b/compiler/rustc_query_system/src/dep_graph/query.rs
index 82e99d64afeb6..cc25d08cb54f4 100644
--- a/compiler/rustc_query_system/src/dep_graph/query.rs
+++ b/compiler/rustc_query_system/src/dep_graph/query.rs
@@ -9,17 +9,23 @@ pub struct DepGraphQuery<K> {
 }
 
 impl<K: DepKind> DepGraphQuery<K> {
-    pub fn new(nodes: &[DepNode<K>], edges: &[(usize, usize)]) -> DepGraphQuery<K> {
-        let mut graph = Graph::with_capacity(nodes.len(), edges.len());
+    pub fn new(
+        nodes: &[DepNode<K>],
+        edge_list_indices: &[(usize, usize)],
+        edge_list_data: &[usize],
+    ) -> DepGraphQuery<K> {
+        let mut graph = Graph::with_capacity(nodes.len(), edge_list_data.len());
         let mut indices = FxHashMap::default();
         for node in nodes {
             indices.insert(*node, graph.add_node(*node));
         }
 
-        for &(source, target) in edges {
-            let source = indices[&nodes[source]];
-            let target = indices[&nodes[target]];
-            graph.add_edge(source, target, ());
+        for (source, &(start, end)) in edge_list_indices.iter().enumerate() {
+            for &target in &edge_list_data[start..end] {
+                let source = indices[&nodes[source]];
+                let target = indices[&nodes[target]];
+                graph.add_edge(source, target, ());
+            }
         }
 
         DepGraphQuery { graph, indices }

From 4f76266295ce1a3ecedc7de178c8b82fc8ba5206 Mon Sep 17 00:00:00 2001
From: Tyson Nottingham <tgnottingham@gmail.com>
Date: Mon, 30 Nov 2020 21:13:49 -0800
Subject: [PATCH 4/8] rustc_query_system: minor cleanup

Remove effectively unused parameter and delete out of date comment.
---
 .../rustc_query_system/src/dep_graph/graph.rs | 20 +++----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index 076dcb4fed1fc..fefc8ed6f1a99 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -242,7 +242,6 @@ impl<K: DepKind> DepGraph<K> {
             key,
             cx,
             arg,
-            false,
             task,
             |_key| {
                 Some(TaskDeps {
@@ -262,29 +261,16 @@ impl<K: DepKind> DepGraph<K> {
         key: DepNode<K>,
         cx: Ctxt,
         arg: A,
-        no_tcx: bool,
         task: fn(Ctxt, A) -> R,
         create_task: fn(DepNode<K>) -> Option<TaskDeps<K>>,
         hash_result: impl FnOnce(&mut Ctxt::StableHashingContext, &R) -> Option<Fingerprint>,
     ) -> (R, DepNodeIndex) {
         if let Some(ref data) = self.data {
             let task_deps = create_task(key).map(Lock::new);
-
-            // In incremental mode, hash the result of the task. We don't
-            // do anything with the hash yet, but we are computing it
-            // anyway so that
-            //  - we make sure that the infrastructure works and
-            //  - we can get an idea of the runtime cost.
-            let mut hcx = cx.create_stable_hashing_context();
-
-            let result = if no_tcx {
-                task(cx, arg)
-            } else {
-                K::with_deps(task_deps.as_ref(), || task(cx, arg))
-            };
-
+            let result = K::with_deps(task_deps.as_ref(), || task(cx, arg));
             let edges = task_deps.map_or_else(|| smallvec![], |lock| lock.into_inner().reads);
 
+            let mut hcx = cx.create_stable_hashing_context();
             let current_fingerprint = hash_result(&mut hcx, &result);
 
             let print_status = cfg!(debug_assertions) && cx.debug_dep_tasks();
@@ -413,7 +399,7 @@ impl<K: DepKind> DepGraph<K> {
         task: fn(Ctxt, A) -> R,
         hash_result: impl FnOnce(&mut Ctxt::StableHashingContext, &R) -> Option<Fingerprint>,
     ) -> (R, DepNodeIndex) {
-        self.with_task_impl(key, cx, arg, false, task, |_| None, hash_result)
+        self.with_task_impl(key, cx, arg, task, |_| None, hash_result)
     }
 
     #[inline]

From 712fcae13a17940d180a4756eb8fff40470c86d4 Mon Sep 17 00:00:00 2001
From: Tyson Nottingham <tgnottingham@gmail.com>
Date: Tue, 15 Dec 2020 22:42:38 -0800
Subject: [PATCH 5/8] rustc_query_system: remove inline annotation from
 edge_count

This isn't called frequently enough to justify inlining.
---
 compiler/rustc_query_system/src/dep_graph/graph.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index fefc8ed6f1a99..87087dc7b7bf9 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -548,7 +548,6 @@ impl<K: DepKind> DepGraph<K> {
         }
     }
 
-    #[inline]
     fn edge_count(&self) -> usize {
         let data = self.data.as_ref().unwrap();
         let previous = &data.previous;

From d6b2aaed7d67e0e24e9120830697196a847a0b43 Mon Sep 17 00:00:00 2001
From: Tyson Nottingham <tgnottingham@gmail.com>
Date: Tue, 15 Dec 2020 22:57:21 -0800
Subject: [PATCH 6/8] rustc_query_system: rename intern_node to intern_new_node

---
 compiler/rustc_query_system/src/dep_graph/graph.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index 87087dc7b7bf9..c739ec3e6fd43 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -333,7 +333,7 @@ impl<K: DepKind> DepGraph<K> {
                     eprintln!("[task::new] {:?}", key);
                 }
 
-                data.current.intern_node(
+                data.current.intern_new_node(
                     &data.previous,
                     key,
                     edges,
@@ -376,7 +376,7 @@ impl<K: DepKind> DepGraph<K> {
                 hash: data.current.anon_id_seed.combine(hasher.finish()).into(),
             };
 
-            let dep_node_index = data.current.intern_node(
+            let dep_node_index = data.current.intern_new_node(
                 &data.previous,
                 target_dep_node,
                 task_deps.reads,
@@ -1354,7 +1354,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
         }
     }
 
-    fn intern_node(
+    fn intern_new_node(
         &self,
         prev_graph: &PreviousDepGraph<K>,
         dep_node: DepNode<K>,

From 22ed75158bd1ef32d1ba08a58b56f2608c105c0d Mon Sep 17 00:00:00 2001
From: Tyson Nottingham <tgnottingham@gmail.com>
Date: Tue, 15 Dec 2020 22:58:19 -0800
Subject: [PATCH 7/8] rustc_query_system: add more comments for dependency
 graph

---
 .../rustc_query_system/src/dep_graph/graph.rs | 34 ++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index c739ec3e6fd43..8c7217dd32943 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -136,9 +136,12 @@ impl<K: DepKind> DepGraph<K> {
 
     pub fn query(&self) -> DepGraphQuery<K> {
         // We call this before acquiring locks, since it also acquires them.
+        // The extra locking is not a big deal, as this gets called rarely.
         let edge_count = self.edge_count();
         let data = self.data.as_ref().unwrap();
         let previous = &data.previous;
+
+        // Note locking order: `prev_index_to_index`, then `data`.
         let prev_index_to_index = data.current.prev_index_to_index.lock();
         let data = data.current.data.lock();
         let node_count = data.hybrid_indices.len();
@@ -146,6 +149,9 @@ impl<K: DepKind> DepGraph<K> {
         let mut nodes = Vec::with_capacity(node_count);
         let mut edge_list_indices = Vec::with_capacity(node_count);
         let mut edge_list_data = Vec::with_capacity(edge_count);
+
+        // See `serialize` for notes on the approach used here.
+
         edge_list_data.extend(data.unshared_edges.iter().map(|i| i.index()));
 
         for &hybrid_index in data.hybrid_indices.iter() {
@@ -285,6 +291,8 @@ impl<K: DepKind> DepGraph<K> {
                             eprintln!("[task::green] {:?}", key);
                         }
 
+                        // This is a light green node: it existed in the previous compilation,
+                        // its query was re-executed, and it has the same result as before.
                         let dep_node_index =
                             data.current.intern_light_green_node(&data.previous, prev_index, edges);
 
@@ -294,6 +302,8 @@ impl<K: DepKind> DepGraph<K> {
                             eprintln!("[task::red] {:?}", key);
                         }
 
+                        // This is a red node: it existed in the previous compilation, its query
+                        // was re-executed, but it has a different result from before.
                         let dep_node_index = data.current.intern_red_node(
                             &data.previous,
                             prev_index,
@@ -308,6 +318,10 @@ impl<K: DepKind> DepGraph<K> {
                         eprintln!("[task::unknown] {:?}", key);
                     }
 
+                    // This is a red node, effectively: it existed in the previous compilation
+                    // session, its query was re-executed, but it doesn't compute a result hash
+                    // (i.e. it represents a `no_hash` query), so we have no way of determining
+                    // whether or not the result was the same as before.
                     let dep_node_index = data.current.intern_red_node(
                         &data.previous,
                         prev_index,
@@ -315,7 +329,6 @@ impl<K: DepKind> DepGraph<K> {
                         Fingerprint::ZERO,
                     );
 
-                    // Mark the node as Red if we can't hash the result
                     (DepNodeColor::Red, dep_node_index)
                 };
 
@@ -333,6 +346,7 @@ impl<K: DepKind> DepGraph<K> {
                     eprintln!("[task::new] {:?}", key);
                 }
 
+                // This is a new node: it didn't exist in the previous compilation session.
                 data.current.intern_new_node(
                     &data.previous,
                     key,
@@ -343,6 +357,10 @@ impl<K: DepKind> DepGraph<K> {
 
             (result, dep_node_index)
         } else {
+            // Incremental compilation is turned off. We just execute the task
+            // without tracking. We still provide a dep-node index that uniquely
+            // identifies the task so that we have a cheap way of referring to
+            // the query for self-profiling.
             (task(cx, arg), self.next_virtual_depnode_index())
         }
     }
@@ -568,9 +586,12 @@ impl<K: DepKind> DepGraph<K> {
         type SDNI = SerializedDepNodeIndex;
 
         // We call this before acquiring locks, since it also acquires them.
+        // The extra locking is not a big deal, as this only gets called once.
         let edge_count = self.edge_count();
         let data = self.data.as_ref().unwrap();
         let previous = &data.previous;
+
+        // Note locking order: `prev_index_to_index`, then `data`.
         let prev_index_to_index = data.current.prev_index_to_index.lock();
         let data = data.current.data.lock();
         let node_count = data.hybrid_indices.len();
@@ -579,6 +600,17 @@ impl<K: DepKind> DepGraph<K> {
         let mut fingerprints = IndexVec::with_capacity(node_count);
         let mut edge_list_indices = IndexVec::with_capacity(node_count);
         let mut edge_list_data = Vec::with_capacity(edge_count);
+
+        // `rustc_middle::ty::query::OnDiskCache` expects nodes to be in
+        // `DepNodeIndex` order. The edges in `edge_list_data`, on the other
+        // hand, don't need to be in a particular order, as long as each node
+        // can reference its edges as a contiguous range within it. This is why
+        // we're able to copy `unshared_edges` directly into `edge_list_data`.
+        // It meets the above requirements, and each non-dark-green node already
+        // knows the range of edges to reference within it, which they'll push
+        // onto `edge_list_indices`. Dark green nodes, however, don't have their
+        // edges in `unshared_edges`, so need to add them to `edge_list_data`.
+
         edge_list_data.extend(data.unshared_edges.iter().map(|i| SDNI::new(i.index())));
 
         for &hybrid_index in data.hybrid_indices.iter() {

From 03eb75f759a0c027d07b7d6460f630a373b03638 Mon Sep 17 00:00:00 2001
From: Tyson Nottingham <tgnottingham@gmail.com>
Date: Tue, 22 Dec 2020 10:38:48 -0800
Subject: [PATCH 8/8] rustc_query_system: avoid race condition when using
 edge_count

---
 .../rustc_query_system/src/dep_graph/graph.rs   | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
index 8c7217dd32943..605d7ae4af678 100644
--- a/compiler/rustc_query_system/src/dep_graph/graph.rs
+++ b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -3,7 +3,7 @@ use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_data_structures::profiling::QueryInvocationId;
 use rustc_data_structures::sharded::{self, Sharded};
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
-use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, Lrc, Ordering};
+use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, LockGuard, Lrc, Ordering};
 use rustc_data_structures::unlikely;
 use rustc_errors::Diagnostic;
 use rustc_index::vec::{Idx, IndexVec};
@@ -135,9 +135,6 @@ impl<K: DepKind> DepGraph<K> {
     }
 
     pub fn query(&self) -> DepGraphQuery<K> {
-        // We call this before acquiring locks, since it also acquires them.
-        // The extra locking is not a big deal, as this gets called rarely.
-        let edge_count = self.edge_count();
         let data = self.data.as_ref().unwrap();
         let previous = &data.previous;
 
@@ -145,6 +142,7 @@ impl<K: DepKind> DepGraph<K> {
         let prev_index_to_index = data.current.prev_index_to_index.lock();
         let data = data.current.data.lock();
         let node_count = data.hybrid_indices.len();
+        let edge_count = self.edge_count(&data);
 
         let mut nodes = Vec::with_capacity(node_count);
         let mut edge_list_indices = Vec::with_capacity(node_count);
@@ -566,14 +564,13 @@ impl<K: DepKind> DepGraph<K> {
         }
     }
 
-    fn edge_count(&self) -> usize {
+    fn edge_count(&self, node_data: &LockGuard<'_, DepNodeData<K>>) -> usize {
         let data = self.data.as_ref().unwrap();
         let previous = &data.previous;
-        let data = data.current.data.lock();
 
-        let mut edge_count = data.unshared_edges.len();
+        let mut edge_count = node_data.unshared_edges.len();
 
-        for &hybrid_index in data.hybrid_indices.iter() {
+        for &hybrid_index in node_data.hybrid_indices.iter() {
             if let HybridIndex::DarkGreen(prev_index) = hybrid_index.into() {
                 edge_count += previous.edge_targets_from(prev_index).len()
             }
@@ -585,9 +582,6 @@ impl<K: DepKind> DepGraph<K> {
     pub fn serialize(&self) -> SerializedDepGraph<K> {
         type SDNI = SerializedDepNodeIndex;
 
-        // We call this before acquiring locks, since it also acquires them.
-        // The extra locking is not a big deal, as this only gets called once.
-        let edge_count = self.edge_count();
         let data = self.data.as_ref().unwrap();
         let previous = &data.previous;
 
@@ -595,6 +589,7 @@ impl<K: DepKind> DepGraph<K> {
         let prev_index_to_index = data.current.prev_index_to_index.lock();
         let data = data.current.data.lock();
         let node_count = data.hybrid_indices.len();
+        let edge_count = self.edge_count(&data);
 
         let mut nodes = IndexVec::with_capacity(node_count);
         let mut fingerprints = IndexVec::with_capacity(node_count);