Skip to content

Commit c9d7f33

Browse files
authored
fix: Only consider available nodes for network operator assignment (#1067)
1 parent 97dfe8f commit c9d7f33

File tree

4 files changed

+20
-14
lines changed

4 files changed

+20
-14
lines changed

deny.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ ignore = [
7777
{ id = "RUSTSEC-2021-0145", reason = "the potential security issue only affects Windows, which we do not target" },
7878
{ id = "RUSTSEC-2024-0370", reason = "alternative not yet available" },
7979
{ id = "RUSTSEC-2024-0375", reason = "migration away from atty pending" },
80+
{ id = "RUSTSEC-2024-0384", reason = "instant is a transitive dependency" },
8081
#"[email protected]", # you can also ignore yanked crate versions if you wish
8182
#{ crate = "[email protected]", reason = "you can specify why you are ignoring the yanked crate" },
8283
]

rs/cli/src/runner.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ impl Runner {
451451
}
452452

453453
pub async fn remove_nodes(&self, nodes_remover: NodesRemover) -> anyhow::Result<RunnerProposal> {
454-
let (healths, nodes_with_proposals) = try_join(self.health_client.nodes(), self.registry.nodes_with_proposals()).await?;
454+
let (healths, nodes_with_proposals) = try_join(self.health_client.nodes(), self.registry.nodes_and_proposals()).await?;
455455
let (mut node_removals, motivation) = nodes_remover.remove_nodes(healths, nodes_with_proposals);
456456
node_removals.sort_by_key(|nr| nr.reason.message());
457457

@@ -528,27 +528,32 @@ impl Runner {
528528
let node_operators = self.registry.operators().await?;
529529
// Filter out nodes that are not healthy
530530
let nodes_all = self.registry.nodes().await?;
531-
let healthy_nodes = nodes_all
531+
let healthy_nodes = available_nodes
532532
.iter()
533-
.filter(|(node_id, _node)| health_of_nodes.get(*node_id) == Some(&HealthStatus::Healthy))
533+
.filter(|node| health_of_nodes.get(&node.id) == Some(&HealthStatus::Healthy))
534534
.collect::<Vec<_>>();
535535
// Build a hashmap {node_id -> node} of all nodes that are in some (any) subnet
536536
let nodes_in_subnets = subnets
537537
.values()
538538
.flat_map(|s| s.nodes.iter().map(|n| (n.principal, n)))
539539
.collect::<AHashMap<_, _>>();
540540
// Build a hashmap of healthy nodes, grouped by node operator {operator -> Vec<Node>}
541-
let healthy_nodes_by_operator = healthy_nodes.iter().into_group_map_by(|(_node_id, node)| node.operator.principal);
541+
let healthy_nodes_by_operator = healthy_nodes
542+
.iter()
543+
.into_group_map_by(|node| nodes_all.get(&node.id).expect("Node should exist").operator.principal);
542544
// Finally, prepare a list of operators that have at least 1 node, but NONE of their nodes are in a subnet
543545
let operators_without_nodes_in_subnets = node_operators
544546
.iter()
545547
.filter_map(|(operator_id, operator)| {
546548
if let Some(operator_nodes) = healthy_nodes_by_operator.get(operator_id) {
547-
if operator_nodes.iter().all(|(node_id, _node)| !nodes_in_subnets.contains_key(node_id)) {
549+
if operator_nodes.iter().all(|node| !nodes_in_subnets.contains_key(&node.id)) {
548550
Some((
549551
operator_id,
550552
operator.datacenter.as_ref().map(|dc| dc.name.clone()).unwrap_or_default(),
551-
operator_nodes.iter().map(|(_, node)| (*node).clone()).collect::<Vec<_>>(),
553+
operator_nodes
554+
.iter()
555+
.map(|node| nodes_all.get(&node.id).expect("Node should exist").clone())
556+
.collect::<Vec<_>>(),
552557
))
553558
} else {
554559
None

rs/decentralization/src/network.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1140,8 +1140,8 @@ impl SubnetChangeRequest {
11401140
.collect_vec();
11411141

11421142
info!(
1143-
"Resizing subnet {} by removing {} healthy and {} unhealthy nodes, and adding {}. Total available {} healthy nodes.",
1144-
self.subnet.id,
1143+
"Evaluating change in subnet {} membership: removing ({} healthy + {} unhealthy) and adding {} node. Total available {} healthy nodes.",
1144+
self.subnet.id.to_string().split_once('-').expect("subnet id is expected to have a -").0,
11451145
how_many_nodes_to_remove,
11461146
how_many_nodes_unhealthy,
11471147
how_many_nodes_to_add + self.include_nodes.len(),

rs/ic-management-backend/src/lazy_registry.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ pub trait LazyRegistry:
7171

7272
fn subnets(&self) -> BoxFuture<'_, anyhow::Result<Arc<IndexMap<PrincipalId, Subnet>>>>;
7373

74-
fn nodes_with_proposals(&self) -> BoxFuture<'_, anyhow::Result<Arc<IndexMap<PrincipalId, Node>>>>;
74+
fn nodes_and_proposals(&self) -> BoxFuture<'_, anyhow::Result<Arc<IndexMap<PrincipalId, Node>>>>;
7575

7676
fn nns_replica_version(&self) -> BoxFuture<'_, anyhow::Result<Option<String>>> {
7777
Box::pin(async {
@@ -683,7 +683,7 @@ impl LazyRegistry for LazyRegistryImpl {
683683
})
684684
}
685685

686-
fn nodes_with_proposals(&self) -> BoxFuture<'_, anyhow::Result<Arc<IndexMap<PrincipalId, Node>>>> {
686+
fn nodes_and_proposals(&self) -> BoxFuture<'_, anyhow::Result<Arc<IndexMap<PrincipalId, Node>>>> {
687687
Box::pin(async {
688688
let nodes = self.nodes().await?;
689689
if nodes.iter().any(|(_, n)| n.proposal.is_some()) {
@@ -897,15 +897,15 @@ impl decentralization::network::TopologyManager for LazyRegistryImpl {}
897897
impl AvailableNodesQuerier for LazyRegistryImpl {
898898
fn available_nodes(&self) -> BoxFuture<'_, Result<Vec<decentralization::network::Node>, ic_management_types::NetworkError>> {
899899
Box::pin(async {
900-
let (nodes, healths) = try_join!(self.nodes_with_proposals(), self.health_client.nodes())
900+
let (nodes_and_proposals, healths) = try_join!(self.nodes_and_proposals(), self.health_client.nodes())
901901
.map_err(|e| ic_management_types::NetworkError::DataRequestError(e.to_string()))?;
902-
let nodes = nodes
902+
let available_nodes = nodes_and_proposals
903903
.values()
904904
.filter(|n| n.subnet_id.is_none() && n.proposal.is_none() && n.duplicates.is_none() && !n.is_api_boundary_node)
905905
.cloned()
906906
.collect_vec();
907907

908-
Ok(nodes
908+
Ok(available_nodes
909909
.iter()
910910
.filter(|n| {
911911
// Keep only healthy nodes.
@@ -940,7 +940,7 @@ mock! {
940940

941941
fn subnets(&self) -> BoxFuture<'_, anyhow::Result<Arc<IndexMap<PrincipalId, Subnet>>>>;
942942

943-
fn nodes_with_proposals(&self) -> BoxFuture<'_, anyhow::Result<Arc<IndexMap<PrincipalId, Node>>>>;
943+
fn nodes_and_proposals(&self) -> BoxFuture<'_, anyhow::Result<Arc<IndexMap<PrincipalId, Node>>>>;
944944

945945
fn unassigned_nodes_replica_version(&self) -> BoxFuture<'_, anyhow::Result<Arc<String>>>;
946946

0 commit comments

Comments
 (0)