37 / 385 min read

Union-Find (Disjoint Set Union)

A data structure that tracks which elements belong to the same component. Union by rank and path compression give near-O(1) per operation — faster than BFS/DFS for dynamic connectivity.

The Pattern

Union-Find (also called Disjoint Set Union, DSU) maintains a forest of trees where each tree is a connected component. Two operations:

  • find(x): return the root (representative) of x's component
  • union(x, y): merge the components of x and y

Naive implementations are slow. Two optimizations bring both operations to nearly O(1) amortized:

  1. Path compression: During find, make every node on the path point directly to the root.
  2. Union by rank: Always attach the shorter tree under the taller one, keeping trees flat.

With both, the amortized cost is O(α(n)) per operation where α is the inverse Ackermann function — effectively constant for all practical n.

struct UnionFind {
    parent: Vec<usize>,
    rank:   Vec<usize>,
    count:  usize, // number of components
}
 
impl UnionFind {
    fn new(n: usize) -> Self {
        UnionFind {
            parent: (0..n).collect(),
            rank:   vec![0; n],
            count:  n,
        }
    }
 
    fn find(&mut self, x: usize) -> usize {
        if self.parent[x] != x {
            self.parent[x] = self.find(self.parent[x]); // path compression
        }
        self.parent[x]
    }
 
    fn union(&mut self, x: usize, y: usize) -> bool {
        let (rx, ry) = (self.find(x), self.find(y));
        if rx == ry { return false; } // already connected
        match self.rank[rx].cmp(&self.rank[ry]) {
            std::cmp::Ordering::Less    => self.parent[rx] = ry,
            std::cmp::Ordering::Greater => self.parent[ry] = rx,
            std::cmp::Ordering::Equal   => { self.parent[ry] = rx; self.rank[rx] += 1; }
        }
        self.count -= 1;
        true
    }
 
    fn connected(&mut self, x: usize, y: usize) -> bool {
        self.find(x) == self.find(y)
    }
}

Number of Connected Components

Given n nodes and a list of undirected edges, count connected components. Start with n components, subtract one for each successful union (edges that connect two previously separate components).

fn count_components(n: usize, edges: &[[usize; 2]]) -> usize {
    let mut uf = UnionFind::new(n);
    for &[a, b] in edges { uf.union(a, b); }
    uf.count
}
 
fn main() {
    // 0-1, 1-2, 3-4  →  two components: {0,1,2} and {3,4}
    println!("{}", count_components(5, &[[0,1],[1,2],[3,4]])); // 2
    // 0-1, 1-2, 2-3, 3-4  →  one component
    println!("{}", count_components(5, &[[0,1],[1,2],[2,3],[3,4]])); // 1
}

Redundant Connection

Given a graph that started as a tree with n nodes and one extra edge added, find the redundant edge. Process edges one by one; the first edge that connects two already-connected nodes is the redundant one.

fn find_redundant_connection(edges: &[[usize; 2]]) -> [usize; 2] {
    // nodes are 1-indexed in the problem
    let n = edges.len();
    let mut uf = UnionFind::new(n + 1);
    for &[a, b] in edges {
        if !uf.union(a, b) { return [a, b]; }
    }
    unreachable!()
}
 
fn main() {
    println!("{:?}", find_redundant_connection(&[[1,2],[1,3],[2,3]])); // [2,3]
    println!("{:?}", find_redundant_connection(&[[1,2],[2,3],[3,4],[1,4],[1,5]])); // [1,4]
}

Accounts Merge

Given a list of accounts where each account is [name, email1, email2, ...], merge accounts that share at least one email address. Use Union-Find on email addresses: union all emails in the same account together, then group emails by their root representative.

use std::collections::HashMap;
 
fn accounts_merge(accounts: Vec<Vec<String>>) -> Vec<Vec<String>> {
    let mut email_to_id: HashMap<String, usize> = HashMap::new();
    let mut email_to_name: HashMap<String, String> = HashMap::new();
    let mut id_counter = 0;
 
    // Assign an ID to each unique email
    for account in &accounts {
        let name = &account[0];
        for email in &account[1..] {
            if !email_to_id.contains_key(email) {
                email_to_id.insert(email.clone(), id_counter);
                email_to_name.insert(email.clone(), name.clone());
                id_counter += 1;
            }
        }
    }
 
    let mut uf = UnionFind::new(id_counter);
 
    // Union all emails in the same account
    for account in &accounts {
        let first_id = email_to_id[&account[1]];
        for email in &account[2..] {
            uf.union(first_id, email_to_id[email]);
        }
    }
 
    // Group emails by root
    let mut root_to_emails: HashMap<usize, Vec<String>> = HashMap::new();
    for (email, &id) in &email_to_id {
        let root = uf.find(id);
        root_to_emails.entry(root).or_default().push(email.clone());
    }
 
    let mut result = Vec::new();
    for (root, mut emails) in root_to_emails {
        emails.sort();
        let name = email_to_name[&emails[0]].clone();
        let mut account = vec![name];
        account.extend(emails);
        result.push(account);
    }
    result.sort();
    result
}
 
fn main() {
    let accounts = vec![
        vec!["John".into(),"johnsmith@mail.com".into(),"john_newyork@mail.com".into()],
        vec!["John".into(),"johnsmith@mail.com".into(),"john00@mail.com".into()],
        vec!["Mary".into(),"mary@mail.com".into()],
        vec!["John".into(),"johnnybravo@mail.com".into()],
    ];
    let merged = accounts_merge(accounts);
    for account in &merged {
        println!("{:?}", account);
    }
    // ["John", "john00@mail.com", "john_newyork@mail.com", "johnsmith@mail.com"]
    // ["John", "johnnybravo@mail.com"]
    // ["Mary", "mary@mail.com"]
}

Satisfiability of Equality Equations

Given equations like "a==b", "b!=c", determine if all equations can be satisfied simultaneously. Union all == pairs first, then check that no != pair has both sides in the same component.

fn equations_possible(equations: &[&str]) -> bool {
    let mut uf = UnionFind::new(26);
 
    // First pass: process all == constraints
    for eq in equations {
        let bytes = eq.as_bytes();
        if bytes[1] == b'=' { // "=="
            uf.union((bytes[0] - b'a') as usize, (bytes[3] - b'a') as usize);
        }
    }
 
    // Second pass: check != constraints
    for eq in equations {
        let bytes = eq.as_bytes();
        if bytes[1] == b'!' { // "!="
            let (a, b) = ((bytes[0] - b'a') as usize, (bytes[3] - b'a') as usize);
            if uf.connected(a, b) { return false; }
        }
    }
    true
}
 
fn main() {
    println!("{}", equations_possible(&["a==b","b!=c","b==c"])); // false
    println!("{}", equations_possible(&["c==c","b==d","x!=z"])); // true
    println!("{}", equations_possible(&["a==b","b!=a"]));        // false
}

Minimum Spanning Tree — Kruskal's Algorithm

Find the minimum-weight spanning tree of a weighted undirected graph. Sort edges by weight. Process edges in order: add an edge if it connects two different components (doesn't form a cycle). Stop when n-1 edges are added.

fn kruskal_mst(n: usize, mut edges: Vec<(usize, usize, i32)>) -> (i32, Vec<(usize, usize, i32)>) {
    edges.sort_by_key(|&(_, _, w)| w);
    let mut uf = UnionFind::new(n);
    let mut mst = Vec::new();
    let mut total_weight = 0;
 
    for (u, v, w) in edges {
        if uf.union(u, v) {
            mst.push((u, v, w));
            total_weight += w;
            if mst.len() == n - 1 { break; }
        }
    }
    (total_weight, mst)
}
 
fn main() {
    // 5 nodes, 7 edges
    let edges = vec![
        (0,1,2),(0,3,6),(1,2,3),(1,3,8),(1,4,5),(2,4,7),(3,4,9)
    ];
    let (weight, mst) = kruskal_mst(5, edges);
    println!("MST weight: {}", weight); // 16
    for (u, v, w) in &mst {
        println!("  {}-{} ({})", u, v, w);
    }
    // 0-1 (2), 1-2 (3), 1-4 (5), 0-3 (6)
}