1584 Min Cost to Connect All Points

1584 Min Cost to Connect All Points#

Problem#

You are given an array points representing integer coordinates of some points on a 2D-plane, where points[i] = [xi, yi].

The cost of connecting two points [xi, yi] and [xj, yj] is the manhattan distance between them: |xi - xj| + |yi - yj|, where |val| denotes the absolute value of val.

Return the minimum cost to make all points connected. All points are connected if there is exactly one simple path between any two points.

Examples#

Example 1:

Input: points = [[0,0],[2,2],[3,10],[5,2],[7,0]]
Output: 20

Example 2:

Input: points = [[3,12],[-2,5],[-4,1]]
Output: 18

Constraint#

1 <= points.length <= 1000
-106 <= xi, yi <= 106
All pairs (xi, yi) are distinct.

Analysis#

Minimum spanning tree problem

Solution#

# use Prim's algorithm
def minCostConnectPoints(points):
    # initialize distance to infinity for all points and 0 for the first point
    d, res = {(x, y): float('inf') if i else 0 for i, (x, y) in enumerate(points)}, 0
    
    while d:
        x, y = min(d, key=d.get)  # obtain the current minimum edge
        res += d.pop((x, y))      # and remove the corresponding point
        for x1, y1 in d:          # for the rest of the points, update the minimum manhattan distance
            d[(x1, y1)] = min(d[(x1, y1)], abs(x-x1)+abs(y-y1))
    return res

# test
points = [[0,0],[2,2],[3,10],[5,2],[7,0]]
print(minCostConnectPoints(points))

points = [[3,12],[-2,5],[-4,1]]
print(minCostConnectPoints(points))

# 165
points = [[11,12],[-9,5],[-1,5],[9,-8],[20,-17],[18,19],[-1,14],[16,19],[2,16],[14,3],[1,-12],[19,4],[5,-17],[-13,6],[-4,1],[-7,-16],[13,7],[-20,-7],[20,-15]]
print(minCostConnectPoints(points))

# test on a list of 1000 points
import random
npoints = 1000
points = [[random.randint(-1000, 1000), random.randint(-1000, 1000)] for _ in range(npoints)]
print(minCostConnectPoints(points))
20
18
165
52394

Check why the following implementation is time-consuming for large graphs??

  • the code using heapq is slow because of Line 38, which should remove the original (dist[v],v) from the queue and then push. Without removing it, the history of (dist[v], v) are all recorded in queue.

  • but after adding the removal, the results are not correct anymore.

Why???

# Prim's Algorithm
import heapq
def minCostConnectPoints(points):
    # get weight between two points
    def weights(points, u, v):
        return abs(points[u][0] - points[v][0]) + abs(points[u][1] - points[v][1])

    def buildGraph(n):
        graph = {}
        weight = {}
        for i in range(n):
            graph[i] = []
        for i in range(n):
            for j in range(i+1, n):
                graph[i].append(j)
                graph[j].append(i)
                weight[(i,j)] = weight[(j,i)] = weights(points, i, j)

        return graph, weight 

    n = len(points)
    graph, weight = buildGraph(n)
    # initialize distance array to infinity
    dist = [float('inf')] * n
    # intialize distance to 0 for the root node
    dist[0] = 0
    # initialize priority queue
    q = [(dist[i], i) for i in range(n)]

    # main loop
    while q:
        # extrac min
        du, u = heapq.heappop(q)
        # if distance is infinity, then we can't reach this node
        for v in graph[u]:
            w = weight[(u,v)]
            if (dist[v], v) in q and w < dist[v]:
                # without this, the code will be very slow
                q.remove((dist[v],v))

                # update distance table and push to priority queue
                dist[v] = w
                heapq.heappush(q, (dist[v], v))
    
    return sum(dist)
    
# test
# 165
points = [[11,12],[-9,5],[-1,5],[9,-8],[20,-17],[18,19],[-1,14],[16,19],[2,16],[14,3],[1,-12],[19,4],[5,-17],[-13,6],[-4,1],[-7,-16],[13,7],[-20,-7],[20,-15]]
print(minCostConnectPoints(points))

# The following test is too time consuming because the above code 
# test on a list of 1000 points
import random
npoints = 1000
points = [[random.randint(-1000, 1000), random.randint(-1000, 1000)] for _ in range(npoints)]
print(minCostConnectPoints(points))
167
52102
# Prim's Algorithm: this is a good implementation
def minCostConnectPoints(points):
    # get weight between two points
    def weights(points, u, v):
        return abs(points[u][0] - points[v][0]) + abs(points[u][1] - points[v][1])

    def buildGraph(n):
        graph = {}
        weight = {}
        for i in range(n):
            graph[i] = []
        for i in range(n):
            for j in range(i+1, n):
                graph[i].append(j)
                graph[j].append(i)
                weight[(i,j)] = weight[(j,i)] = weights(points, i, j)

        return graph, weight 

    n = len(points)
    graph, weight = buildGraph(n)
    # initialize distance array to infinity
    dist = [float('inf')] * n
    # intialize distance to 0 for the root node
    dist[0] = 0
    # initialize priority queue
    q = {i:dist[i] for i in range(n)}

    # main loop
    while q:
        # extract node with minimum value
        u = min(q, key=q.get)
        # remove node from queue
        q.pop(u)

        # if distance is infinity, then we can't reach this node
        for v in graph[u]:
            w = weight[(u, v)]
            if v in q and w < dist[v]:
                dist[v] = w
                q[v] = dist[v]
    
    return sum(dist)

# test
# 165
points = [[11,12],[-9,5],[-1,5],[9,-8],[20,-17],[18,19],[-1,14],[16,19],[2,16],[14,3],[1,-12],[19,4],[5,-17],[-13,6],[-4,1],[-7,-16],[13,7],[-20,-7],[20,-15]]
print(minCostConnectPoints(points))

# The following test runs in a reasonable time
import random
npoints = 1000
points = [[random.randint(-1000, 1000), random.randint(-1000, 1000)] for _ in range(npoints)]
print(minCostConnectPoints(points))
165
51960
# Another implementation using Prim's algorithm, which is similar to Dijkstra's algorithm but adds a visited set
# Note the following code has to use a priority heap instead of a queue. Otherwise the results are wrong.
from heapq import heappush, heappop
def minCostConnectPoints(points):
    def buildGraph(n):
        graph = {}
        for i in range(n):
            graph[i] = []
        for i in range(n):
            for j in range(i+1, n):
                w = abs(points[i][0] - points[j][0]) + abs(points[i][1] - points[j][1])
                graph[i].append((j, w))
                graph[j].append((i, w))

        return graph
    
    n = len(points)
    graph = buildGraph(n)
    dist = [float('inf') for _ in range(n)]
    dist[0] = 0
    q = [(0, 0)]
    visited = set()

    while q:
        # pop the node with minimum distance
        # if we pop the head of the queue, the results are wrong
        #du, u = q.pop(0)
        du, u = heappop(q)
        
        # if the node is visited, skip
        if u in visited:
            continue

        # else, add to visited set
        visited.add(u)
        
        # explore neighbors
        for v, w in graph[u]:
            if v not in visited and dist[v] > w:
                dist[v] = w
                #q.append((dist[v], v))
                heappush(q, (dist[v], v))
    return sum(dist)

# test
# 165
points = [[11,12],[-9,5],[-1,5],[9,-8],[20,-17],[18,19],[-1,14],[16,19],[2,16],[14,3],[1,-12],[19,4],[5,-17],[-13,6],[-4,1],[-7,-16],[13,7],[-20,-7],[20,-15]]
print(minCostConnectPoints(points))

# The following test runs in a reasonable time
import random
npoints = 1000
points = [[random.randint(-1000, 1000), random.randint(-1000, 1000)] for _ in range(npoints)]
print(minCostConnectPoints(points))
165
51603
# Kruskal's Algorithm
class UnionFind:
    def __init__(self, n):
        self.parent = [i for i in range(n)]
        self.count = n 
    
    def find(self, node):
        if node != self.parent[node]:
            self.parent[node] = self.find(self.parent[node])
        return self.parent[node]
    
    def union(self, node1, node2):
        root1 = self.find(node1)
        root2 = self.find(node2)
        if root1 != root2:
            self.parent[root1] = root2
            self.count -= 1
    def connected(self, node1, node2):
        return self.find(node1) == self.find(node2)
    def count(self):
        return self.count

def minCostConnectPoints(points):
    #1. build graph
    n = len(points)
    graph = []
    for i in range(n):
        for j in range(i+1, n):
            w = abs(points[i][0] - points[j][0]) + abs(points[i][1] - points[j][1])
            graph.append((w, i, j))
    # sort the graph by weight
    graph.sort()

    #2. Kruskal's Algorithm
    uf = UnionFind(n)
    res = 0
    # get the edge with minimum weight one by one
    for w, u, v in graph:
        if not uf.connected(u, v):
            uf.union(u, v)
            res += w
    return res

# test
points = [[0,0],[2,2],[3,10],[5,2],[7,0]]
print(minCostConnectPoints(points))

points = [[3,12],[-2,5],[-4,1]]
print(minCostConnectPoints(points))

points = [[11,12],[-9,5],[-1,5],[9,-8],[20,-17],[18,19],[-1,14],[16,19],[2,16],[14,3],[1,-12],[19,4],[5,-17],[-13,6],[-4,1],[-7,-16],[13,7],[-20,-7],[20,-15]]
print(minCostConnectPoints(points))

import random
npoints = 1000
points = [[random.randint(-1000, 1000), random.randint(-1000, 1000)] for _ in range(npoints)]
print(minCostConnectPoints(points))
20
18
165
52121