Edit Distance Backtracing

In [1]:
from stack import *
import numpy as np
In [15]:
def trace_paths(moves, i, j, s):
    """
    moves: list of moves at each cell (3D array)
    i, j: The current cell I'm visiting
    s: Stack holding all of the cells I've visited up to this point
    """
    if i == 0 and j == 0:
        # Base case
        print(s.get_entire_stack())
    else:
        s.push([i, j])
        for move in moves[i][j]:
            if move == 1:
                trace_paths(moves, i, j-1, s)
            elif move == 2:
                trace_paths(moves, i-1, j, s)
            elif move == 3:
                trace_paths(moves, i-1, j-1, s)
        s.pop()

def edit(s1, s2):
    """
    An iterative, dynamic programming version of the string
    edit distance

    Parameters
    ----------
    s1: string of length M
        The first string to match
    s2: string of length N
        The second string to match
    
    Returns
    -------
    cost: int
        The cost of an optimal match
    paths: list of lists
        Each list 
    """
    M = len(s1)
    N = len(s2)
    # Create a 2D array with M+1 rows and N+1 columns
    # to store the costs
    table = np.zeros((M+1, N+1))
    # Fill in the base cases
    table[0, :] = np.arange(N+1)
    table[:, 0] = np.arange(M+1)

    # Make 2D array that stores the optimal moves
    moves = []
    for i in range(M+1):
        moves.append([])
        for j in range(N+1):
            moves[i].append([])
    # Fill in the base cases
    for j in range(N+1):
        moves[0][j] = [1] # Move left if we're at the top row
    for i in range(M+1):
        moves[i][0] = [2] # Move up if we're at the left column
    
    # Do the dynamic programming to fill in the table and moves
    for i in range(1, M+1):
        for j in range(1, N+1):
            cost1 = table[i][j-1] + 1 # Delete the last character from s2
            cost2 = table[i-1][j] + 1 # Delete the last character from s1
            cost3 = table[i-1][j-1] # Match or swap both characters at the end
            if s1[i-1] != s2[j-1]:
                cost3 += 1
            table[i][j] = min(cost1, cost2, cost3)
            if cost1 == table[i][j]:
                moves[i][j].append(1)
            if cost2 == table[i][j]:
                moves[i][j].append(2)
            if cost3 == table[i][j]:
                moves[i][j].append(3)
    cost = int(table[-1, -1])
    s = Stack()
    trace_paths(moves, M, N, s)
    return cost

edit("topology", "topography")
[[8, 10], [7, 9], [7, 8], [7, 7], [6, 6], [5, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
[[8, 10], [7, 9], [7, 8], [6, 7], [6, 6], [5, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
[[8, 10], [7, 9], [7, 8], [6, 7], [5, 6], [5, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
[[8, 10], [7, 9], [7, 8], [6, 7], [5, 6], [4, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
[[8, 10], [7, 9], [6, 8], [6, 7], [6, 6], [5, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
[[8, 10], [7, 9], [6, 8], [6, 7], [5, 6], [5, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
[[8, 10], [7, 9], [6, 8], [6, 7], [5, 6], [4, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
[[8, 10], [7, 9], [6, 8], [5, 7], [5, 6], [5, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
[[8, 10], [7, 9], [6, 8], [5, 7], [5, 6], [4, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
[[8, 10], [7, 9], [6, 8], [5, 7], [4, 6], [4, 5], [4, 4], [3, 3], [2, 2], [1, 1]]
Out[15]:
5

In [ ]: