1/*
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.xml.internal.bind.v2.util;
27
28import java.util.AbstractMap;
29import java.util.Arrays;
30import java.util.Collection;
31import java.util.WeakHashMap;
32
33/**
34 * Computes the string edit distance.
35 *
36 * <p>
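 * The distance computed here is the Levenshtein distance: the minimum number
 * of single-character insertions, deletions and substitutions needed to turn
 * one string into the other. Refer to a computer science textbook for the
 * formal definition of the "string edit distance".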
39 *
40 * @author
41 *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
42 */
public class EditDistance {

    /**
     * Weak results cache to avoid additional computations.
     * Because of high complexity caching is required.
     *
     * <p>
     * Keys are held weakly, so the garbage collector may discard entries at
     * any time; a miss simply causes the distance to be recomputed.
     * {@link WeakHashMap} is not synchronized, therefore every access to this
     * map must be performed while holding the monitor of the map itself.
     */
    private static final WeakHashMap<AbstractMap.SimpleEntry<String,String>, Integer> CACHE = new WeakHashMap<AbstractMap.SimpleEntry<String, String>, Integer>();

    /**
     * Computes the edit distance between two strings.
     *
     * <p>
     * Insertions, deletions and substitutions each cost 1. Strings are
     * compared one <code>char</code> (UTF-16 code unit) at a time.
     *
     * <p>
     * The complexity is O(nm) where n=a.length() and m=b.length().
     * Results are cached, and this method may be called from several
     * threads at once.
     *
     * @param a
     *      first string. Must not be null.
     * @param b
     *      second string. Must not be null.
     * @return
     *      the number of single-character edits needed to turn
     *      <code>a</code> into <code>b</code>.
     * @throws NullPointerException
     *      if either argument is null.
     */
    public static int editDistance( String a, String b ) {
        // SimpleEntry is used as a ready-made pair with value-based equals/hashCode
        AbstractMap.SimpleEntry<String,String> entry = new AbstractMap.SimpleEntry<String, String>(a, b);

        Integer result;
        synchronized (CACHE) {
            // a single get() is enough: the cache never stores null values
            result = CACHE.get(entry);
        }

        if (result == null) {
            // computed outside of the lock so that one long O(nm) run
            // does not block other callers; a duplicated computation by
            // a racing thread is harmless because the result is the same
            result = new EditDistance(a, b).calc();
            synchronized (CACHE) {
                CACHE.put(entry, result); // cache the result
            }
        }
        return result;
    }

    /**
     * Finds the string in the <code>group</code> closest to
     * <code>key</code> and returns it.
     *
     * <p>
     * If several strings are equally close, the first one in the array wins.
     *
     * @param key
     *      the string to look for.
     * @param group
     *      candidate strings.
     * @return null if group.length==0.
     */
    public static String findNearest( String key, String[] group ) {
        return findNearest(key, Arrays.asList(group));
    }

    /**
     * Finds the string in the <code>group</code> closest to
     * <code>key</code> and returns it.
     *
     * <p>
     * If several strings are equally close, the first one returned by the
     * iterator of the collection wins.
     *
     * @param key
     *      the string to look for.
     * @param group
     *      candidate strings.
     * @return null if group is empty.
     */
    public static String findNearest( String key, Collection<String> group ) {
        int c = Integer.MAX_VALUE;
        String r = null;

        for (String s : group) {
            int ed = editDistance(key,s);
            // strict comparison keeps the earliest candidate on a tie
            if( c>ed ) {
                c = ed;
                r = s;
            }
        }
        return r;
    }

    /** cost vector: the row of the distance table currently being filled. */
    private int[] cost;
    /** back buffer: the previously completed row of the distance table. */
    private int[] back;

    /** Two strings to be compared. */
    private final String a,b;

    /**
     * Prepares the computation for the given pair of strings.
     *
     * <p>
     * The cost vector is initialized to the distances between every prefix
     * of <code>a</code> and the empty string.
     */
    private EditDistance( String a, String b ) {
        this.a=a;
        this.b=b;
        cost = new int[a.length()+1];
        back = new int[a.length()+1]; // back buffer

        for( int i=0; i<=a.length(); i++ ) {
            cost[i] = i;
        }
    }

    /**
     * Swaps two buffers.
     */
    private void flip() {
        int[] t = cost;
        cost = back;
        back = t;
    }

    /** Returns the smallest of the three values. */
    private static int min(int a,int b,int c) {
        return Math.min(a,Math.min(b,c));
    }

    /**
     * Runs the dynamic programming, keeping only two rows of the
     * distance table in memory.
     *
     * @return
     *      the edit distance between {@link #a} and {@link #b}.
     */
    private int calc() {
        for( int j=0; j<b.length(); j++ ) {
            // the row completed in the previous round becomes "back"
            flip();
            // distance between the empty prefix of a and the first j+1 chars of b
            cost[0] = j+1;
            for( int i=0; i<a.length(); i++ ) {
                int match = (a.charAt(i)==b.charAt(j))?0:1;
                // back[i]+match : keep or substitute the current pair of chars
                // cost[i]+1     : account for one extra char of a
                // back[i+1]+1   : account for one extra char of b
                cost[i+1] = min( back[i]+match, cost[i]+1, back[i+1]+1 );
            }
        }
        return cost[a.length()];
    }
}
144