package de.brightbyte.data.measure;


/**
 * Calculates the edit distance between two character sequences, as defined
 * by Vladimir Levenshtein 1965/66: the minimum number of single-character
 * insertions, deletions and substitutions needed to turn one sequence into
 * the other.
 * <p>
 * For inputs of length n and m this implementation needs O(n*m) time and
 * O(min(n, m)) additional memory, because only two rows of the dynamic
 * programming matrix are kept at any time.
 * <p>
 * An optional upper bound ({@code max}) caps the reported distance and lets
 * the computation stop as soon as the bound is certain to be reached.
 */
public class LevenshteinDistance implements Distance<CharSequence> {

	/**
	 * Shared default instance: case sensitive, no upper bound.
	 * Note that this field is not final and can be reassigned.
	 */
	public static LevenshteinDistance instance = new LevenshteinDistance();
	
	/** If false, characters are compared after {@link Character#toLowerCase(char)}. */
	protected boolean caseSensitive;
	
	/** Upper bound for the reported distance; zero or a negative value disables the bound. */
	protected int max;
	
	/**
	 * Creates a case sensitive measure without an upper bound.
	 */
	public LevenshteinDistance() {
		this(true);
	}
	
	/**
	 * Creates a measure without an upper bound.
	 * 
	 * @param caseSensitive whether upper and lower case characters are 
	 *        considered different
	 */
	public LevenshteinDistance(boolean caseSensitive) {
		this(caseSensitive, 0);
	}
	
	/**
	 * Creates a measure with an optional upper bound.
	 * 
	 * @param caseSensitive whether upper and lower case characters are 
	 *        considered different
	 * @param max the largest distance ever reported; any larger distance is 
	 *        reported as {@code max}. Zero or a negative value means "no limit".
	 */
	public LevenshteinDistance(boolean caseSensitive, int max) {
		this.max = max;
		this.caseSensitive = caseSensitive;
	}
	
	/**
	 * Computes the Levenshtein distance between {@code s} and {@code t}.
	 * 
	 * @param s the first sequence
	 * @param t the second sequence
	 * @return the edit distance, or {@code max} if a positive {@code max} is 
	 *         configured and the distance is larger than that
	 * @throws NullPointerException if exactly one of the arguments is null
	 *         (two identical references, including two nulls, yield 0)
	 */
	public double distance(CharSequence s, CharSequence t) {
		if (s == t) return 0;
		
		int sc = s.length();
		int tc = t.length();
		
		// the distance to an empty sequence is the length of the other one
		if (sc == 0) return limit(tc);
		if (tc == 0) return limit(sc);
		
		// the distance can't be smaller than the difference in length,
		// so the capped result is already known if that difference reaches max
		if (max > 0 && Math.abs(sc - tc) >= max) return max;
		
		// the distance is symmetric: put the shorter sequence along the row
		// so that the buffers below are as small as possible
		if (sc > tc) {
			CharSequence swapSeq = s;
			s = t;
			t = swapSeq;
			
			int swapLen = sc;
			sc = tc;
			tc = swapLen;
		}
		
		// normalize the row sequence once instead of once per matrix cell
		char[] sChars = new char[sc];
		for (int i = 0; i < sc; i++) {
			sChars[i] = normalize(s.charAt(i));
		}
		
		int[] prev = new int[sc + 1]; //previous row
		int[] curr = new int[sc + 1]; //current row
		
		// first row: turning the empty prefix of t into s[0..i) takes i edits
		for (int i = 0; i <= sc; i++) {
			prev[i] = i;
		}
		
		for (int j = 1; j <= tc; j++) {
			char tj = normalize(t.charAt(j - 1));
			
			curr[0] = j;
			
			// curr[0] needs no separate look: curr[1] is never larger than it
			int min = Integer.MAX_VALUE;
			for (int i = 1; i <= sc; i++) {
				//replacement cost (0 if same char)
				int cost = sChars[i - 1] == tj ? 0 : 1;
				
				//minimum of replace, insert and delete
				int c = prev[i - 1] + cost;
				if (curr[i - 1] + 1 < c) c = curr[i - 1] + 1; //insert cost is 1
				if (prev[i] + 1 < c) c = prev[i] + 1;         //delete cost is 1
				
				curr[i] = c;
				if (c < min) min = c;
			}
			
			// the final distance can't sink below the minimum of any row, 
			// so once that minimum reaches max the capped result is known
			if (max > 0 && min >= max) {
				return max;
			}
			
			// swap current and previous
			int[] tmp = prev;
			prev = curr;
			curr = tmp;
		}
		
		// after the last swap, prev holds the final row
		return limit(prev[sc]);
	}
	
	/** Caps d at max if a positive max is configured, otherwise returns d unchanged. */
	private int limit(int d) {
		return (max > 0 && max < d) ? max : d;
	}
	
	/** Returns c as it should be compared: unchanged if case sensitive, lower-cased otherwise. */
	private char normalize(char c) {
		return caseSensitive ? c : Character.toLowerCase(c);
	}

}
