package de.brightbyte.data.measure;

import de.brightbyte.data.LabeledVector;

/**
 * Tanimoto similarity measure for fuzzy sets (weighted entries) represented
 * using LabeledVector.
 * This implementation uses the sum of the values from each set instead
 * of the plain cardinality of the sets. 
 * The similarity is thus given by <tt>&Sigma;(A&cap;B) / &Sigma;(A&cup;B)</tt>. 
 *  
 * When intersecting two fuzzy sets, 
 * the value for each entry is the minimum of the values from each original set
 * (fuzzy AND).
 * When forming the union, the value for an entry that occurs in both original
 * sets will be the maximum of the two original values (fuzzy OR).
 *
 * @param <T> The key type of the LabeledVectors to use with this
 *            WeightedTanimotoVectorSimilarity measure.
 */
public class WeightedTanimotoVectorSimilarity<T> implements Similarity<LabeledVector<T>> {

	/**
	 * Computes the weighted Tanimoto similarity of two fuzzy sets, i.e. the
	 * summed weight of their intersection divided by the summed weight of
	 * their union.
	 * <p>
	 * Identical or equal vectors yield 1 without further computation. Two
	 * vectors whose union carries no weight at all (both effectively empty)
	 * also yield 1, consistent with that shortcut, instead of the NaN that
	 * dividing zero by zero would produce.
	 *
	 * @param a the first weighted set
	 * @param b the second weighted set
	 * @return the ratio of intersection weight to union weight
	 * @throws IllegalArgumentException if a negative weight is found, in
	 *         either vector, for one of the labels of <tt>a</tt>. Labels that
	 *         occur only in <tt>b</tt> are not inspected.
	 */
	public double similarity(LabeledVector<T> a, LabeledVector<T> b) {
		if (a==b || a.equals(b)) return 1;

		// Start from total(A) + total(B). Removing min(va, vb) for every shared
		// label leaves max(va, vb) for that label, which is the fuzzy union.
		double union = a.total() + b.total();
		double overlap = 0;

		for (T x: a.labels()) {
			double va = a.get(x);
			double vb = b.get(x);

			if (va<0 || vb<0) throw new IllegalArgumentException("encountered negative weight");

			// vb == 0 contributes nothing to the intersection, so it is skipped.
			if (vb!=0) {
				double min = Math.min(va, vb);
				union   -= min; // fuzzy OR = MAX, so remove the min from the sum
				overlap += min; // fuzzy AND = MIN
			}
		}

		// Neither set carries any weight: avoid 0/0 = NaN and treat the two
		// empty sets as identical, matching the equals() shortcut above.
		if (union == 0 && overlap == 0) return 1;

		return overlap / union;
	}

}
