package de.brightbyte.wikiword.eval;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.Map;

import de.brightbyte.application.Arguments;
import de.brightbyte.data.LabeledVector;
import de.brightbyte.data.cursor.DataSet;
import de.brightbyte.data.measure.CosineVectorSimilarity;
import de.brightbyte.data.measure.Similarity;
import de.brightbyte.db.DatabaseConnectionInfo;
import de.brightbyte.db.DatabaseUtil;
import de.brightbyte.io.ConsoleIO;
import de.brightbyte.io.Output;
import de.brightbyte.util.PersistenceException;
import de.brightbyte.wikiword.Corpus;
import de.brightbyte.wikiword.TweakSet;
import de.brightbyte.wikiword.disambig.CoherenceDisambiguator;
import de.brightbyte.wikiword.disambig.Disambiguator;
import de.brightbyte.wikiword.disambig.FeatureFetcher;
import de.brightbyte.wikiword.disambig.LinkFeatureFetcher;
import de.brightbyte.wikiword.eval.schema.DatabaseSurveyBuilder;
import de.brightbyte.wikiword.model.WikiWordConceptReference;
import de.brightbyte.wikiword.store.DatabaseLocalConceptStore;

/**
 * Command line driver that imports a relatedness dataset (pairs of terms with a
 * reference relatedness value) into a {@link DatabaseSurveyBuilder}, computes a
 * relatedness value for each pair using a {@link Disambiguator}, stores the computed
 * values as a second batch, and prints the correlation between the two batches.
 */
public class ZGEval {
	
	/**
	 * Outcome of evaluating one term pair: the relatedness value and the two concept
	 * references the terms were resolved to.
	 */
	protected static class Result {
		public final double relatedness;
		public final WikiWordConceptReference a;
		public final WikiWordConceptReference b;
		
		/**
		 * @param relatedness the relatedness value determined for the pair
		 * @param a the concept reference chosen for the first term
		 * @param b the concept reference chosen for the second term
		 */
		public Result(final double relatedness, final WikiWordConceptReference a, final WikiWordConceptReference b) {
			this.relatedness = relatedness;
			this.a = a;
			this.b = b;
		}
		
		/** Renders the result as <code>relatedness (a / b)</code>. */
		@Override
		public String toString() {
			return relatedness + " ("+a+" / "+b+")";
		}
	}
	
	/** Store used to read reference assessments and to write computed ones. */
	protected DatabaseSurveyBuilder store;
	
	/** Sink for progress and result messages; defaults to the console. */
	protected Output out = ConsoleIO.output;
	
	/** Disambiguator used to resolve term pairs and to obtain their coherence. */
	protected Disambiguator disambiguator;
	
	/**
	 * @param disambiguator the disambiguator used by {@link #evalPair(String, String)}
	 * @param store the store that assessments are read from and written to
	 */
	public ZGEval(Disambiguator disambiguator, DatabaseSurveyBuilder store) {
		this.disambiguator = disambiguator;
		this.store = store;
	}

	/** @return the disambiguator currently used for evaluating pairs */
	public Disambiguator getDisambiguator() {
		return disambiguator;
	}

	/** @param disambiguator the disambiguator to use for evaluating pairs */
	public void setDisambiguator(Disambiguator disambiguator) {
		this.disambiguator = disambiguator;
	}

	/** @return the output that messages are written to */
	public Output getOut() {
		return out;
	}

	/** @param out the output that messages should be written to */
	public void setOut(Output out) {
		this.out = out;
	}

	/**
	 * Evaluates every pair of the batch <code>base</code> and stores the outcome as
	 * batch <code>batch</code>, then prints the correlation between the two batches.
	 * Any existing content of <code>batch</code> is deleted first.
	 * <p>
	 * Pairs for which {@link #evalPair(String, String)} yields no result are stored with
	 * null concept references, a relatedness of 0 and the second-to-last argument of
	 * <code>storeRelatednessAssessment</code> set to false.
	 * 
	 * @param base name of the batch providing the term pairs (and reference values)
	 * @param batch name of the batch the computed assessments are written to
	 * @throws IOException declared by the original signature
	 * @throws PersistenceException if the store or the disambiguator fails
	 */
	public void evalDataset(String base, String batch) throws IOException, PersistenceException {
		store.deleteRelatednessBatch(batch);
		
		DataSet<Map<String, Object>> data = store.listRelatednessBatch(base);
		
		for (Map<String, Object> row : data) {
			String a = DatabaseUtil.asString(row.get("term_a"));
			String b = DatabaseUtil.asString(row.get("term_b"));
			
			int seq = DatabaseUtil.asInt(row.get("seq"));
			boolean nn = DatabaseUtil.asInt(row.get("is_nn")) > 0;
			
			// r is null if the pair could not be resolved to exactly two meanings
			Result r = evalPair(a, b);
			out.println(" - pair: "+a+" / "+b+" ---> "+r);
			
			store.storeRelatednessAssessment(
						batch, seq, a, b, 
						r==null ? null : r.a, 
						r==null ? null : r.b, 
						nn, r!=null, r==null ? 0 : r.relatedness );
		}
		
		// NOTE(review): meaning of the two boolean flags is defined by DatabaseSurveyBuilder -- confirm there
		double correl = store.getRelatednessCorrelation(base, batch, true, false);
		
		out.println("RESULT: "+correl);
		//out.println("goodsum: "+goodsum+"; badsum: "+badsum); //XXX: hack for debugging! nasty pass-as-state...
	}
	
	/**
	 * Imports reference assessments from a semicolon separated text file (read as
	 * ISO-8859-15) into the batch <code>batch</code>. Any existing content of that batch
	 * is deleted first.
	 * <p>
	 * The first line of the file is skipped (presumably a header line -- confirm against
	 * the dataset files). Blank lines are ignored but still counted, so the sequence
	 * number stored for a pair is the number of lines read after the skipped first line.
	 * Each remaining line must provide at least three fields: first term, second term
	 * and the mean relatedness value; further fields are ignored.
	 * 
	 * @param data the file to read
	 * @param batch name of the batch to store the assessments in
	 * @throws IOException if the file can not be read or a line has fewer than three fields
	 * @throws PersistenceException if the store fails
	 */
	public void importDataset(File data, String batch) throws IOException, PersistenceException {
		store.deleteRelatednessBatch(batch);
		
		InputStream in = new FileInputStream(data);
		try {
			Reader rd = new InputStreamReader(in, "ISO-8859-15");
			BufferedReader lines = new BufferedReader(rd);
			
			lines.readLine(); // skip the first line, it is not treated as data
			
			int i = 0;
			String s;
			while ( (s = lines.readLine()) != null ) {
				i++;
				
				s = s.trim();
				if (s.length()==0) continue;
				
				String[] ss = s.split("\\s*;\\s*");
				if (ss.length<3) throw new IOException("expected at least three fields, found "+s);
				
				double mean = Double.parseDouble(ss[2]);
				//double stddev = Double.parseDouble(ss[3]);
				
				boolean nn = isNoun(ss[0]) && isNoun(ss[1]); 
				store.storeRelatednessAssessment(batch, i, ss[0], ss[1], null, null, nn, true, mean);
			}
		}
		finally {
			// closing the underlying stream releases the file even if parsing or storing failed
			in.close();
		}
	}
	
	/**
	 * Heuristic noun check: a term counts as a noun if its first character is upper case.
	 * Null and empty terms are never nouns.
	 */
	private boolean isNoun(String s) {
		//FIXME: capitalization is only a crude stand-in for real part-of-speech information
		if (s==null || s.length()==0) return false; // e.g. an empty field in the input file
		
		char ch = s.charAt(0);
		return Character.isUpperCase(ch);
	}

	/**
	 * Disambiguates the two terms together and reports the coherence of the result as
	 * their relatedness.
	 * 
	 * @param a the first term
	 * @param b the second term
	 * @return the relatedness and the chosen concept references, or null if the
	 *         disambiguator returned no result or did not return exactly two meanings
	 * @throws PersistenceException if the disambiguator fails
	 */
	public Result evalPair(String a, String b) throws PersistenceException {
		Disambiguator.Result r = disambiguator.disambiguate(Arrays.asList(a, b));

		if (r==null || r.getMeanings().size()!=2) {
			return null;
		}
		
		double sim = r.getCoherence(); //ZG use 0-4 range for eval!
		
		return new Result(sim, r.getMeanings().get(a).getReference(), r.getMeanings().get(b).getReference());
	}

	/**
	 * Opens the concept store for the corpus named by the first command line parameter,
	 * using the database connection info file named by the second parameter. If the
	 * option <code>prefix</code> is set, it is applied as tweak <code>dbstore.prefix</code>.
	 * 
	 * @param args the parsed command line arguments
	 * @param tweaks the tweak set to use (and to modify if a prefix is given)
	 * @return the newly created concept store
	 */
	protected static DatabaseLocalConceptStore openStore(Arguments args, TweakSet tweaks) throws IOException, SQLException {
		Corpus corpus = Corpus.forName(args.getParameter(0));
		DatabaseConnectionInfo dbi = new DatabaseConnectionInfo(new File(args.getParameter(1)));
		
		if (args.isSet("prefix")) tweaks.setTweak("dbstore.prefix", args.getOption("prefix", null));

		return new DatabaseLocalConceptStore(corpus, dbi, tweaks);
	}

	/**
	 * Entry point. Parameters: corpus name, database info file, dataset file, name of
	 * the batch to write results to. The dataset is imported into a batch named after
	 * the file (name up to the first dot, upper case), then evaluated into the given batch.
	 */
	public static void main(String[] argv) throws IOException, SQLException, PersistenceException {
		Arguments args = new Arguments();
		args.parse(argv);
		
		TweakSet tweaks = new TweakSet();
		if (args.isSet("tweaks")) tweaks.loadTweaks(new File(args.getStringOption("tweaks", null)));
		tweaks.setTweaks(System.getProperties(), "wikiword.tweak."); //XXX: doc
		tweaks.setTweaks(args, "tweak."); //XXX: doc

		DatabaseLocalConceptStore conceptStore = openStore(args, tweaks);
		FeatureFetcher features = LinkFeatureFetcher.weighted; //FIXME: try unweighted, cooc, etc!
		Similarity<LabeledVector<String>> similarity = new CosineVectorSimilarity<String>();
		
		// all thresholds and the popularity bias are set to 0 for the evaluation
		CoherenceDisambiguator disambig = new CoherenceDisambiguator(conceptStore, features, similarity);
		disambig.setFrequencyThreshold(0);
		disambig.setScoreThreshold(0);
		disambig.setPopularityBias(0);

		DatabaseSurveyBuilder surveyStore = new DatabaseSurveyBuilder(conceptStore, tweaks);
		surveyStore.prepare(false);
		
		ZGEval eval = new ZGEval(disambig, surveyStore);
		
		File data = new File(args.getParameter(2));
		// batch name for the reference data: file name without extension(s), upper case
		String name = data.getName().replaceAll("\\..*$", "").toUpperCase();
		
		System.out.println("importing data from "+data);
		eval.importDataset(data, name);
		
		System.out.println("evluating data in "+name);
		String batch = args.getParameter(3);
		eval.evalDataset(name, batch);
	}

}
