/*
 * Copyright (C) 2012-2020 Sebastiano Vigna
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

package it.unimi.dsi.law.rank;

import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;

import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.webgraph.ImmutableGraph;
import it.unimi.dsi.webgraph.NodeIterator;

//RELEASE-STATUS: DIST

/** Computes the SALSA score using a non-iterative method.
 *
 * <p>This class implements a direct computation of the SALSA score. The method used
 * is based on the description given by Ron Lempel and Shlomo Moran in Proposition 2 of &ldquo;The
 * stochastic approach for link-structure analysis (SALSA) and the TKC effect&rdquo;, <i>Computer Networks</i>,
 * 33(1):387&minus;401, 2000, Elsevier.
 *
 * <p>First, the connected components
 * of the symmetric graph underlying the matrix <var>G</var><sup>T</sup><var>G</var> are identified. Then,
 * each node gets a first score that is proportional to its indegree in <var>G</var>
 * (<em>not</em> in <var>G</var><sup>T</sup><var>G</var>) so that the sum of the scores on each component is one.
 * Finally, the first score of each node is scaled proportionally to the size of the connected component it belongs to.
 *
 * <p>We remark that this implementation needs a single sequential pass on the graph.
 * The computational cost is entirely concentrated in the computation of the connected components defined above, as it requires
 * to enumerate the neighbourhoods of all nodes of the graph, and, for each neighbourhood,
 * to merge all its vertices into a single set of a union-find data structure. Consecutive vertices
 * of a neighbourhood are chained together, which yields the same components as adding a virtual arc
 * for each pair of vertices, but uses a number of union operations that is linear (rather than quadratic) in the size
 * of the neighbourhood. At the same time, we compute the indegree of each node, which
 * is then scaled as explained above to obtain the final rank.
 */

public class Salsa {
	private final static Logger LOGGER = LoggerFactory.getLogger(Salsa.class);

	/** A union-find (disjoint-set) structure on the integers from 0 (inclusive) to <var>n</var> (exclusive),
	 * using union by size and path compression. */
	protected final static class UnionFind {
		/** For each element, its parent in the forest; roots are their own parent. */
		private final int[] id;
		/** For each root, the number of elements in its set (entries of non-root elements are stale). */
		private final int[] size;
		/** The current number of disjoint sets. */
		public int components;

		/** Creates a new structure made of <var>n</var> singletons.
		 *
		 * @param n the number of elements.
		 */
		public UnionFind(final int n) {
			id = new int[n];
			size = new int[n];
			components = n;
			for (int i = 0; i < n; i++) {
				id[i] = i;
				size[i] = 1;
			}
		}

		/** Returns the representative of the set containing the given element, compressing the path traversed.
		 *
		 * @param x an element.
		 * @return the representative of the set containing {@code x}.
		 */
		public int find(final int x) {
			// Union by size keeps the forest shallow, so the recursion depth is logarithmic.
			if (x != id[x]) return id[x] = find(id[x]);
			return x;
		}

		/** Merges the sets containing the two given elements.
		 *
		 * @param p an element.
		 * @param q another element.
		 * @return true if the two elements were in different sets (which have now been merged).
		 */
		public boolean unite(final int p, final int q) {
			final int i = find(p);
			final int j = find(q);
			if (i == j) return false;
			// Attach the smaller tree to the root of the larger one.
			if (size[i] < size[j]) {
				id[i] = j;
				size[j] += size[i];
			}
			else {
				id[j] = i;
				size[i] += size[j];
			}
			components--;
			return true;
		}

	}

	/** Returns the SALSA ranks for the given graph.
	 *
	 * <p>Nodes with zero indegree get rank zero. The rank of any other node is its indegree,
	 * divided by the sum of the indegrees of the nodes in its component, and multiplied by the
	 * fraction of nodes of the graph that belong to its component.
	 *
	 * @param graph a graph.
	 * @param pl a progress logger, or {@code null}.
	 * @return an array containing, for each node of {@code graph}, its SALSA rank.
	 */
	public static double[] rank(final ImmutableGraph graph, final ProgressLogger pl) {
		final int n = graph.numNodes();
		// Initially rank[x] accumulates the indegree of x; it is normalised in place at the end.
		final double[] rank = new double[n];
		final UnionFind unionFind = new UnionFind(n);

		if (pl != null) {
			pl.expectedUpdates = n;
			pl.itemsName = "nodes";
			pl.start("Computing indegrees and connected components of the intersection graph of in-neighbourhoods...");
		}

		final NodeIterator nodeIterator = graph.nodeIterator();

		for(int i = n; i-- != 0;) {
			nodeIterator.nextInt();
			final int d = nodeIterator.outdegree();
			// Only the first d entries of the successor array are meaningful.
			final int[] succ = nodeIterator.successorArray();
			for(int j = d; j-- != 0;) {
				rank[succ[j]]++;
				// All successors of a node must end up in the same component: chaining each
				// successor with the previous one is sufficient, and needs just d - 1 unions
				// instead of one union for each of the d(d - 1)/2 pairs.
				if (j != 0) unionFind.unite(succ[j], succ[j - 1]);
			}

			if (pl != null) pl.lightUpdate();
		}

		if (pl != null) {
			pl.done();
			pl.start("Flattening and renumbering union-find structure...");
		}

		// Maps each set representative to a component index between 0 (inclusive) and the number of components (exclusive).
		final Int2IntOpenHashMap componentIds = new Int2IntOpenHashMap();
		for(int i = n; i-- != 0;) {
			final int id = unionFind.find(i);
			if (! componentIds.containsKey(id)) componentIds.addTo(id, componentIds.size());
			if (pl != null) pl.lightUpdate();
		}

		if (pl != null) pl.done();

		final int components = unionFind.components;
		final long[] totalIndegree = new long[components];
		final int[] size = new int[components];

		if (pl != null) pl.start("Computing component sizes and per-component cumulative indegree...");

		for(int i = 0; i < n; i++) {
			final int t = componentIds.get(unionFind.find(i));
			// At this point rank[i] is an integer-valued indegree, so the conversion is exact.
			totalIndegree[t] += (long)rank[i];
			size[t]++;
			if (pl != null) pl.lightUpdate();
		}

		if (pl != null) {
			pl.done();
			pl.start("Normalising ranks...");
		}

		// Components with zero cumulative indegree get an infinite factor here, but it is never used (see below).
		final double[] norm = new double[components];
		for(int i = components; i-- != 0;) norm[i] = size[i] / (n * (double)totalIndegree[i]);
		for(int i = n; i-- != 0;) {
			// We must fix manually nodes with zero indegree, or they might generate NaNs.
			if (rank[i] != 0) rank[i] *= norm[componentIds.get(unionFind.find(i))];
			if (pl != null) pl.lightUpdate();
		}

		if (pl != null) pl.done();

		return rank;
	}


	/** Computes the SALSA ranks of a graph and stores them as a list of doubles in binary form.
	 *
	 * @param arg the command-line arguments: the basename of the graph and the name of the output file.
	 */
	public static void main(final String[] arg) throws IOException, JSAPException {

		final SimpleJSAP jsap = new SimpleJSAP(Salsa.class.getName(), "Computes the SALSA score using the non-iterative characterization.",
			new Parameter[] {
			new UnflaggedOption("graphBasename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."),
			new UnflaggedOption("rankFilename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename where the resulting ranks (doubles in binary form) are stored.")
		}
		);

		final JSAPResult jsapResult = jsap.parse(arg);
		if (jsap.messagePrinted()) System.exit(1);

		final ProgressLogger progressLogger = new ProgressLogger(LOGGER);
		final ImmutableGraph graph = ImmutableGraph.loadOffline(jsapResult.getString("graphBasename"), progressLogger);
		BinIO.storeDoubles(rank(graph, progressLogger), jsapResult.getString("rankFilename"));
	}
}
