package it.unimi.dsi.law.fibrations;

/*
 * Copyright (C) 2005-2020 Paolo Boldi and Sebastiano Vigna
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import java.io.IOException;
import java.util.Arrays;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.ints.IntComparator;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.webgraph.ImmutableGraph;
import it.unimi.dsi.webgraph.LazyIntIterator;
import it.unimi.dsi.webgraph.NodeIterator;

//RELEASE-STATUS: DIST

/**
 * Static methods to compute the minimum fibration base of a given graph. More precisely, the method
 * {@link #compute(ImmutableGraph, NodeColouringStrategy, ArcColouringStrategy)} starts from a graph
 * (possibly with a colouring on its nodes and/or on its arcs) and returns an array, say
 * <code>a[]</code>, with exactly as many elements as there are nodes in the graph, and with the
 * following properties:
 * <ul>
 * <li><code>a[x]==a[y]</code> iff the total graph of the universal fibration of <code>x</code> and
 * <code>y</code> is the same;
 * <li>the values contained in <code>a[]</code> range from 0 to <code>k&minus;1</code> where
 * <code>k</code> is the number of distinct total graphs;
 * <li>the values in <code>a[]</code> are assigned <em>canonically</em>, that is, if
 * <code>b[]</code> is the array returned by the method on an isomorphic graph (with the same
 * colours, if the graph is coloured) and if <code>f</code> represents the isomorphism, then
 * <code>a[x]=b[f(x)]</code> for every node <code>x</code>.
 * </ul>
 *
 * <h2>Algorithm implementation</h2>
 *
 * <p>
 * The algorithm merges ideas from <code>nauty</code>'s partitioning algorithm (Brendan D. McKay,
 * <i>Practical Graph Isomorphism</i>, Congressus Numerantium, 30:45&minus;87, 1981) and from
 * Cardon&minus;Crochemore's partitioning algorithm (A. Cardon, Maxime Crochemore, <i>Partitioning a
 * Graph in <i>O</i>(|<var>A</var>| log<sub>2</sub> |<var>V</var>|).</i> Theor. Comput. Sci.,
 * 19:85&minus;98, 1982). It has been described by Paolo Boldi, Violetta Lonati, Massimo Santini and
 * Sebastiano Vigna in <i>Graph fibrations, graph isomorphism, and PageRank</i>, RAIRO Inform.
 * Th&eacute;or., 40:227&minus;253, 2006.
 *
 * <p>
 * The algorithm is oriented towards very large web graphs, and as such it has a very sober usage of
 * data structures&mdash;we use just <var>n</var> vectors comprising <var>m</var> integers plus six
 * vectors of <var>n</var> integers (where <var>n</var> is the number of nodes and <var>m</var> is
 * the number of arcs) with a theoretical time bound <var>O</var>(<var>n</var>&nbsp;log<var>n</var>
 * + <var>m</var> log(<var>n</var> + <var>m</var>+ <var>c</var>) log<var>n</var>) using <var>c</var>
 * arc colours. In fact, this implementation uses a Quicksort in a few places where a linear radix
 * exchange would be required to obtain the above-mentioned bound.
 *
 * <p>
 * In the following, by <em>partition of a set</em> we mean a subdivision of the set into a number
 * of nonempty disjoint subsets, called <em>parts</em>.
 *
 * <p>
 * The algorithm execution happens in rounds; at the end of each round, a certain partition of the
 * nodes is established. The starting partition is the one determined by node colours, if any, or it
 * is simply the trivial partition with just one part. At every round, the old partition is refined
 * (i.e., some of the parts are further subdivided into subparts). The algorithm stops as soon as no
 * part is actually subdivided at the end of a round: the final partition is the desired one (i.e.,
 * the nodes are partitioned according to their universal-fibration total graph).
 *
 * <h3>Basic data structures</h3>
 *
 * <p>
 * <strong>Current partition:</strong> The current partition is stored into three arrays: the first,
 * called <code>part[]</code> simply contains a permutation of the nodes with the property that
 * nodes belonging to the same part appear consecutively; the second, called <code>start[]</code>,
 * contains, for each index, the first index of <code>part[]</code>, in the same part. More
 * formally, suppose that <code>part[begin]</code>, <code>part[begin+1]</code>, &hellip;,
 * <code>part[end-1]</code> is one of the parts; then, <code>start[x]=begin</code> for all
 * <code>x</code> between <code>begin</code> (inclusive) and <code>end</code> (exclusive). Thus,
 * <code>start[]</code> is made of blocks of identical integers, and the first integer of each
 * block is equal to its index in <code>start[]</code>.
 *
 * <p>
 * We also keep track in <code>inv[]</code> of the inverse of <code>part[]</code>. Unless otherwise
 * specified, we shall identify a part with its starting index in the array <code>part[]</code>.
 *
 * <p>
 * Finally, we keep track of the number of elements of each part. This would require an additional
 * array <code>card[]</code>, which we actually overlap to <code>start[]</code> by noting that if we
 * encode the cardinality of the part starting at <code>x</code> as a negative number in
 * <code>start[x]</code>, it is always possible to recover the original value in
 * <code>start[]</code>, as it is just <code>x</code>. This tricky encoding is used in the code, but
 * in the following for sake of simplicity we shall assume that <code>card[]</code> is a separate
 * array.
 *
 * <p>
 * <strong>Active parts:</strong> At the beginning of each round, there is a certain set of active
 * parts; their number is stored in <code>numActiveParts</code>, and they are stored in the
 * <code>startActivePart[]</code> array, in arbitrary order.
 *
 * <p>
 * <strong>Touched nodes:</strong> During each round, some of the nodes are deemed as touched; their
 * number is stored in <code>touchedListLength</code>, and they are stored in the
 * <code>touchedList[]</code> array.
 *
 * <h3>First phase: assigning labels to nodes</h3>
 *
 * <p>
 * The final aim of the first phase is to assign to each node <code>x</code> a label that is the
 * list of all nodes <code>y</code> that have an arc towards <code>x</code> and appear in some
 * active parts; such labels (whose length cannot be larger than the indegree of <code>x</code>)
 * will be contained in the array <code>inFrom[x][]</code>, its length being stored in
 * <code>inFill[x]</code>.
 *
 * <p>
 * To obtain this result, the algorithm scans all the nodes in all the active parts, and for each
 * such node <code>y</code> considers all outgoing arcs, writing <code>y</code> in all the labels of
 * the target nodes of such arcs. When the first node is ever added to <code>inFrom[x][]</code>, we
 * also add <code>x</code> to the list of touched nodes.
 *
 * <h3>Second phase: refining touched parts</h3>
 *
 * <p>
 * We consider all touched nodes. A part containing a touched node is said to be touched, too, but
 * some of the nodes of a touched part might not have been touched. Our purpose is to partition all
 * touched parts into subparts: this amounts to permuting the portion of the array
 * <code>part[]</code> where the part is stored (and the corresponding entries in
 * <code>inv[]</code>), and changing some of the entries of the array <code>start[]</code> (those
 * that presently point to the first element of the part).
 *
 * <p>
 * Note that at a certain point of this phase we have some parts that have already been
 * subpartitioned (we call them <em>completed</em>), a part that is being considered (we call it
 * <em>current</em>) and some other touched parts that will be considered later on.
 *
 * <p>
 * First of all we order the touched nodes by part index. In this way, the list of touched nodes is
 * made by segments contained in the same part. We now consider in turn each segment; the part
 * containing the segment will be the current part.
 *
 * <p>
 * Suppose that the current part starts at index <code>begin</code> and ends at index
 * <code>end</code> (exclusive). First of all, for each node <code>x</code> in the current segment
 * of the touched nodes, the label <code>inFrom[x][0..inFill[x]-1]</code> is sorted according to the
 * following lexicographic order:
 * <ul>
 * <li>if the colour of the arc (<code>y</code>,<code>x</code>) is smaller than the colour of
 * (<code>y'</code>,<code>x</code>), then <code>y</code> must appear before <code>y'</code>;
 * <li>if the colour of the arc (<code>y</code>,<code>x</code>) is the same as the colour of
 * (<code>y'</code>,<code>x</code>), but the part of <code>y</code> is smaller than the part of
 * <code>y'</code>, then <code>y</code> must appear before <code>y'</code>.
 * </ul>
 *
 * <p>
 * After sorting, we can identify the new parts by scanning the touched nodes of the current segment
 * and comparing their labels. We know in advance where each node belongs, as the nodes must be moved
 * after an initial (possibly empty) part of untouched nodes in the same order in which they now
 * appear in the touched list; since we know the size of the original part, and the number of
 * touched nodes in the part, we can compute the number of untouched nodes in the current part and
 * move the touched nodes after them.
 *
 * <p>
 * Some care must be taken, though: when comparing the parts of <code>y</code> and <code>y'</code>
 * we are considering the new partitioning for the completed parts, but we use the old partitioning
 * for the current part (i.e., nodes in the current part are considered to have partition number
 * <code>begin</code>). This guarantees a subpartitioning process quicker than
 * Cardon&minus;Crochemore's (which uses the old parts for this whole phase), but at the same time
 * does not incur the asymptotic loss of <code>nauty</code>'s algorithm (which keeps active
 * parts in a queue and performs a full round for each part).
 */

public final class MinimumBase {
	/** Logger used by {@link #compute(ImmutableGraph, NodeColouringStrategy, ArcColouringStrategy)} to report per-round statistics. */
	private static final Logger LOGGER = LoggerFactory.getLogger(MinimumBase.class);
	/** Compile-time switch guarding the internal consistency checks of the algorithm; while it is {@code false} the guarded blocks are never executed. */
	private static final boolean ASSERTS = false;

	/** This class offers static methods only and cannot be instantiated. */
	private MinimumBase() {}

	/** Orders nodes by the part they currently belong to, a part being identified by its starting index. */
	private static final class PartComparator implements IntComparator {
		/** The start array (with overlapped cardinalities); always read through {@code get()}. */
		private final int[] start;
		/** The inverse of the part permutation: maps a node to its current position. */
		private final int[] inv;

		private PartComparator(final int[] start, final int[] inv) {
			this.inv = inv;
			this.start = start;
		}

		@Override
		public int compare(final int x, final int y) {
			final int partOfX = get(start, inv[x]);
			final int partOfY = get(start, inv[y]);
			return partOfX - partOfY;
		}
	}

	/** Orders the sources of the arcs entering {@link #targetNode}: first by arc colour (when an arc
	 * colouring is available), then by the part the source currently belongs to. */
	private static final class ColourPartComparator implements IntComparator {
		/** The common target of the arcs whose sources are being compared; the caller must set it before sorting. */
		public int targetNode;
		/** The arc colouring, or {@code null} if arcs are not coloured. */
		private final ArcColouringStrategy colouringStrategy;
		/** The start array (with overlapped cardinalities); always read through {@code get()}. */
		private final int[] start;
		/** The inverse of the part permutation: maps a node to its current position. */
		private final int[] inv;
		/** Whether {@link #colouringStrategy} is non-{@code null}. */
		private final boolean hasColours;

		public ColourPartComparator(final int[] start, final int[] inv, final ArcColouringStrategy colouringStrategy) {
			this.colouringStrategy = colouringStrategy;
			this.start = start;
			this.inv = inv;
			this.hasColours = colouringStrategy != null;
		}

		/** Compares two source nodes of arcs entering {@link #targetNode}.
		 *
		 * @param i a source node.
		 * @param j another source node.
		 * @return a negative, zero or positive value depending on whether the pair (colour, part) of
		 * <code>i</code> is smaller than, equal to or greater than that of <code>j</code>.
		 */
		@Override
		public int compare(final int i, final int j) {
			if (hasColours) {
				// Colours are arbitrary ints: subtracting them could overflow and yield the wrong sign.
				final int diff = Integer.compare(colouringStrategy.colour(i, targetNode), colouringStrategy.colour(j, targetNode));
				if (diff != 0) return diff;
			}
			return Integer.compare(get(start, inv[i]), get(start, inv[j]));
		}
	}

	/** Compares nodes by their labels: first by label length, then position by position, by arc
	 * colour (when an arc colouring is available) and by the part of the source node. */
	private static final class NodeLengthLexComparator implements IntComparator {
		/** For each node, the number of valid entries of its label. */
		private final int[] inFill;
		/** For each node, its label, that is, a list of sources of arcs entering the node. */
		private final int[][] inFrom;
		/** The arc colouring, or {@code null} if arcs are not coloured. */
		private final ArcColouringStrategy colouringStrategy;
		/** Whether {@link #colouringStrategy} is non-{@code null}. */
		private final boolean hasColours;
		/** The start array (with overlapped cardinalities); always read through {@code get()}. */
		private final int[] start;
		/** The inverse of the part permutation: maps a node to its current position. */
		private final int[] inv;

		/** The first position (inclusive) of the part being split; used by {@link #equal(int, int)} only. */
		public int beginCurrentPart;
		/** The last position (exclusive) of the part being split; used by {@link #equal(int, int)} only. */
		public int endCurrentPart;

		private NodeLengthLexComparator(final int[] inFill, final int[][] inFrom, final int[] start, final int[] inv, final ArcColouringStrategy colouringStrategy) {
			this.inFill = inFill;
			this.inFrom = inFrom;
			this.start = start;
			this.inv = inv;
			this.colouringStrategy = colouringStrategy;
			this.hasColours = colouringStrategy != null;
		}

		/** Compares the labels of two nodes by length and then lexicographically.
		 *
		 * @param x a node.
		 * @param y another node.
		 * @return a negative, zero or positive value depending on whether the label of <code>x</code>
		 * is smaller than, equal to or greater than the label of <code>y</code>.
		 */
		@Override
		public int compare(final int x, final int y) {
			final int lx = inFill[x];
			int diff = Integer.compare(lx, inFill[y]);
			if (diff != 0) return diff;
			final int[] fromX = inFrom[x], fromY = inFrom[y];
			for(int i = 0; i < lx; i++) {
				if (hasColours) {
					// Colours are arbitrary ints: subtracting them could overflow and yield the wrong sign.
					diff = Integer.compare(colouringStrategy.colour(fromX[i], x), colouringStrategy.colour(fromY[i], y));
					if (diff != 0) return diff;
				}
				diff = Integer.compare(get(start, inv[fromX[i]]), get(start, inv[fromY[i]]));
				if (diff != 0) return diff;
			}
			return 0;
		}

		/** Checks whether two nodes have the same label, treating all positions of the part being split as one part.
		 *
		 * <p>Every part start falling in the interval
		 * [{@link #beginCurrentPart}..{@link #endCurrentPart}) is replaced by {@link #beginCurrentPart}
		 * before being compared.
		 *
		 * @param x a node.
		 * @param y another node.
		 * @return true if the two labels have the same length and agree, position by position, on
		 * colours (if any) and on the (normalized) parts of the sources.
		 */
		public boolean equal(final int x, final int y) {
			final int lx = inFill[x];
			if (lx != inFill[y]) return false;
			final int[] fromX = inFrom[x], fromY = inFrom[y];
			int startx, starty;
			for(int i = 0; i < lx; i++) {
				if (hasColours && colouringStrategy.colour(fromX[i], x) != colouringStrategy.colour(fromY[i], y)) return false;
				startx = get(start, inv[fromX[i]]);
				starty = get(start, inv[fromY[i]]);
				if (startx >= beginCurrentPart && startx < endCurrentPart) startx = beginCurrentPart;
				if (starty >= beginCurrentPart && starty < endCurrentPart) starty = beginCurrentPart;
				if (startx != starty) return false;
			}
			return true;
		}
	}

	/** Decodes an entry of the start array, whose negative entries hold something other than a start.
	 *
	 * <p>A nonnegative entry is returned as it is; for a negative entry the index itself is returned.
	 *
	 * @param start the start array.
	 * @param index an index.
	 * @return <code>start[index]</code>, if nonnegative; <code>index</code>, otherwise.
	 */
	private final static int get(final int[] start, final int index) {
		final int stored = start[index];
		if (stored >= 0) return stored;
		return index;
	}

	/** Returns a labelling of an immutable graph such that two nodes have the same label iff they
	 * are in the same fibre of minimal fibrations.
	 *
	 * <p>Note that the labelling is surjective&mdash;if a node
	 * has label <var>k</var>, there are nodes with label <var>j</var>, for every 0&le;<var>j</var>&le;<var>k</var>.
	 *
	 * <p>For a graph without nodes, an empty array is returned.
	 *
	 * @param g an immutable graph.
	 * @param nodeColouring a colouring for the nodes, or {@code null}. NOTE(review): the body of this
	 * method never reads this argument, so at present node colours have no effect on the result;
	 * confirm whether this is intended.
	 * @param arcColouring a colouring for the arcs, or {@code null}.
	 * @return an array of integers labelling the graph so that two nodes have the same label iff they
	 * are in the same fibre of minimal fibrations.
	 */

	public static int[] compute(final ImmutableGraph g, final NodeColouringStrategy nodeColouring, final ArcColouringStrategy arcColouring) {
		final int n = g.numNodes();

		// An empty graph has an empty labelling; the final renumbering assumes that part[0] exists.
		if (n == 0) return new int[0];

		// Precomputation of indegrees (for allocating the colour/part lists).
		final int[] inFill = new int[n];
		int[] succ;
		int d;

		final NodeIterator nodeIterator = g.nodeIterator();
		for(int i = 0; i < n; i++) {
			nodeIterator.nextInt();
			d = nodeIterator.outdegree();
			if (d == 0) continue;
			succ = nodeIterator.successorArray();
			while(d-- != 0) inFill[succ[d]]++;
		}

		// Allocation of colour/part lists.
		final int[][] inFrom = new int[n][];
		for(int i = n; i-- != 0;) inFrom[i] = new int[inFill[i]];

		// From now on inFill[x] is the number of entries of inFrom[x] filled in the current round.
		Arrays.fill(inFill, 0);

		/* Parts array: a permutation of nodes. Each entry represents a node in the part.
		 * Parts are contiguous. */
		final int[] part = new int[n];
		for(int i = n; i-- != 0;) part[i] = i;
		/* The inverse of part. A sentinel is added at the end to avoid special cases. */
		final int[] inv = new int[n + 1];
		System.arraycopy(part, 0, inv, 0, n);
		inv[n] = n;
		/* The start of the part at the given index. More precisely, start[x] is the index in
		 * part[] where the part containing the node part[x] starts.
		 * A sentinel is added at the end to avoid special cases. */
		final int[] start = new int[n + 1];
		start[n] = n;

		/* At index i, the opposite of the cardinality of part starting at i minus one.
		 * This vector is actually overlapped to start, and get() takes care of disambiguation. */
		final int[] card = start;
		card[0] = -n - 1;

		// The number of active parts in the current round.
		int numActiveParts = 1;
		// The list of active parts (initially, part 0).
		final int[] startActivePart = new int[n];

		/* The list of nodes that have been touched during the current round.
		   A sentinel will be added each time the list is refilled, to avoid special cases. */
		final int[] touchedList = new int[n + 1];
		int touchedListLength = 0;

		int s, x, y;
		LazyIntIterator successors;

		final NodeLengthLexComparator nodeLengthLexComparator = new NodeLengthLexComparator(inFill, inFrom, start, inv, arcColouring);
		final ColourPartComparator colourPartComparator = new ColourPartComparator(start, inv, arcColouring);
		final PartComparator partComparator = new PartComparator(start, inv);

		int overallMaxLength = n, overallMinLength = n, numSingletons = 0, numParts = 1;

		for(int k = 0; k < n; k++) {
			LOGGER.info("Starting phase " + k + " [parts=" + numParts + ", active=" + numActiveParts + ", size=" + overallMinLength + " -> " + overallMaxLength + ", {*}=" + numSingletons + "]...");
			overallMaxLength = -1;
			overallMinLength = Integer.MAX_VALUE;
			numSingletons = 0;

			// First phase: we run through the active parts and follow outlinks.
			for(int p = numActiveParts; p-- != 0;) {
				s = startActivePart[p];
				for(int j = -card[s] - 1; j-- != 0;) {
					x = part[j + s];
					d = g.outdegree(x);
					successors = g.successors(x);
					while(d-- != 0) {
						y = successors.nextInt();
						// If y has never been touched, we add it to the touched list.
						if (inFill[y] == 0) touchedList[touchedListLength++] = y;
						inFrom[y][inFill[y]++] = x;
					}
				}
			}

			LOGGER.info("Touched: " + touchedListLength);

			if (ASSERTS) {
				for(int i = n; i-- != 0;) {
					// At the first stage, the number of active predecessors must be equal to the number of predecessors.
					if (k == 0) assert inFill[i] == inFrom[i].length;
				}
			}

			// We update card to reflect the subtraction of the touched nodes, and sort the lists.
			for(int j = 0; j < touchedListLength; j++) card[get(start, inv[touchedList[j]])]++;

			IntArrays.quickSort(touchedList, 0, touchedListLength, partComparator);

			// Sentinel: inv[n] = n and start[n] = n, so the sentinel never belongs to a real part.
			touchedList[touchedListLength] = n;

			// Now we do the final pass: we examine each part that has been touched.
			numActiveParts = 0;

			int currStart, currBegin, otherNode, positionX, positionOtherNode;

			for(int begin = 0; touchedList[begin] != n;) {
				final int oldStart = get(start, inv[touchedList[begin]]);

				// We find the end of the part.
				int end;
				for(end = begin; get(start, inv[x = touchedList[end]]) == oldStart; end++) {
					colourPartComparator.targetNode = x;
					IntArrays.quickSort(inFrom[x], 0, inFill[x], colourPartComparator);
				}

				// Whether there are untouched nodes in this partition
				final boolean someUntouched = card[oldStart] != -1;

				if (! someUntouched && begin + 1 == end) {
					// We need no processing for singletons.
					inFill[touchedList[begin]] = 0;
					card[oldStart] = -2;
					begin = end;
					continue;
				}

				if (! someUntouched) numParts--; // stats

				IntArrays.quickSort(touchedList, begin, end, nodeLengthLexComparator);

				if (ASSERTS)
					for(int i = begin; i < end; i++)
						assert get(start, inv[touchedList[i]]) == get(start, inv[touchedList[begin]]);

				/* Now we go through the sorted partition, identifying partition borders. We
				 keep track of some statistical data, and of the first largest part. */
				int maxLength = -1, currLength, maxStart = begin;

				/* Nodes in the current part are to be considered equal by the comparator. */
				final int oldEnd = oldStart - card[oldStart] - 1 + end - begin;
				nodeLengthLexComparator.beginCurrentPart = oldStart;
				nodeLengthLexComparator.endCurrentPart = oldEnd;

				if (ASSERTS) {
					assert oldEnd == n || get(start, oldEnd) == oldEnd;
					for(int i = oldStart + 1; i < oldEnd; i++) assert get(start, i) != i;
				}

				// Used by asserts to compute the number of breaks in the current part.
				int numberOfBreaks = 0;
				if (ASSERTS) {
					for(int l = begin; l < end; l++) {
						if (l != end - 1 && nodeLengthLexComparator.equal(touchedList[l], touchedList[l + 1])) continue;
						numberOfBreaks++;
					}
				}

				// The touched nodes are moved right after the untouched nodes of the part.
				currStart = oldStart - card[oldStart] - 1;
				currBegin = begin;

				for(int l = begin; l < end; l++) {
					x = touchedList[l];

					/* Now node x should be put in position currStart + l - currBegin */
					otherNode = part[currStart + l - currBegin];
					positionX = inv[x];
					positionOtherNode = inv[otherNode];

					part[positionOtherNode] = x;
					part[positionX] = otherNode;
					inv[x] = positionOtherNode;
					inv[otherNode] = positionX;

					start[positionOtherNode] = currStart;

					if (l != end - 1 && nodeLengthLexComparator.equal(x, touchedList[l + 1])) continue;

					numParts++; // stats
					if (ASSERTS) numberOfBreaks--;
					currLength = l - currBegin + 1;
					if (currLength > maxLength) {
						maxStart = currStart;
						maxLength = currLength;
					}
					currBegin = l + 1;
					card[currStart] = -currLength - 1;
					currStart += currLength;

					// Stats
					if (currLength < overallMinLength) overallMinLength = currLength;
					if (currLength > overallMaxLength) overallMaxLength = currLength;
					if (currLength == 1) numSingletons++;
				}

				if (ASSERTS) {
					assert numberOfBreaks == 0;
					assert currStart == oldStart + (someUntouched ? -card[oldStart]-1 : 0) + end - begin
						: currStart + ", " + (oldStart + (someUntouched ? -card[oldStart]-1 : 0) + end - begin);
					for(int i = begin; i < end - 1; i++) assert get(start, inv[touchedList[i]]) <= get(start, inv[touchedList[i + 1]]);
					if (someUntouched) {
						for(int i = 0; i < -card[oldStart]-1; i++) assert inFill[part[oldStart + i]] == 0;
						for(int i = -card[oldStart]-1; i < oldEnd - oldStart; i++) assert inFill[part[oldStart + i]] > 0;
					}
				}

				/* We make a final scan through the parts, adding all
				 new parts (except the first largest part) to the active parts list. */

				if (someUntouched) {
					if (-card[oldStart]-1 < maxLength) startActivePart[numActiveParts++] = oldStart;
					else maxStart = -1;
				}
				for(int l = begin; l < end; l++) {
					s = get(start, inv[touchedList[l]]);
					// Labels are reset so that the next round starts with empty lists.
					inFill[touchedList[l]] = 0;
					if (s != get(start, inv[touchedList[l + 1]]) && s != maxStart) startActivePart[numActiveParts++] = s;
				}

				begin = end;
			}

			if (ASSERTS) {
				for(int i = n; i-- != 0;) {
					assert inFill[i] == 0;
					assert inv[part [i]] == i;
					assert part[inv[i]] == i;
					assert get(start, get(start, i)) == get(start, i);
					assert -card[get(start, inv[i])]-1 > 0;
					assert get(start, inv[i]) != inv[i] || -card[inv[i]]-1 != 0;
				}
			}
			if (numActiveParts == 0) break;
			touchedListLength = 0;
		}

		// We renumber parts from 0 and put the result into inFill.
		x = s = 0;
		inFill[part[0]] = 0;
		for(int i = 1; i < n; i++) {
			y = part[i];
			if (get(start, i) != s) {
				x++;
				s = get(start, i);
			}
			inFill[y] = x;
		}

		return inFill;
	}

	/** Checks whether two labellings are the same up to a renaming of the labels.
	 *
	 * <p>Labels are used as indices into an array with as many elements as the labellings, and
	 * &minus;1 is used internally to mark labels not seen yet.
	 *
	 * @param a a labelling.
	 * @param b another labelling.
	 * @return true if each label of <code>a</code> is always paired with the same label of
	 * <code>b</code>, and vice versa.
	 * @throws IllegalArgumentException if the two arrays have different lengths.
	 */
	public static boolean equalLabellings(final int[] a, final int[] b) {
		if (a.length != b.length) throw new IllegalArgumentException();
		final int[] image = new int[a.length];
		return pairsConsistently(a, b, image) && pairsConsistently(b, a, image);
	}

	/** Checks that each label of <code>from</code> is always paired with the same label of <code>to</code>, using <code>image</code> as scratch space. */
	private static boolean pairsConsistently(final int[] from, final int[] to, final int[] image) {
		Arrays.fill(image, -1);
		for(int i = from.length - 1; i >= 0; i--) {
			final int label = from[i];
			final int seen = image[label];
			if (seen == -1) image[label] = to[i];
			else if (seen != to[i]) return false;
		}
		return true;
	}

	/** Command-line entry point; behaviour depends on the number of arguments.
	 *
	 * <ul>
	 * <li>One argument: loads the graph with {@code ImmutableGraph.load(arg[0])}, computes the
	 * labelling without colours and prints it on standard output.
	 * <li>Two arguments: as above, but the labelling is stored with {@code BinIO.storeInts()} into <code>arg[1]</code>.
	 * <li>Three arguments: loads a list of integers from <code>arg[1]</code> and colours each arc
	 * with the integer whose index is the first argument passed to the arc colouring; the labelling is
	 * stored into <code>arg[2]</code>.
	 * </ul>
	 *
	 * @param arg the command-line arguments.
	 * @throws IOException declared for the load and store calls.
	 * @throws IllegalArgumentException if the number of arguments is not 1, 2 or 3.
	 */
	public static void main(final String arg[]) throws IOException {
		switch(arg.length) {
		case 1: {
			final int[] labelling = compute(ImmutableGraph.load(arg[0]), null, null);
			System.out.println(IntArrayList.wrap(labelling));
			break;
		}
		case 2: {
			final int[] labelling = compute(ImmutableGraph.load(arg[0]), null, null);
			BinIO.storeInts(labelling, arg[1]);
			break;
		}
		case 3: {
			// The colours are loaded before the graph.
			final int[] colour = BinIO.loadInts(arg[1]);
			final int[] labelling = compute(ImmutableGraph.load(arg[0]), null, (x, y) -> colour[x]);
			BinIO.storeInts(labelling, arg[2]);
			break;
		}
		default:
			throw new IllegalArgumentException();
		}
	}
}
