/* File compression using  Huffman tree.
   Based on code by Duane Bailey.
   Rewritten and adapted by Claude Anderson, 5/5/08.

   This program reads text from standard input, constructs a
   Huffman tree based on the frequency of characters, and prints
   out a table of characters, frequencies, and codes.
 */

import java.util.HashMap;
import java.util.Scanner;
import java.util.PriorityQueue;

public class Huffman {

	public static void main(String args[]) throws Exception {
		Scanner sc = new Scanner(System.in);
		HashMap<Character, Integer> freq = new HashMap<Character, Integer>(); 
		String oneLine; // current input line.
		// First read the data and count characters
		// Go through the input line, one character at a time.
		System.out.println("Message to be encoded (CTRL-Z to end):");
		while (sc.hasNext()) {
			oneLine = sc.next();
			for (int i = 0; i < oneLine.length(); i++) {
				char c = oneLine.charAt(i);
				if (freq.containsKey(c))
					freq.put(c, freq.get(c) + 1);
				else
					// first time we've seen c
					freq.put(c, 1);
			}
		}

		// Now the table of frequencies of each character is complete.
		// insert each character into a single-node Huffman tree
		PriorityQueue<HuffmanTree> treeQueue = new PriorityQueue<HuffmanTree>();
		for (char c : freq.keySet())
			treeQueue.add(new HuffmanTree(new Leaf(c, freq.get(c))));

		HuffmanTree smallest, secondSmallest;

		// merge trees in pairs until only one tree remains
		while (true) {
			smallest = treeQueue.poll();
			secondSmallest = treeQueue.poll();
			if (secondSmallest == null)
				break;
			// add bigger tree containing both to the sorted list.
			treeQueue.add(new HuffmanTree(smallest, secondSmallest));
		}

		// print the only tree that is left.
		smallest.print();
	}

} // End of Huffman class definition.

/*
 * 
 * C:\Personal\Courses\CS-220\java-source\HuffmanTrees>java Huffman If a
 * woodchuck could chuck wood Encoding of o is 00 (frequency was 5, length of
 * code is 2) Encoding of u is 010 (frequency was 3, length of code is 3)
 * Encoding of I is 01100 (frequency was 1, length of code is 5) Encoding of a
 * is 01101 (frequency was 1, length of code is 5) Encoding of f is 01110
 * (frequency was 1, length of code is 5) Encoding of l is 01111 (frequency was
 * 1, length of code is 5) Encoding of w is 1000 (frequency was 2, length of
 * code is 4) Encoding of k is 1001 (frequency was 2, length of code is 4)
 * Encoding of c is 101 (frequency was 5, length of code is 3) Encoding of is
 * 110 (frequency was 5, length of code is 3) Encoding of h is 1110 (frequency
 * was 2, length of code is 4) Encoding of d is 1111 (frequency was 3, length of
 * code is 4) Total bits required for message: 105
 */