{- |
Description: Huffman codes
Copyright: Copyright (C) 2021 Yoo Chung
License: GPL-3.0-or-later
Maintainer: dev@chungyc.org

Some solutions to "Problems.P50" of Ninety-Nine Haskell "Problems".
-}
module Solutions.P50 (huffman) where

import           Data.Map.Lazy (Map)
import qualified Data.Map.Lazy as Map

{- |
Given a list of symbols and their number of occurrences,
construct a list of the symbols and their [Huffman encoding](https://brilliant.org/wiki/huffman-encoding/).

The characters @'0'@ and @'1'@ will represent the 0 and 1 bits in the encoding.
-}
huffman :: [(Char,Int)] -> [(Char,String)]
huffman :: [(Char, Int)] -> [(Char, String)]
huffman []      = []
huffman [(Char
c,Int
_)] = [(Char
c,String
"0")]
huffman [(Char, Int)]
cs      = HuffmanTree -> [(Char, String)]
codes (HuffmanTree -> [(Char, String)])
-> HuffmanTree -> [(Char, String)]
forall a b. (a -> b) -> a -> b
$ ((Int, Char), HuffmanTree) -> HuffmanTree
forall a b. (a, b) -> b
snd (((Int, Char), HuffmanTree) -> HuffmanTree)
-> ((Int, Char), HuffmanTree) -> HuffmanTree
forall a b. (a -> b) -> a -> b
$ Map (Int, Char) HuffmanTree -> ((Int, Char), HuffmanTree)
forall k a. Map k a -> (k, a)
Map.findMin (Map (Int, Char) HuffmanTree -> ((Int, Char), HuffmanTree))
-> Map (Int, Char) HuffmanTree -> ((Int, Char), HuffmanTree)
forall a b. (a -> b) -> a -> b
$ Map (Int, Char) HuffmanTree -> Map (Int, Char) HuffmanTree
build (Map (Int, Char) HuffmanTree -> Map (Int, Char) HuffmanTree)
-> Map (Int, Char) HuffmanTree -> Map (Int, Char) HuffmanTree
forall a b. (a -> b) -> a -> b
$ [(Char, Int)] -> Map (Int, Char) HuffmanTree
initial [(Char, Int)]
cs

data HuffmanTree = Leaf Char | Branch HuffmanTree HuffmanTree

-- | Maps each character to its initial Huffman tree keyed by weight.
-- The map keys by (weight,character) to be able to keep multiple
-- Huffman trees that have the same weight; otherwise, it only
-- needs to be able to get the Huffman tree with minimum weight.
--
-- A proper priority queue would usually be better to use here.
-- It is not done here to avoid having to implement a priority queue
-- or depend on another package.
initial :: [(Char,Int)] -> Map (Int,Char) HuffmanTree
initial :: [(Char, Int)] -> Map (Int, Char) HuffmanTree
initial [(Char, Int)]
cs = [((Int, Char), HuffmanTree)] -> Map (Int, Char) HuffmanTree
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([((Int, Char), HuffmanTree)] -> Map (Int, Char) HuffmanTree)
-> [((Int, Char), HuffmanTree)] -> Map (Int, Char) HuffmanTree
forall a b. (a -> b) -> a -> b
$ ((Char, Int) -> ((Int, Char), HuffmanTree))
-> [(Char, Int)] -> [((Int, Char), HuffmanTree)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Char
c,Int
n) -> ((Int
n,Char
c), Char -> HuffmanTree
Leaf Char
c)) [(Char, Int)]
cs

-- | Extract two Huffman trees with minimum weight and combine them into a single tree,
-- and repeat until there is only one tree left.
build :: Map (Int,Char) HuffmanTree -> Map (Int,Char) HuffmanTree
build :: Map (Int, Char) HuffmanTree -> Map (Int, Char) HuffmanTree
build Map (Int, Char) HuffmanTree
m | Map (Int, Char) HuffmanTree -> Int
forall k a. Map k a -> Int
Map.size Map (Int, Char) HuffmanTree
m Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = Map (Int, Char) HuffmanTree
m
        | Bool
otherwise      = Map (Int, Char) HuffmanTree -> Map (Int, Char) HuffmanTree
build (Map (Int, Char) HuffmanTree -> Map (Int, Char) HuffmanTree)
-> Map (Int, Char) HuffmanTree -> Map (Int, Char) HuffmanTree
forall a b. (a -> b) -> a -> b
$ (Int, Char)
-> HuffmanTree
-> Map (Int, Char) HuffmanTree
-> Map (Int, Char) HuffmanTree
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert (Int
wInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
w',Char
c) (HuffmanTree -> HuffmanTree -> HuffmanTree
Branch HuffmanTree
t HuffmanTree
t') Map (Int, Char) HuffmanTree
m''
  where (((Int
w,Char
c), HuffmanTree
t), Map (Int, Char) HuffmanTree
m') = Map (Int, Char) HuffmanTree
-> (((Int, Char), HuffmanTree), Map (Int, Char) HuffmanTree)
forall k a. Map k a -> ((k, a), Map k a)
Map.deleteFindMin Map (Int, Char) HuffmanTree
m
        (((Int
w',Char
_), HuffmanTree
t'), Map (Int, Char) HuffmanTree
m'') = Map (Int, Char) HuffmanTree
-> (((Int, Char), HuffmanTree), Map (Int, Char) HuffmanTree)
forall k a. Map k a -> ((k, a), Map k a)
Map.deleteFindMin Map (Int, Char) HuffmanTree
m'

-- | Turn a Huffman tree into the concrete encoding for each character.
codes :: HuffmanTree -> [(Char,String)]
codes :: HuffmanTree -> [(Char, String)]
codes (Leaf Char
c) = [(Char
c,String
"")]
codes (Branch HuffmanTree
l HuffmanTree
r) = Char -> [(Char, String)] -> [(Char, String)]
forall {a} {a}. a -> [(a, [a])] -> [(a, [a])]
prepend Char
'0' (HuffmanTree -> [(Char, String)]
codes HuffmanTree
l) [(Char, String)] -> [(Char, String)] -> [(Char, String)]
forall a. [a] -> [a] -> [a]
++ Char -> [(Char, String)] -> [(Char, String)]
forall {a} {a}. a -> [(a, [a])] -> [(a, [a])]
prepend Char
'1' (HuffmanTree -> [(Char, String)]
codes HuffmanTree
r)
  where prepend :: a -> [(a, [a])] -> [(a, [a])]
prepend a
b = ((a, [a]) -> (a, [a])) -> [(a, [a])] -> [(a, [a])]
forall a b. (a -> b) -> [a] -> [b]
map (\(a
c,[a]
e) -> (a
c, a
ba -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
e))