```{- |
Description: Huffman codes
Maintainer: dev@chungyc.org

Part of Ninety-Nine Haskell "Problems".  Some solutions are in "Solutions.P50".
-}
module Problems.P50
( huffman
-- * Supporting functions
-- | The functions below are not part of the problem.
-- Instead, they are used to illustrate the use of Huffman coding.
, countCharacters
, encodeHuffman
, decodeHuffman
, loweralpha
, ascii
, text
) where

import           Data.List     (group, sort)
import           Data.Map.Lazy (Map)
import qualified Data.Map.Lazy as Map
import qualified Solutions.P50 as Solution

{- |
Given a list of characters and their number of occurrences,
construct a list of the characters and their [Huffman encoding](https://brilliant.org/wiki/huffman-encoding/).

In the encoding, @'0'@ and @'1'@ will denote the 0 and 1 bits.

=== Examples

>>> huffman [('a',45),('b',13),('c',12),('d',16),('e',9),('f',5)]
[('a',"0"),('b',"101"),('c',"100"),('d',"111"),('e',"1101"),('f',"1100")]

The encoding table computed by 'huffman' can be used to compress data:

>>> length \$ encodeHuffman (huffman \$ countCharacters text) text
3552

Compare this to the length of a fixed-length 5-bit encoding:

>>> length \$ encodeHuffman loweralpha text
4375

or the length of the more standard ASCII encoding with 8 bits:

>>> length \$ encodeHuffman ascii text
7000

Huffman coding is unambiguous, so we can get back the original text:

>>> let table = huffman \$ countCharacters text
>>> let encodedText = encodeHuffman table text
>>> let decodedText = decodeHuffman table encodedText
>>> decodedText == text
True
-}
huffman :: [(Char,Int)] -> [(Char,String)]
huffman :: [(Char, Int)] -> [(Char, String)]
huffman = [(Char, Int)] -> [(Char, String)]
Solution.huffman

-- | Count the number of occurrences of a character in a string.
countCharacters :: String -> [(Char,Int)]
countCharacters :: String -> [(Char, Int)]
countCharacters String
s = forall a b. (a -> b) -> [a] -> [b]
map (\String
xs -> (forall a. [a] -> a
xs, forall (t :: * -> *) a. Foldable t => t a -> Int
length String
xs)) forall a b. (a -> b) -> a -> b
\$ forall a. Eq a => [a] -> [[a]]
group forall a b. (a -> b) -> a -> b
\$ forall a. Ord a => [a] -> [a]
sort String
s

-- | Given an encoding table and a string, encode the string.
--
-- While this is intended for use in illustrating Huffman coding, it is not limited to such.
-- In particular, it can encode with a fixed-length encoding table.
encodeHuffman :: [(Char,String)] -> String -> String
encodeHuffman :: [(Char, String)] -> String -> String
encodeHuffman [(Char, String)]
table String
string = Map Char String -> String -> String -> String
encodeHuffman' (forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList [(Char, String)]
table) String
string String
""

encodeHuffman' :: Map Char String -> String -> String -> String
encodeHuffman' :: Map Char String -> String -> String -> String
encodeHuffman' Map Char String
_ String
"" String
encoded = forall a. [a] -> [a]
reverse String
encoded
encodeHuffman' Map Char String
table (Char
c:String
string) String
encoded =
case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Char
c Map Char String
table of
Maybe String
Nothing -> forall a. HasCallStack => a
undefined
Just String
e  -> Map Char String -> String -> String -> String
encodeHuffman' Map Char String
table String
string forall a b. (a -> b) -> a -> b
\$ forall a. [a] -> [a]
reverse String
e forall a. [a] -> [a] -> [a]
++ String
encoded

-- | Given an encoding table and a string, decode the string.
--
-- While this is intended for use in illustrating Huffman coding, it is not limited to such.
-- In particular, it can decode with a fixed-length encoding table.
decodeHuffman :: [(Char,String)] -> String -> String
decodeHuffman :: [(Char, String)] -> String -> String
decodeHuffman [(Char, String)]
table String
string = Map String Char -> String -> String -> String -> String
decodeHuffman' (forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList forall a b. (a -> b) -> a -> b
\$ forall a b. (a -> b) -> [a] -> [b]
map (\(Char
x,String
y) -> (forall a. [a] -> [a]
reverse String
y, Char
x)) [(Char, String)]
table) String
"" String
string String
""

decodeHuffman' :: Map String Char -> String -> String -> String -> String
decodeHuffman' :: Map String Char -> String -> String -> String -> String
decodeHuffman' Map String Char
_ String
"" String
"" String
decoded = forall a. [a] -> [a]
reverse String
decoded
decodeHuffman' Map String Char
table String
code String
encoded String
decoded =
case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup String
code Map String Char
table of
Maybe Char
Nothing -> case String
encoded of
(Char
c:String
encoded') -> Map String Char -> String -> String -> String -> String
decodeHuffman' Map String Char
table (Char
cforall a. a -> [a] -> [a]
:String
code) String
encoded' String
decoded
String
""           -> forall a. HasCallStack => a
undefined
Just Char
c -> Map String Char -> String -> String -> String -> String
decodeHuffman' Map String Char
table String
"" String
encoded (Char
cforall a. a -> [a] -> [a]
:String
decoded)

-- | Fixed-length encoding of lower case letters and a space using 5 bits.
loweralpha :: [(Char,String)]
loweralpha :: [(Char, String)]
loweralpha = [forall {a}. Enum a => a -> (a, String)
encode Char
c | Char
c <- Char
' ' forall a. a -> [a] -> [a]
: [Char
'a'..Char
'z']]
where encode :: a -> (a, String)
encode a
c = (a
c, Int -> String
toBits forall a b. (a -> b) -> a -> b
\$ forall a. Enum a => a -> Int
c forall a. Num a => a -> a -> a
- forall a. Enum a => a -> Int
'a')
toBits :: Int -> String
toBits Int
n = Int -> Int -> String
getBits Int
n Int
5

-- | Fixed-length encoding of ASCII characters using 8 bits.
ascii :: [(Char,String)]
ascii :: [(Char, String)]
ascii = [forall {a}. Enum a => a -> (a, String)
encode Char
c | Char
c <- [Char
'\0'..Char
'\127']]
where encode :: a -> (a, String)
encode a
c = (a
c, Int -> String
toBits forall a b. (a -> b) -> a -> b
\$ forall a. Enum a => a -> Int
c forall a. Num a => a -> a -> a
- forall a. Enum a => a -> Int
'\0')
toBits :: Int -> String
toBits Int
n = Int -> Int -> String
getBits Int
n Int
8

getBits :: Int -> Int -> String
getBits :: Int -> Int -> String
getBits Int
_ Int
0 = []
getBits Int
n Int
b = Char
bit forall a. a -> [a] -> [a]
: Int -> Int -> String
getBits (Int
n forall a. Integral a => a -> a -> a
`div` Int
2) (Int
bforall a. Num a => a -> a -> a
-Int
1)
where bit :: Char
bit | Int
n forall a. Integral a => a -> a -> a
`mod` Int
2 forall a. Eq a => a -> a -> Bool
== Int
1 = Char
'1'
| Bool
otherwise      = Char
'0'

-- | Long text against which various encoding schemes can be tried.
text :: String
text :: String
text =
String
"this is going to be a very long string of text which tries to use all letters " forall a. [a] -> [a] -> [a]
++
String
"of the alphabet  there is no punctuation and no upper case letters because i " forall a. [a] -> [a] -> [a]
++
String
"did not want to write more code to create the encoding table  and it also means " forall a. [a] -> [a] -> [a]
++
String
"it can use fewer bits to encode each letter with a fixed size encoding  " forall a. [a] -> [a] -> [a]
++
String
"using most of the letters means that the five bit encoding is the smallest " forall a. [a] -> [a] -> [a]
++
String
"fixed size encoding for this text  and i actually find it easier to write out " forall a. [a] -> [a] -> [a]
++
String
"this text randomly instead of carefully gathering the letters used by a certain " forall a. [a] -> [a] -> [a]
++
String
"text and getting a smaller fixed size encoding from just those letters  " forall a. [a] -> [a] -> [a]
++
String
"it is trying to be zany for the sake of being zany  being quite long  it is also " forall a. [a] -> [a] -> [a]
++
String
"more convincing that the extra space for the huffman coding table can be worth " forall a. [a] -> [a] -> [a]
++
String
"the extra cost because it is less than the savings we get from encoding a long text " forall a. [a] -> [a] -> [a]
++
String
"with huffman coding"
```