{- | Description: Huffman codes Copyright: Copyright (C) 2021 Yoo Chung License: GPL-3.0-or-later Maintainer: dev@chungyc.org Part of Ninety-Nine Haskell "Problems". Some solutions are in "Solutions.P50". -} module Problems.P50 ( huffman -- * Supporting functions -- | The functions below are not part of the problem. -- Instead, they are used to illustrate the use of Huffman coding. , countCharacters , encodeHuffman , decodeHuffman , loweralpha , ascii , text ) where import Data.List (group, sort) import Data.Map.Lazy (Map) import qualified Data.Map.Lazy as Map import qualified Solutions.P50 as Solution {- | Given a list of characters and their number of occurrences, construct a list of the characters and their [Huffman encoding](https://brilliant.org/wiki/huffman-encoding/). In the encoding, @'0'@ and @'1'@ will denote the 0 and 1 bits. === Examples >>> huffman [('a',45),('b',13),('c',12),('d',16),('e',9),('f',5)] [('a',"0"),('b',"101"),('c',"100"),('d',"111"),('e',"1101"),('f',"1100")] The encoding table computed by 'huffman' can be used to compress data: >>> length $ encodeHuffman (huffman $ countCharacters text) text 3552 Compare this to the length of a fixed-length 5-bit encoding: >>> length $ encodeHuffman loweralpha text 4375 or the length of the more standard ASCII encoding with 8 bits: >>> length $ encodeHuffman ascii text 7000 Huffman coding is unambiguous, so we can get back the original text: >>> let table = huffman $ countCharacters text >>> let encodedText = encodeHuffman table text >>> let decodedText = decodeHuffman table encodedText >>> decodedText == text True -} huffman :: [(Char,Int)] -> [(Char,String)] huffman :: [(Char, Int)] -> [(Char, String)] huffman = [(Char, Int)] -> [(Char, String)] Solution.huffman -- | Count the number of occurrences of a character in a string. countCharacters :: String -> [(Char,Int)] countCharacters :: String -> [(Char, Int)] countCharacters String s = forall a b. (a -> b) -> [a] -> [b] map (\String xs -> (forall a. [a] -> a head String xs, forall (t :: * -> *) a. Foldable t => t a -> Int length String xs)) forall a b. (a -> b) -> a -> b $ forall a. Eq a => [a] -> [[a]] group forall a b. (a -> b) -> a -> b $ forall a. Ord a => [a] -> [a] sort String s -- | Given an encoding table and a string, encode the string. -- -- While this is intended for use in illustrating Huffman coding, it is not limited to such. -- In particular, it can encode with a fixed-length encoding table. encodeHuffman :: [(Char,String)] -> String -> String encodeHuffman :: [(Char, String)] -> String -> String encodeHuffman [(Char, String)] table String string = Map Char String -> String -> String -> String encodeHuffman' (forall k a. Ord k => [(k, a)] -> Map k a Map.fromList [(Char, String)] table) String string String "" encodeHuffman' :: Map Char String -> String -> String -> String encodeHuffman' :: Map Char String -> String -> String -> String encodeHuffman' Map Char String _ String "" String encoded = forall a. [a] -> [a] reverse String encoded encodeHuffman' Map Char String table (Char c:String string) String encoded = case forall k a. Ord k => k -> Map k a -> Maybe a Map.lookup Char c Map Char String table of Maybe String Nothing -> forall a. HasCallStack => a undefined Just String e -> Map Char String -> String -> String -> String encodeHuffman' Map Char String table String string forall a b. (a -> b) -> a -> b $ forall a. [a] -> [a] reverse String e forall a. [a] -> [a] -> [a] ++ String encoded -- | Given an encoding table and a string, decode the string. -- -- While this is intended for use in illustrating Huffman coding, it is not limited to such. -- In particular, it can decode with a fixed-length encoding table. decodeHuffman :: [(Char,String)] -> String -> String decodeHuffman :: [(Char, String)] -> String -> String decodeHuffman [(Char, String)] table String string = Map String Char -> String -> String -> String -> String decodeHuffman' (forall k a. Ord k => [(k, a)] -> Map k a Map.fromList forall a b. (a -> b) -> a -> b $ forall a b. (a -> b) -> [a] -> [b] map (\(Char x,String y) -> (forall a. [a] -> [a] reverse String y, Char x)) [(Char, String)] table) String "" String string String "" decodeHuffman' :: Map String Char -> String -> String -> String -> String decodeHuffman' :: Map String Char -> String -> String -> String -> String decodeHuffman' Map String Char _ String "" String "" String decoded = forall a. [a] -> [a] reverse String decoded decodeHuffman' Map String Char table String code String encoded String decoded = case forall k a. Ord k => k -> Map k a -> Maybe a Map.lookup String code Map String Char table of Maybe Char Nothing -> case String encoded of (Char c:String encoded') -> Map String Char -> String -> String -> String -> String decodeHuffman' Map String Char table (Char cforall a. a -> [a] -> [a] :String code) String encoded' String decoded String "" -> forall a. HasCallStack => a undefined Just Char c -> Map String Char -> String -> String -> String -> String decodeHuffman' Map String Char table String "" String encoded (Char cforall a. a -> [a] -> [a] :String decoded) -- | Fixed-length encoding of lower case letters and a space using 5 bits. loweralpha :: [(Char,String)] loweralpha :: [(Char, String)] loweralpha = [forall {a}. Enum a => a -> (a, String) encode Char c | Char c <- Char ' ' forall a. a -> [a] -> [a] : [Char 'a'..Char 'z']] where encode :: a -> (a, String) encode a c = (a c, Int -> String toBits forall a b. (a -> b) -> a -> b $ forall a. Enum a => a -> Int fromEnum a c forall a. Num a => a -> a -> a - forall a. Enum a => a -> Int fromEnum Char 'a') toBits :: Int -> String toBits Int n = Int -> Int -> String getBits Int n Int 5 -- | Fixed-length encoding of ASCII characters using 8 bits. ascii :: [(Char,String)] ascii :: [(Char, String)] ascii = [forall {a}. Enum a => a -> (a, String) encode Char c | Char c <- [Char '\0'..Char '\127']] where encode :: a -> (a, String) encode a c = (a c, Int -> String toBits forall a b. (a -> b) -> a -> b $ forall a. Enum a => a -> Int fromEnum a c forall a. Num a => a -> a -> a - forall a. Enum a => a -> Int fromEnum Char '\0') toBits :: Int -> String toBits Int n = Int -> Int -> String getBits Int n Int 8 getBits :: Int -> Int -> String getBits :: Int -> Int -> String getBits Int _ Int 0 = [] getBits Int n Int b = Char bit forall a. a -> [a] -> [a] : Int -> Int -> String getBits (Int n forall a. Integral a => a -> a -> a `div` Int 2) (Int bforall a. Num a => a -> a -> a -Int 1) where bit :: Char bit | Int n forall a. Integral a => a -> a -> a `mod` Int 2 forall a. Eq a => a -> a -> Bool == Int 1 = Char '1' | Bool otherwise = Char '0' -- | Long text against which various encoding schemes can be tried. text :: String text :: String text = String "this is going to be a very long string of text which tries to use all letters " forall a. [a] -> [a] -> [a] ++ String "of the alphabet there is no punctuation and no upper case letters because i " forall a. [a] -> [a] -> [a] ++ String "did not want to write more code to create the encoding table and it also means " forall a. [a] -> [a] -> [a] ++ String "it can use fewer bits to encode each letter with a fixed size encoding " forall a. [a] -> [a] -> [a] ++ String "using most of the letters means that the five bit encoding is the smallest " forall a. [a] -> [a] -> [a] ++ String "fixed size encoding for this text and i actually find it easier to write out " forall a. [a] -> [a] -> [a] ++ String "this text randomly instead of carefully gathering the letters used by a certain " forall a. [a] -> [a] -> [a] ++ String "text and getting a smaller fixed size encoding from just those letters " forall a. [a] -> [a] -> [a] ++ String "it is trying to be zany for the sake of being zany being quite long it is also " forall a. [a] -> [a] -> [a] ++ String "more convincing that the extra space for the huffman coding table can be worth " forall a. [a] -> [a] -> [a] ++ String "the extra cost because it is less than the savings we get from encoding a long text " forall a. [a] -> [a] -> [a] ++ String "with huffman coding"