A brief introduction to the types and occurrences through examples.
Ex1.
abbacb
a
,b
,c
are the types.
a
occurres 2 times;b
occurres 3 times;c
occurres 1 times.
This can be represented more concisely as [('a',2),('b',3),('c',1)]
(Indeed, the order doesn't matter).
Ex2.
abbacb
ab
,bb
,ba
,ac
,cb
are sequences of typesEach sequence occurs only once.
This can be represented as [("ab",1),("bb",1),("ba",1),("ac",1),("cb",1)]
The following graphical structure has the same informative content of the previous two:
('a',2) -- 'a' occurs 2 times
('b',1) -- "ab" occurs 1 times
('c',1) -- "ac" occurs 1 times
('b',2) -- 'b' occurs 2 times
('a',1) -- "ba" occurs 1 times
('b',1) -- "bb" occurs 1 times
('c',1) -- 'c' occurs 1 times
('b',1) -- "cb" occurs 1 times
In Haskell: [(('a',2),[('b',1),('c',1)]),(('b',2),[('a',1),('b',1)]),(('c',1),[('b',1)])]
For occurrences of sequences of 3 elements:
('a',2) -- 'a' occurs 2 times
('b',1) -- "ab" occurs 1 times
('b',1) -- "abb" occurs 1 times
('c',1) -- "ac" occurs 1 times
('b',1) -- "acb" occurs 1 times
...
In Haskell:
[
(('a',2), [(('b',1),[('b',1)]),(('c',1),[('b',1)])]),
(('b',2), [(('a',1),[('c',1)]),(('b',1),[('a',1)])])
]
with type [((Char, Int), [((Char, Int), [(Char, Int)])])]
Even considering only the sequences of two and three elements, the comprehensibility of the graphical representation is much greater than that in Haskell.
In addition, lists are not very efficient, so I used the Data.Map
library and consequently a slightly different representation.
The following examples are based on Pi's digits. Interesting results can be obtained using the words of a novel.
My questions are:
Functions dedicated to the sequences of the three types are very complicated. It is possible to drastically simplify them?
I cannot even imagine how it is possible to generalize the functions for sequences of arbitrary length. Someone has an idea of how it could be done?
Using the following data type recursion should be easier to implement:
data TuplesTypesOccurences a = L (M.Map a Int) | B (M.Map a (Int,TuplesTypesOccurences a))
In this way however does not lose access to all of the functions in Data.Map
library?
import qualified Data.Map as M
import Data.List (sortBy)
piDigits = "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756"
type TypesOccurrences a = M.Map a Int
toTypeOccurrences :: Ord k => [k] -> TypesOccurrences k -> TypesOccurrences k
toTypeOccurrences [] mp = mp
toTypeOccurrences (x:xs) mp = toTypeOccurrences xs $ M.insertWith (+) x 1 mp
-- ex. toTypeOccurrences piDigits M.empty
pprintTO :: Show a => TypesOccurrences a -> IO ()
pprintTO = mapM_ putStrLn . map (\(xs,n) -> show xs ++ " " ++ (show n)). sortBy (\x y -> compare (snd y) (snd x)) . M.toList
-- ex. pprintTO . M.filter (> 22) . toTypeOccurrences piDigits $ M.empty
type Seq2TypeOccurrences a = M.Map a (Int,TypesOccurrences a)
toSQ2TO :: Ord a => [a] -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a
toSQ2TO [] mp = mp
toSQ2TO [x] mp = mp
toSQ2TO (x:y:xs) mp = toSQ2TO (y:xs) $
case M.lookup x mp of
Nothing -> M.insert x (1,M.singleton y 1) mp
Just (_,mp2) -> case M.lookup y mp2 of
Nothing -> M.update (\(n,mp2) -> Just (n+1,M.insert y 1 mp2)) x mp
Just _ -> M.update (\(n,mp2) -> Just (n+1,M.update (\m -> Just (m+1)) y mp2)) x mp
-- ex. toSQ2TO piDigits M.empty
pprintSQ2TO :: Show a => Seq2TypeOccurrences a -> IO ()
pprintSQ2TO = mapM_ putStrLn . map (\(x,(n,mp)) -> "(" ++ (show x) ++ "," ++ (show n) ++ ")\n\t" ++ (drop 2 . concatMap (("\n\t" ++) . show) . M.toList $ mp)) . M.toList
-- ex. pprintSQ2TO (toSQ2TO piDigits M.empty)
greaterThanSQ2TO :: Ord a => Int -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a
greaterThanSQ2TO n = M.filter (\(_,mp2) -> not . M.null $ mp2) . M.map (\(o,mp2) -> (o,M.filter (> n) mp2)) . M.filter (\(m,mp) -> m > n)
-- ex. pprintSQ2TO . greaterThanSQ2TO 4 . toSQ2TO piDigits $ M.empty
descSortSQ2TO :: Ord a => Seq2TypeOccurrences a -> [([a], Int)]
descSortSQ2TO = sortBy (\xs ys -> compare (snd ys) (snd xs)) . concatMap (\(x,ys) -> zipWith (\x (y,n) -> ([x,y],n)) (repeat x) ys ) . map (\(x,(_,mp2)) -> (x,M.toList mp2)) . M.toList
-- mapM_ print . descSortSQ2TO . greaterThanSQ2TO 4 . toSQ2TO piDigits $ M.empty
unionSQ2TO :: Ord a => Seq2TypeOccurrences a -> Seq2TypeOccurrences a -> Seq2TypeOccurrences a
unionSQ2TO = M.unionWith (\(n1,mp1) (n2,mp2) -> (n1+n2, M.unionWith (+) mp1 mp2))
type Seq3TypeOccurrences a = M.Map a (Int,Seq2TypeOccurrences a)
toSQ3TO :: Ord k => [k] -> Seq3TypeOccurrences k -> Seq3TypeOccurrences k
toSQ3TO [] mp = mp
toSQ3TO [x] mp = mp
toSQ3TO [x,y] mp = mp
toSQ3TO (x:y:z:xs) mp = toSQ3TO (y:z:xs) $
case M.lookup x mp of
Nothing -> M.insert x (1,M.singleton y (1,M.singleton z 1)) mp
Just (_,mp2) -> case M.lookup y mp2 of
Nothing -> M.update (\(n,mp2) -> Just (n+1,M.insert y (1,M.singleton z 1) mp2)) x mp
Just (m,kns3) -> case M.lookup z kns3 of
Nothing -> M.update (\(n,_) -> Just (n+1,M.update (\(m,mp3) -> Just (m+1,M.insert z 1 mp3)) y mp2)) x mp
Just _ -> M.update (\(n,_) -> Just (n+1,M.update (\(m,mp3) -> Just (m+1,M.update (Just . (+1)) z mp3)) y mp2)) x mp
-- ex. toSQ3TO piDigits M.empty
pprint3 :: Show a => Seq3TypeOccurrences a -> IO ()
pprint3 = mapM_ putStrLn . map (\(x,(n,mp)) -> "(" ++ (show x) ++ "," ++ (show n) ++ ")" ++ (concatMap (\(x2,(n2,mp2)) -> "\n\t(" ++ (show x2) ++ "," ++ (show n2) ++ ")" ++ (f mp2)) . M.toList $ mp)) . M.toList
where
f = concatMap (\(x,n) -> "\n\t\t(" ++ (show x) ++ "," ++ (show n) ++ ")") . M.toList
-- pprint3 . toSQ3TO piDigits $ M.empty
pprint3B :: Show a => Seq3TypeOccurrences a -> IO ()
pprint3B = mapM_ putStrLn . map (\(xs,n) -> show xs ++ " " ++ (show n)) . concatMap (\(xs,mp) -> zipWith (\ys (z,n) -> (ys ++ [z],n)) (repeat xs) mp) . concatMap (\(x,mp) -> zipWith (\y (z,mp2) -> ([y,z],mp2)) (repeat x) mp) . map (\(x,(_,mp)) -> (x, map (\(y,(_,mp2)) -> (y, M.toList mp2)) $ M.toList mp)) . M.toList
-- pprint3B . toSQ3TO piDigits $ M.empty
greaterThan3Q2TO :: Ord a => Int -> Seq3TypeOccurrences a -> Seq3TypeOccurrences a
greaterThan3Q2TO n = M.filter (\(_,mp) -> not . M.null $ mp)
. M.map (\(m,mp) -> (m,M.filter (\(o,mp2) -> not . M.null $ mp2) mp))
. M.map (\(m,mp) -> (m,M.map (\(o,mp2) -> (o,M.filter (>n) mp2)) mp))
. M.filter (\(_,mp) -> not. M.null $ mp)
. M.map (\(m,mp) -> (m,M.filter ((n <) . fst) mp))
. M.filter (\(m,mp) -> m > n)
-- ex. pprint3B . greaterThan3Q2TO 2 . toSQ3TO piDigits $ M.empty
unionSQ3TO :: Ord a => Seq3TypeOccurrences a -> Seq3TypeOccurrences a -> Seq3TypeOccurrences a
unionSQ3TO = M.unionWith (\(n,mp2a) (m,mp2b) -> (n+m,unionSQ2TO mp2a mp2b))
You need to define a recursive data structure like this:
data Trie = Nil | Trie (Map Char (Int, Trie))
This allows the show and add functions to be defined recursively.
Here is an implementation. Run test3
to see an example of how it works.
import qualified Data.Map as M
import Text.PrettyPrint
import Data.List
data Trie = Nil | Trie (M.Map Char (Int, Trie))
showTrie :: String -> Trie -> Doc
showTrie _ Nil = empty
showTrie prefix (Trie m) =
vcat $
do (k,(count,t)) <- M.assocs m
let prefix' = prefix ++ [k]
return $
vcat [ lparen <> char '"' <> text prefix' <> char '"' <> comma <> int count <> rparen
, nest 4 (showTrie prefix' t)
]
-- add an element to a Trie
addTrie :: Trie -> String -> Trie
addTrie t [] = t
addTrie Nil xs = addTrie (Trie M.empty) xs
addTrie (Trie m) (x:xs) =
case M.lookup x m of
Nothing -> let t' = addTrie Nil xs
in Trie $ M.insert x (1,t') m
Just (c,t) -> let t' = addTrie t xs
in Trie $ M.insert x (c+1,t') m
test1 =
let t1 = addTrie Nil "abcd"
t2 = addTrie t1 "abce"
in putStrLn $ render $ showTrie "" t2
test2 n str =
putStrLn $ render $ showTrie "" $
foldr (flip addTrie) Nil (map (take n) (tails str))
test3 = test2 4 "31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756"