-
Notifications
You must be signed in to change notification settings - Fork 2
/
markov-test.rkt
143 lines (134 loc) · 7.85 KB
/
markov-test.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#lang racket
(require rackunit "markov.rkt")
; n-gram-at
; The cases below pin (n-gram-at n i corpus) to the n consecutive words of
; corpus whose last word sits at index i.
(let* ([corpus '(My name is Andrey Markov and this is my chain.)]
       [last-index (sub1 (length corpus))]
       [sizes (range 1 11)])
  ; Windows ending on the final word: expect the trailing n words.
  (for ([n (in-list sizes)])
    (check-equal? (n-gram-at n last-index corpus)
                  (take-right corpus n)))
  ; Windows ending at index n-1: expect the leading n words.
  (for ([n (in-list sizes)])
    (check-equal? (n-gram-at n (sub1 n) corpus)
                  (take corpus n))))
; n-grams
; Table-driven: each row is (n corpus expected-counts). The result of
; (n-grams n corpus) is compared against a mutable equal?-keyed hash built
; from expected-counts, an alist of n-gram -> occurrence count.
; Symbols are case-sensitive, so My and my are counted as different words.
(for ([row (in-list
            '((2 (My name is Andrey Markov and this is my chain.)
                 (((My name) . 1)
                  ((name is) . 1)
                  ((is Andrey) . 1)
                  ((Andrey Markov) . 1)
                  ((Markov and) . 1)
                  ((and this) . 1)
                  ((this is) . 1)
                  ((is my) . 1)
                  ((my chain.) . 1)))
              (2 (My chain is longer than my chain is short.)
                 (((My chain) . 1)
                  ((chain is) . 2)
                  ((is longer) . 1)
                  ((longer than) . 1)
                  ((than my) . 1)
                  ((my chain) . 1)
                  ((is short.) . 1)))
              (3 (The wind at my back is calling my back to the wind at my back.)
                 (((The wind at) . 1)
                  ((wind at my) . 2)
                  ((at my back) . 1)
                  ((my back is) . 1)
                  ((back is calling) . 1)
                  ((is calling my) . 1)
                  ((calling my back) . 1)
                  ((my back to) . 1)
                  ((back to the) . 1)
                  ((to the wind) . 1)
                  ((the wind at) . 1)
                  ((at my back.) . 1)))
              (1 (My name is Andrey Markov and this is my chain.)
                 (((My) . 1)
                  ((name) . 1)
                  ((is) . 2)
                  ((Andrey) . 1)
                  ((Markov) . 1)
                  ((and) . 1)
                  ((this) . 1)
                  ((my) . 1)
                  ((chain.) . 1)))))])
  (match-define (list n corpus expected-counts) row)
  (check-equal? (n-grams n corpus) (make-hash expected-counts)))
; update-transition-list
; Expected behaviour per these cases: the entry whose head equals the given
; key has its count raised by one; every other entry is returned unchanged.
; The key may be a plain string ...
(let ([before '(("potato" 10) ("surfing" 2))]
      [after  '(("potato" 11) ("surfing" 2))])
  (check-equal? (update-transition-list "potato" before) after))
; ... or a list of strings.
(let ([before '((("potato" "soup") 10) ("surfing" 2))]
      [after  '((("potato" "soup") 11) ("surfing" 2))])
  (check-equal? (update-transition-list '("potato" "soup") before) after))
; generate-markov-chain
; Corpus with no repeated n-gram: every key of the chain's hash is expected
; to map to a single (next-word count) entry with count 1. The final n-gram
; has no successor and is expected to be absent from the hash.
(let* ([corpus '(My name is Andrey Markov and this is my chain.)]
       [transitions-of (λ (order)
                         (chain-hash (generate-markov-chain order corpus)))])
  ; order 2
  (check-equal? (transitions-of 2)
                (make-hash '(((My name) . ((is 1)))
                             ((name is) . ((Andrey 1)))
                             ((is Andrey) . ((Markov 1)))
                             ((Andrey Markov) . ((and 1)))
                             ((Markov and) . ((this 1)))
                             ((and this) . ((is 1)))
                             ((this is) . ((my 1)))
                             ((is my) . ((chain. 1))))))
  ; order 3
  (check-equal? (transitions-of 3)
                (make-hash '(((My name is) . ((Andrey 1)))
                             ((name is Andrey) . ((Markov 1)))
                             ((is Andrey Markov) . ((and 1)))
                             ((Andrey Markov and) . ((this 1)))
                             ((Markov and this) . ((is 1)))
                             ((and this is) . ((my 1)))
                             ((this is my) . ((chain. 1)))))))
; generate-markov-chain on a corpus with repeated n-grams: successor counts
; accumulate, and one n-gram can have several successors. The order of the
; entries inside a transition list is part of the expectation.
(let* ([corpus '(My donkey is a donkey is a donkey is a donkey is your chain.)]
       [transitions-of (λ (order)
                         (chain-hash (generate-markov-chain order corpus)))])
  ; order 2
  (check-equal? (transitions-of 2)
                (make-hash '(((My donkey) . ((is 1)))
                             ((donkey is) . ((your 1) (a 3)))
                             ((is a) . ((donkey 3)))
                             ((is your) . ((chain. 1)))
                             ((a donkey) . ((is 3))))))
  ; order 3
  (check-equal? (transitions-of 3)
                (make-hash '(((My donkey is) . ((a 1)))
                             ((donkey is a) . ((donkey 3)))
                             ((is a donkey) . ((is 3)))
                             ((a donkey is) . ((your 1) (a 2)))
                             ((donkey is your) . ((chain. 1)))))))
; build-word-level-markov-chain-from-file, single-line corpus file.
; Words read from the file are expected as strings (not symbols) in both the
; n-gram keys and the transition entries.
(let ([expected-transitions
       (make-hash '((("My" "name") . (("is" 1)))
                    (("name" "is") . (("Andrey" 1)))
                    (("is" "Andrey") . (("Markov" 1)))
                    (("Andrey" "Markov") . (("and" 1)))
                    (("Markov" "and") . (("this" 1)))
                    (("and" "this") . (("is" 1)))
                    (("this" "is") . (("my" 1)))
                    (("is" "my") . (("chain." 1)))))])
  (check-equal?
   (chain-hash (build-word-level-markov-chain-from-file 2 "corpora/markov-test.txt"))
   expected-transitions))
; build-word-level-markov-chain-from-file, multi-line corpus file.
; The whole chain value is compared here: a two-element list holding a list
; of (n-gram count) entries followed by the transition hash.
; NOTE(review): the first element looks like the n-grams that open each line
; of the file, with their counts -- confirm against markov.rkt.
(let ([expected-openers '((("My" "name") 2) (("His" "name") 1))]
      [expected-transitions
       (make-hash '((("My" "name") . (("is" 2)))
                    (("His" "name") . (("is" 1)))
                    (("name" "is") . (("Andrey" 2) ("Bob" 1)))
                    (("is" "Andrey") . (("Markov" 2)))
                    (("is" "Bob") . (("Manlow" 1)))
                    (("Andrey" "Markov") . (("and" 2)))
                    (("Bob" "Manlow") . (("and" 1)))
                    (("Markov" "and") . (("this" 1) ("that" 1)))
                    (("Manlow" "and") . (("this" 1)))
                    (("and" "this") . (("is" 2)))
                    (("and" "that") . (("is" 1)))
                    (("this" "is") . (("my" 2)))
                    (("that" "is") . (("his" 1)))
                    (("is" "my") . (("chain." 1) ("couch." 1)))
                    (("is" "his") . (("chain." 1)))))])
  (check-equal?
   (build-word-level-markov-chain-from-file 2 "corpora/markov-test-multiline.txt")
   (list expected-openers expected-transitions)))
; generate-similar-corpus
; This corpus has no repeated n-gram, so the expected output is the corpus
; itself whenever the third argument (20) exceeds its length.
; NOTE(review): the third argument appears to cap the number of generated
; words -- with 4 only the first four words are expected; confirm in markov.rkt.
(let ([corpus '(My name is Andrey Markov and this is my chain.)])
  (for ([order (in-list '(2 3))])
    (check-equal? (generate-similar-corpus order corpus 20) corpus))
  (check-equal? (generate-similar-corpus 2 corpus 4)
                (take corpus 4)))
; string-is-composed-of-symbols?
; "ABC" is checked against three character lists: exactly its characters,
; a superset of them, and a list missing #\A (the only case expected false).
(let ([abc-composed-of? (λ (allowed)
                          (string-is-composed-of-symbols? "ABC" allowed))])
  (check-true  (abc-composed-of? '(#\A #\B #\C)))
  (check-true  (abc-composed-of? '(#\A #\B #\C #\D)))
  (check-false (abc-composed-of? '(#\B #\C #\D))))