-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFrequencyAnalysisTask.cs
61 lines (49 loc) · 2.06 KB
/
FrequencyAnalysisTask.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace TextAnalysis
{
/// <summary>
/// Builds a "most frequent continuation" table from the bigrams and trigrams of a parsed text.
/// </summary>
static class FrequencyAnalysisTask
{
    /// <summary>
    /// For every word and every pair of adjacent words that has a successor inside the same
    /// sentence, finds the word that most often follows it.
    /// </summary>
    /// <param name="text">Sentences, each given as an ordered list of its words.</param>
    /// <returns>
    /// A dictionary whose keys are either a single word (start of a bigram) or two adjacent words
    /// joined by one space (start of a trigram), and whose values are the most frequent next word.
    /// When several continuations share the highest count, the ordinally smallest one is returned.
    /// </returns>
    public static Dictionary<string, string> GetMostFrequentNextWords(List<List<string>> text)
    {
        var frequencyDictionary = new Dictionary<string, Dictionary<string, int>>();
        foreach (var sentence in text)
        {
            // Bigrams: previous word -> current word.
            for (int i = 1; i < sentence.Count; i++)
            {
                UpdateDictionary(frequencyDictionary, sentence[i - 1], sentence[i]);
            }

            // Trigrams: "word-before-previous previous" -> current word.
            for (int i = 2; i < sentence.Count; i++)
            {
                var twoWords = $"{sentence[i - 2]} {sentence[i - 1]}";
                UpdateDictionary(frequencyDictionary, twoWords, sentence[i]);
            }
        }

        var result = new Dictionary<string, string>(frequencyDictionary.Count);
        foreach (var pair in frequencyDictionary)
        {
            result.Add(pair.Key, SelectMostFrequent(pair.Value));
        }

        return result;
    }

    /// <summary>
    /// Returns the continuation with the highest count; ties are resolved in favour of the
    /// ordinally smallest word so the result does not depend on enumeration order or culture.
    /// </summary>
    /// <param name="continuations">Non-empty map from next word to its occurrence count.</param>
    private static string SelectMostFrequent(Dictionary<string, int> continuations)
    {
        string bestWord = null;
        var bestCount = 0;
        foreach (var candidate in continuations)
        {
            // Every stored count is at least 1, so the first candidate always wins over the
            // initial (null, 0) state and bestWord is non-null before CompareOrdinal is reached.
            var isBetter = candidate.Value > bestCount
                || (candidate.Value == bestCount
                    && string.CompareOrdinal(candidate.Key, bestWord) < 0);
            if (isBetter)
            {
                bestWord = candidate.Key;
                bestCount = candidate.Value;
            }
        }

        return bestWord;
    }

    /// <summary>
    /// Increments the number of times <paramref name="currentWord"/> was seen right after
    /// <paramref name="previousWord"/>, creating the nested entries on first use.
    /// </summary>
    private static void UpdateDictionary(
        IDictionary<string, Dictionary<string, int>> frequencyDictionary,
        string previousWord,
        string currentWord)
    {
        Dictionary<string, int> continuations;
        if (!frequencyDictionary.TryGetValue(previousWord, out continuations))
        {
            continuations = new Dictionary<string, int>();
            frequencyDictionary[previousWord] = continuations;
        }

        int count;
        continuations.TryGetValue(currentWord, out count); // count stays 0 when the word is new
        continuations[currentWord] = count + 1;
    }
}
}