-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
Copy pathtest_classifier.rb
92 lines (74 loc) · 3 KB
/
test_classifier.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
require_relative "./helper"
# Tests for Classifier: training on sample fixtures, classifying source text
# and token lists, restricting the candidate languages, and degenerate inputs.
class TestClassifier < Minitest::Test
  include Linguist

  # Reads a sample file.
  #
  # name - path of the sample, relative to samples_path.
  #
  # Returns the file contents as a String.
  def fixture(name)
    File.read(File.join(samples_path, name))
  end

  # Classifying either raw source text or its token list should rank
  # Objective-C first for an Objective-C sample.
  def test_classify
    db = trained_db
    source = fixture("Objective-C/hello.m")

    results = Classifier.classify(db, source)
    assert_equal "Objective-C", results.first.first

    tokens = Tokenizer.tokenize(source)
    results = Classifier.classify(db, tokens)
    assert_equal "Objective-C", results.first.first
  end

  # Passing a list of candidate languages limits the results to that list.
  def test_restricted_classify
    db = trained_db
    source = fixture("Objective-C/hello.m")

    results = Classifier.classify(db, source, ["Objective-C"])
    assert_equal "Objective-C", results.first.first

    assert_empty Classifier.classify(db, source, ["Ruby"])
  end

  # An empty string yields no results.
  def test_instance_classify_empty
    assert_empty Classifier.classify(Samples.cache, "")
  end

  # nil input yields an empty array.
  def test_instance_classify_nil
    assert_equal [], Classifier.classify(Samples.cache, nil)
  end

  # For every sample whose extension maps to more than one language, the
  # classifier (restricted to those languages) should pick the sample's own
  # language, except for the explicitly allowed failures below.
  def test_classify_ambiguous_languages
    # Failures are reasonable in some cases, such as when a file is fully
    # valid in more than one language.
    allowed_failures = {
      # Valid C and C++
      "#{samples_path}/C/rpc.h" => ["C", "C++"],
      # Tricky samples
      "#{samples_path}/C/syscalldefs.h" => ["C", "C++"],
      "#{samples_path}/C++/Types.h" => ["C", "C++"],
    }

    # Skip extensions with catch-all rule
    skip_extensions = catch_all_extensions

    Samples.each do |sample|
      path = sample[:path]
      next if skip_extensions.include?(sample[:extname])

      language = Language.find_by_name(sample[:language])

      # Skip samples whose filename maps to exactly one language.
      next if Language.find_by_filename(path).length == 1

      # Skip samples whose extension is not shared by several languages.
      candidates = Language.find_by_extension(path).map(&:name)
      next if candidates.length <= 1

      results = Classifier.classify(Samples.cache, File.read(path), candidates)
      context = "#{path}\n#{results.inspect}"

      if results.empty?
        flunk "no results for #{path}"
      elsif allowed_failures.key?(path)
        assert_includes allowed_failures[path], results.first.first, context
      else
        assert_equal language.name, results.first.first, context
      end
    end
  end

  # An empty list of candidate languages yields an empty array.
  def test_classify_empty_languages
    assert_equal [], Classifier.classify(Samples.cache, fixture("Ruby/foo.rb"), [])
  end

  private

  # Builds a fresh training database from one Ruby sample and two
  # Objective-C samples, then finalizes it.
  #
  # Returns the trained Hash.
  def trained_db
    {}.tap do |db|
      Classifier.train! db, "Ruby", fixture("Ruby/foo.rb")
      Classifier.train! db, "Objective-C", fixture("Objective-C/Foo.h")
      Classifier.train! db, "Objective-C", fixture("Objective-C/Foo.m")
      Classifier.finalize_train! db
    end
  end

  # Collects the extensions of every heuristic whose last rule has an
  # AlwaysMatch pattern.
  #
  # Returns a Set of extensions.
  def catch_all_extensions
    Heuristics.all.each_with_object(Set.new) do |heuristic, extensions|
      # The rule list is read from the heuristic's @rules instance variable.
      rules = heuristic.instance_variable_get(:@rules)
      extensions.merge(heuristic.extensions) if rules.last['pattern'].is_a?(AlwaysMatch)
    end
  end
end