Skip to content

Commit

Permalink
[Changed] updated Simplified BSD license
Browse files Browse the repository at this point in the history
made license detection from templates more flexible [#171961625]

Signed-off-by: Debbie Chen <[email protected]>
  • Loading branch information
Pivotal-Jeff-Jun authored and xtreme-debbie-chen committed May 11, 2020
1 parent 204798c commit acf5705
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 8 deletions.
4 changes: 0 additions & 4 deletions lib/license_finder/license/templates/SimplifiedBSD.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,3 @@ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of the FreeBSD Project.
21 changes: 17 additions & 4 deletions lib/license_finder/license/text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,28 @@ module Text
# One or more consecutive whitespace characters; normalize_punctuation
# replaces each run with a single space.
SPACES = /\s+/.freeze
# One or two consecutive ASCII quote characters (', ` or "); each run is
# replaced with a single double quote by normalize_punctuation.
QUOTES = /['`"]{1,2}/.freeze
# A template placeholder written in angle brackets, e.g. <year>.
PLACEHOLDERS = /<[^<>]+>/.freeze
# Typographic single quotes, mapped to ' before QUOTES is applied.
SPECIAL_SINGLE_QUOTES = /[‘’]/.freeze
# Typographic double quotes and guillemets, mapped to ".
SPECIAL_DOUBLE_QUOTES = /[“”„«»]/.freeze
# Matches an "(a) " style list marker as it appears AFTER Regexp.escape:
# backslash-escaped parentheses around one lowercase letter, followed by a
# backslash and a whitespace character (the escaped space).
ALPHABET_ORDERED_LIST = /\\\([a-z]\\\)\\\s/.freeze
# Regex source substituted for such a marker so that it becomes optional.
ALPHABET_ORDERED_LIST_OPTIONAL = '(\([a-z]\)\s)?'
# Matches a "1. " / "12. " or "* " bullet as it appears AFTER Regexp.escape:
# one or two digits plus an escaped dot, or an escaped asterisk, followed by
# a backslash and a whitespace character (the escaped space).
LIST_BULLETS = /(\d{1,2}\\\.|\\\*)\\\s/.freeze
# Regex source substituted for such a bullet.
# NOTE(review): the dot here is unescaped (matches any character) and the
# group has no trailing `?`, so a bullet is still required despite the
# constant's name -- confirm this is intended before changing it, since the
# spec pins this exact source.
LIST_BULLETS_OPTIONAL = '(\d{1,2}.|\*)\s*'

# Canonicalizes whitespace and quote characters so that texts which differ
# only in those details normalize to the same string.
#
# Works on a UTF-8-tagged copy of the input, so the caller's string is
# neither mutated nor re-tagged. Typographic quotes are first mapped to their
# ASCII counterparts, every whitespace run becomes a single space, every run
# of one or two ASCII quotes (', ` or ") becomes a single double quote, and
# leading/trailing whitespace is stripped.
#
# @param text [String] raw license or template text
# @return [String] a new, normalized string
def self.normalize_punctuation(text)
  text.dup.force_encoding('UTF-8')
      .gsub(SPECIAL_DOUBLE_QUOTES, '"')
      .gsub(SPECIAL_SINGLE_QUOTES, "'")
      .gsub(SPACES, ' ')
      .gsub(QUOTES, '"')
      .strip
end

# Builds a Regexp that matches license text against a template.
#
# The template is normalized with normalize_punctuation and escaped, then
# selected escaped fragments are loosened again:
# * <placeholder> tokens become a `(.*)` wildcard,
# * every comma becomes optional,
# * "(a) " style list markers and "1. " / "* " bullets are replaced by the
#   generic patterns held in ALPHABET_ORDERED_LIST_OPTIONAL and
#   LIST_BULLETS_OPTIONAL.
#
# @param text [String] license template text
# @return [Regexp] the compiled matcher
def self.compile_to_regex(text)
  # The list patterns are written against the output of Regexp.escape, so
  # escaping must happen before any of the substitutions below.
  pattern = Regexp.escape(normalize_punctuation(text))
                  .gsub(PLACEHOLDERS, '(.*)')
                  .gsub(',', '(,)?')
                  .gsub(ALPHABET_ORDERED_LIST, ALPHABET_ORDERED_LIST_OPTIONAL)
                  .gsub(LIST_BULLETS, LIST_BULLETS_OPTIONAL)
  Regexp.new(pattern)
end
end
end
Expand Down
117 changes: 117 additions & 0 deletions spec/lib/license_finder/license/text_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# frozen_string_literal: true

require 'spec_helper'

describe LicenseFinder::License::Text do
# Examples for Text.normalize_punctuation: typographic quotes, whitespace
# runs and ASCII quote variants must all collapse to one canonical form.
describe '.normalize_punctuation' do
  context 'when text contains special single/double quotes' do
    it 'normalizes special quotes to generic double quotes' do
      text = <<~TEXT
        ‘surrounded with special single quotes’
        “surrounded with special double quotes”
        “surrounded with special double quotes„
        «surrounded with special double quotes»
      TEXT

      expected_text = '"surrounded with special single quotes" "surrounded with special double quotes" "surrounded with special double quotes" "surrounded with special double quotes"'

      expect(described_class.normalize_punctuation(text)).to eq(expected_text)
    end
  end

  context 'when text contains whitespace tags' do
    it 'normalizes whitespace tag to a single space' do
      text = <<~TEXT
        far away
        far far away
      TEXT

      expected_text = 'far away far far away'

      expect(described_class.normalize_punctuation(text)).to eq(expected_text)
    end
  end

  context 'when text contains multiple types of quotes' do
    it 'normalizes multiple types of quotes to generic double quotes' do
      text = <<~TEXT
        'surrounded with single quotes'
        "surrounded with double quotes"
        `surrounded with backtick`
      TEXT

      expected_text = '"surrounded with single quotes" "surrounded with double quotes" "surrounded with backtick"'

      expect(described_class.normalize_punctuation(text)).to eq(expected_text)
    end
  end
end

# Examples for Text.compile_to_regex: each one pins the exact regex source
# produced for a template feature (placeholders, commas, list markers).
describe '.compile_to_regex' do
  context 'when the text contains placeholders' do
    it 'returns regex with wildcards' do
      text = <<~TEXT
        I am <thing>
        You are <thing2>
      TEXT

      expected_regex = Regexp.new('I\ am\ (.*)\ You\ are\ (.*)')

      expect(described_class.compile_to_regex(text)).to eq(expected_regex)
    end
  end

  context 'when the text contains commas' do
    it 'returns regex with comma optionals' do
      text = <<~TEXT
        This is a comma,
        This is also a comma,
      TEXT

      expected_regex = Regexp.new('This\ is\ a\ comma(,)?\ This\ is\ also\ a\ comma(,)?')

      expect(described_class.compile_to_regex(text)).to eq(expected_regex)
    end
  end

  context 'when the text contains alphabetically ordered list' do
    it 'returns regex with optional alphabetically ordered list' do
      text = <<~TEXT
        (a) for an apple
        (b) for a loaf of bread
      TEXT

      expected_regex = Regexp.new('(\([a-z]\)\s)?for\ an\ apple\ (\([a-z]\)\s)?for\ a\ loaf\ of\ bread')

      expect(described_class.compile_to_regex(text)).to eq(expected_regex)
    end
  end

  context 'when the text contains numerically ordered/unordered list' do
    it 'returns regex with optional numerically ordered/unordered list' do
      text = <<~TEXT
        1. for an apple
        * for a loaf of bread
      TEXT

      expected_regex = Regexp.new('(\d{1,2}.|\*)\s*for\ an\ apple\ (\d{1,2}.|\*)\s*for\ a\ loaf\ of\ bread')

      expect(described_class.compile_to_regex(text)).to eq(expected_regex)
    end

    context 'when the text contains brackets near the unordered bullets' do
      it 'returns properly formatted regex' do
        # Only an asterisk directly followed by a space counts as a bullet,
        # so the first asterisk of the leading "**" and the whole trailing
        # "**" stay as escaped literals.
        text = <<~TEXT
          **
          * (banana bread)
          **
        TEXT

        expected_regex = Regexp.new('\*(\d{1,2}.|\*)\s*(\d{1,2}.|\*)\s*\(banana\ bread\)\ \*\*')

        expect(described_class.compile_to_regex(text)).to eq(expected_regex)
      end
    end
  end
end
end

0 comments on commit acf5705

Please sign in to comment.