Swift 3 #32

Open
wants to merge 1 commit into base: master
Empty file modified CHANGELOG.md
100644 → 100755
Empty file.
Empty file modified LICENSE
100644 → 100755
Empty file.
Empty file modified Parsimmon.podspec
100644 → 100755
Empty file.
6 changes: 3 additions & 3 deletions Parsimmon/Analyzer.swift
100644 → 100755
@@ -29,7 +29,7 @@ protocol Analyzer {
var scheme: String { get }
}

internal func analyze(analyzer: Analyzer, text: String, options: NSLinguisticTaggerOptions?) -> [Pair] {
internal func analyze(_ analyzer: Analyzer, text: String, options: NSLinguisticTagger.Options?) -> [Pair] {
var pairs: [Pair] = []

let range = NSRange(location: 0, length: text.characters.count)
@@ -38,9 +38,9 @@ internal func analyze(analyzer: Analyzer, text: String, options: NSLinguisticTag

tagger.string = text
tagger.setOrthography(analyzer.seed.orthography, range: range)
tagger.enumerateTagsInRange(range, scheme: analyzer.scheme, options: options) { (tag: String?, tokenRange, range, stop) in
tagger.enumerateTags(in: range, scheme: analyzer.scheme, options: options) { (tag: String?, tokenRange, range, stop) in
if let tag = tag {
let token = (text as NSString).substringWithRange(tokenRange)
let token = (text as NSString).substring(with: tokenRange)
let pair = (token, tag)
pairs.append(pair)
}
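For context, a minimal sketch of the two renames this hunk relies on (NSLinguisticTaggerOptions → NSLinguisticTagger.Options, substringWithRange(_:) → substring(with:)), assuming Foundation on an Apple platform; the sample text and range are illustrative, not taken from the PR:

```swift
import Foundation

// Swift 3 spelling of the linguistic tagger options passed to analyze(_:text:options:).
let options: NSLinguisticTagger.Options = [.omitWhitespace, .omitPunctuation]

// Swift 3 spelling of the NSString bridging call used to pull out each token.
let text = "Parsimmon analyzes text"
let tokenRange = NSRange(location: 0, length: 9)
let token = (text as NSString).substring(with: tokenRange) // "Parsimmon"
```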
90 changes: 45 additions & 45 deletions Parsimmon/DecisionTree.swift
100644 → 100755
@@ -23,39 +23,39 @@
import Foundation

public struct Datum {
public let featureValues: (Bit, Bit)
public let classification: Bit
public let featureValues: (Int, Int)
public let classification: Int

public init(featureValues: (Bit, Bit), classification: Bit) {
public init(featureValues: (Int, Int), classification: Int) {
self.featureValues = featureValues
self.classification = classification
}

public func featureValueAtPosition(position: Bit) -> Bit {
if position.rawValue == 0 {
public func featureValueAtPosition(_ position: Int) -> Int {
if position == 0 {
return self.featureValues.0
} else {
return self.featureValues.1
}
}
}

public class Node<T> {
public var leftChild: Node<T>?
public var rightChild: Node<T>?
public var value: T
open class Node<T> {
open var leftChild: Node<T>?
open var rightChild: Node<T>?
open var value: T

init(value: T) {
self.value = value
}
}

public class DecisionTree {
public var root: Node<Bit>?
public var maxDepth: Int = 5
private let featureNames: (String, String)
private let classificationNames: (String, String)
private var data = [Datum]()
open class DecisionTree {
open var root: Node<Int>?
open var maxDepth: Int = 5
fileprivate let featureNames: (String, String)
fileprivate let classificationNames: (String, String)
fileprivate var data = [Datum]()

public init(featureNames: (String, String), classificationNames: (String, String)) {
self.featureNames = featureNames
@@ -67,40 +67,40 @@ public class DecisionTree {

@param datum A data point
*/
public func addSample(datum: Datum) {
open func addSample(_ datum: Datum) {
self.data.append(datum)
}

/**
Builds the decision tree based on the data it has.
*/
public func build() {
let features = [ Bit.Zero, Bit.One ]
open func build() {
let features = [0, 1]
self.root = self.decisionTree(self.data, remainingFeatures: features, maxDepth: self.maxDepth)
}

public func classify(sample: [Int]) -> String? {
open func classify(_ sample: [Int]) -> String? {
var node = self.root
while (node != nil) {
let unwrappedNode = node!
if let _ = unwrappedNode.leftChild {
let pathToTake = sample[unwrappedNode.value.rawValue]
let pathToTake = sample[unwrappedNode.value]
if pathToTake == 0 {
node = unwrappedNode.leftChild
} else {
node = unwrappedNode.rightChild
}
} else if unwrappedNode.value.rawValue == 0 {
} else if unwrappedNode.value == 0 {
return self.classificationNames.0
} else if unwrappedNode.value.rawValue == 1 {
} else if unwrappedNode.value == 1 {
return self.classificationNames.1
}
}
return nil
}

private func decisionTree(data: [Datum], remainingFeatures: [Bit], maxDepth: Int) -> Node<Bit> {
let tree = Node<Bit>(value: Bit.Zero)
fileprivate func decisionTree(_ data: [Datum], remainingFeatures: [Int], maxDepth: Int) -> Node<Int> {
let tree = Node<Int>(value: 0)
if data.first == nil {
return tree
}
@@ -123,17 +123,17 @@ public class DecisionTree {
if firstDatumFeatureValues != datumFeatureValues {
allSameFeatureValues = false
}
count += datumClassification.rawValue
count += datumClassification
}

if allSameClassification == true {
tree.value = firstDatum.classification
} else if allSameFeatureValues == true || maxDepth == 0 {
tree.value = count > (data.count / 2) ? Bit.One : Bit.Zero
tree.value = count > (data.count / 2) ? 1 : 0
} else {
// Find the best feature to split on and recurse.
var maxInformationGain = -Float.infinity
var bestFeature = Bit.Zero
var bestFeature = 0
for feature in remainingFeatures {
let informationGain = self.informationGain(feature, data: data)
if informationGain >= maxInformationGain {
@@ -143,8 +143,8 @@ public class DecisionTree {
}
let splitData = self.splitData(data, onFeature: bestFeature)
var newRemainingFeatures = remainingFeatures
if let bestFeatureIndex = newRemainingFeatures.indexOf(bestFeature) {
newRemainingFeatures.removeAtIndex(bestFeatureIndex)
if let bestFeatureIndex = newRemainingFeatures.index(of: bestFeature) {
newRemainingFeatures.remove(at: bestFeatureIndex)
tree.leftChild = self.decisionTree(splitData.0, remainingFeatures: newRemainingFeatures, maxDepth: maxDepth - 1)
tree.rightChild = self.decisionTree(splitData.1, remainingFeatures: newRemainingFeatures, maxDepth: maxDepth - 1)
tree.value = bestFeature
@@ -154,11 +154,11 @@ public class DecisionTree {
return tree
}

private func splitData(data: [Datum], onFeature: Bit) -> ([Datum], [Datum]) {
fileprivate func splitData(_ data: [Datum], onFeature: Int) -> ([Datum], [Datum]) {
var first = [Datum]()
var second = [Datum]()
for datum in data {
if datum.featureValueAtPosition(onFeature).rawValue == 0 {
if datum.featureValueAtPosition(onFeature) == 0 {
first.append(datum)
} else {
second.append(datum)
@@ -169,17 +169,17 @@ public class DecisionTree {

// MARK: Entropy

private func informationGain(feature: Bit, data: [Datum]) -> Float {
fileprivate func informationGain(_ feature: Int, data: [Datum]) -> Float {
return self.HY(data) - self.HY(data, X: feature)
}

private func HY(data: [Datum]) -> Float {
fileprivate func HY(_ data: [Datum]) -> Float {
let pY0: Float = self.pY(data, Y: 0)
let pY1 = 1.0 - pY0
return -1.0 * (pY0 * log2(pY0) + pY1 * log2(pY1))
}

private func HY(data: [Datum], X: Bit) -> Float {
fileprivate func HY(_ data: [Datum], X: Int) -> Float {
var result = Float(0.0)
for x in [0, 1] {
for y in [0, 1] {
@@ -189,35 +189,35 @@ public class DecisionTree {
return result
}

private func pY(data: [Datum], Y: Int) -> Float {
fileprivate func pY(_ data: [Datum], Y: Int) -> Float {
var count = 0
for datum in data {
if datum.classification.rawValue == Y {
count++
if datum.classification == Y {
count += 1
}
}
return Float(count) / Float(data.count)
}

private func pY(data: [Datum], Y: Int, X: Int, feature: Bit) -> Float {
fileprivate func pY(_ data: [Datum], Y: Int, X: Int, feature: Int) -> Float {
var yCount = 0
var xCount = 0
for datum in data {
if datum.featureValueAtPosition(feature).rawValue == X {
xCount++
if datum.classification.rawValue == Y {
yCount++
if datum.featureValueAtPosition(feature) == X {
xCount += 1
if datum.classification == Y {
yCount += 1
}
}
}
return Float(yCount) / Float(xCount)
}

private func pX(data: [Datum], X: Int, Y: Int, feature: Bit) -> Float {
fileprivate func pX(_ data: [Datum], X: Int, Y: Int, feature: Int) -> Float {
var count = 0
for datum in data {
if datum.classification.rawValue == Y && datum.featureValueAtPosition(feature).rawValue == X {
count++
if datum.classification == Y && datum.featureValueAtPosition(feature) == X {
count += 1
}
}
return Float(count) / Float(data.count)
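For reference, a minimal usage sketch of the migrated DecisionTree API, with plain Int feature values standing in for the removed Bit type; the feature names, classification names, and toy data below are invented for illustration:

```swift
// Toy dataset: the classification is 1 only when both feature values are 1.
let tree = DecisionTree(featureNames: ("raining", "cold"),
                        classificationNames: ("go outside", "stay home"))
tree.addSample(Datum(featureValues: (0, 0), classification: 0))
tree.addSample(Datum(featureValues: (0, 1), classification: 0))
tree.addSample(Datum(featureValues: (1, 0), classification: 0))
tree.addSample(Datum(featureValues: (1, 1), classification: 1))
tree.build()

// classify(_:) walks the tree and returns one of the two classification names, or nil.
let label = tree.classify([1, 1])
```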
2 changes: 1 addition & 1 deletion Parsimmon/Functions.swift
100644 → 100755
@@ -22,7 +22,7 @@

import Foundation

func argmax<T, U: Comparable>(elements: [(T, U)]) -> T? {
func argmax<T, U: Comparable>(_ elements: [(T, U)]) -> T? {
if let start = elements.first {
return elements.reduce(start) { $0.1 > $1.1 ? $0 : $1 }.0
}
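The only change to argmax is the unlabelled first parameter. A small sketch of the internal helper in use, with invented scores:

```swift
// Returns the first component of the pair whose second component is largest.
let scores: [(String, Double)] = [("cat", 0.7), ("dog", 0.9), ("fish", 0.2)]
let best = argmax(scores) // Optional("dog")
```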
4 changes: 2 additions & 2 deletions Parsimmon/Lemmatizer.swift
100644 → 100755
@@ -40,8 +40,8 @@ public struct Lemmatizer: Analyzer {
@param options Linguistic tagger options
@return The lemmatized tokens
*/
public func lemmatizeWordsInText(text: String, options: NSLinguisticTaggerOptions? = nil) -> [String] {
return analyze(self, text: text, options: options).map { (token, lemma) in lemma.lowercaseString }.filter {
public func lemmatizeWordsInText(_ text: String, options: NSLinguisticTagger.Options? = nil) -> [String] {
return analyze(self, text: text, options: options).map { (token, lemma) in lemma.lowercased() }.filter {
!$0.isEmpty
}
}
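A minimal sketch of the renamed method in use, assuming the struct's existing parameterless initializer; the sentence and output are illustrative only:

```swift
let lemmatizer = Lemmatizer()
let lemmas = lemmatizer.lemmatizeWordsInText("The quick brown foxes jumped")
// Roughly ["the", "quick", "brown", "fox", "jump"], depending on what the tagger returns.
```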
44 changes: 24 additions & 20 deletions Parsimmon/NaiveBayesClassifier.swift
100644 → 100755
@@ -55,18 +55,22 @@ import Foundation

private let smoothingParameter = 1.0

public class NaiveBayesClassifier {
open class NaiveBayesClassifier {
public typealias Word = String
public typealias Category = String

private let tokenizer: Tokenizer
fileprivate let tokenizer: Tokenizer

private var categoryOccurrences: [Category: Int] = [:]
private var wordOccurrences: [Word: [Category: Int]] = [:]
private var trainingCount = 0
private var wordCount = 0
fileprivate var categoryOccurrences: [Category: Int] = [:]
fileprivate var wordOccurrences: [Word: [Category: Int]] = [:]
fileprivate var trainingCount = 0
fileprivate var wordCount = 0

public init(tokenizer: Tokenizer = Tokenizer()) {
public init() {
self.tokenizer = Tokenizer()
}

public init(tokenizer: Tokenizer) {
self.tokenizer = tokenizer
}

@@ -78,7 +82,7 @@ public class NaiveBayesClassifier {
@param text The text
@param category The category of the text
*/
public func trainWithText(text: String, category: Category) {
open func trainWithText(_ text: String, category: Category) {
let tokens = tokenizer.tokenize(text)
trainWithTokens(tokens, category: category)
}
@@ -90,13 +94,13 @@ public class NaiveBayesClassifier {
@param tokens The tokenized text
@param category The category of the text
*/
public func trainWithTokens(tokens: [Word], category: Category) {
open func trainWithTokens(_ tokens: [Word], category: Category) {
let words = Set(tokens)
for word in words {
incrementWord(word, category: category)
}
incrementCategory(category)
trainingCount++
trainingCount += 1
}

// MARK: - Classifying
Expand All @@ -107,7 +111,7 @@ public class NaiveBayesClassifier {
@param text The text to classify
@return The category classification
*/
public func classify(text: String) -> Category? {
open func classify(_ text: String) -> Category? {
let tokens = tokenizer.tokenize(text)
return classifyTokens(tokens)
}
@@ -118,7 +122,7 @@ public class NaiveBayesClassifier {
@param text The tokenized text to classify
@return The category classification if one was found, or nil if one wasn’t
*/
public func classifyTokens(tokens: [Word]) -> Category? {
open func classifyTokens(_ tokens: [Word]) -> Category? {
// Compute argmax_cat [log(P(C=cat)) + sum_token(log(P(W=token|C=cat)))]
return argmax(categoryOccurrences.map { (category, count) -> (Category, Double) in
let pCategory = self.P(category)
@@ -131,42 +135,42 @@

// MARK: - Probabilites

private func P(category: Category, _ word: Word) -> Double {
fileprivate func P(_ category: Category, _ word: Word) -> Double {
if let occurrences = wordOccurrences[word] {
let count = occurrences[category] ?? 0
return Double(count) / Double(trainingCount)
}
return 0.0
}

private func P(category: Category) -> Double {
fileprivate func P(_ category: Category) -> Double {
return Double(totalOccurrencesOfCategory(category)) / Double(trainingCount)
}

// MARK: - Counting

private func incrementWord(word: Word, category: Category) {
fileprivate func incrementWord(_ word: Word, category: Category) {
if wordOccurrences[word] == nil {
wordCount++
wordCount += 1
wordOccurrences[word] = [:]
}

let count = wordOccurrences[word]?[category] ?? 0
wordOccurrences[word]?[category] = count + 1
}

private func incrementCategory(category: Category) {
fileprivate func incrementCategory(_ category: Category) {
categoryOccurrences[category] = totalOccurrencesOfCategory(category) + 1
}

private func totalOccurrencesOfWord(word: Word) -> Int {
fileprivate func totalOccurrencesOfWord(_ word: Word) -> Int {
if let occurrences = wordOccurrences[word] {
return Array(occurrences.values).reduce(0, combine: +)
return Array(occurrences.values).reduce(0, +)
}
return 0
}

private func totalOccurrencesOfCategory(category: Category) -> Int {
fileprivate func totalOccurrencesOfCategory(_ category: Category) -> Int {
return categoryOccurrences[category] ?? 0
}
}
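A minimal usage sketch of the migrated classifier, exercising the new parameterless initializer and the relabelled train/classify methods; the training sentences and category names are invented for illustration:

```swift
let classifier = NaiveBayesClassifier()
classifier.trainWithText("I love this movie it was great", category: "positive")
classifier.trainWithText("I hated that terrible film", category: "negative")

// Given the token overlap with the first training sentence, "positive" is the likely result.
let category = classifier.classify("I love this great movie")
```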