From d8b0f7501db96bb752c66db29a33f41cf1def680 Mon Sep 17 00:00:00 2001 From: Jimmy M Andersson Date: Tue, 26 Nov 2024 19:52:15 +0100 Subject: [PATCH] feat!: Use separate methods for different correlation coefficients --- .github/workflows/TestSuite.yml | 8 +- Local Test Helpers/docker-qa.yml | 17 -- Local Test Helpers/run-tests.sh | 7 - Package.swift | 8 +- README.md | 4 +- Sources/StatKit/BetaFunctions.swift | 1 - .../Association/Correlation.swift | 240 +++++++++++++++++- .../Association/CorrelationMethod.swift | 49 ---- .../Association/KendallsTauCalculator.swift | 151 ----------- .../PearsonsProductMomentCalculator.swift | 47 ---- .../Association/SpearmansRhoCalculator.swift | 31 --- .../Ranking/Ranking.swift | 2 - .../ContinuousDistribution.swift | 1 + .../Distributions/DiscreteDistribution.swift | 1 + Sources/StatKit/StatKit.docc/Association.md | 10 +- .../StatKit.docc/Combinatorial Extensions.md | 2 +- .../LinearCorrelationTests.swift | 35 +-- .../Association Tests/PearsonRTests.swift | 67 +++++ .../RankCorrelationTests.swift | 101 ++------ run-tests.sh | 9 + 20 files changed, 343 insertions(+), 448 deletions(-) delete mode 100644 Local Test Helpers/docker-qa.yml delete mode 100755 Local Test Helpers/run-tests.sh delete mode 100644 Sources/StatKit/Descriptive Statistics/Association/CorrelationMethod.swift delete mode 100644 Sources/StatKit/Descriptive Statistics/Association/KendallsTauCalculator.swift delete mode 100644 Sources/StatKit/Descriptive Statistics/Association/PearsonsProductMomentCalculator.swift delete mode 100644 Sources/StatKit/Descriptive Statistics/Association/SpearmansRhoCalculator.swift create mode 100644 Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/PearsonRTests.swift create mode 100755 run-tests.sh diff --git a/.github/workflows/TestSuite.yml b/.github/workflows/TestSuite.yml index 1b43960..eb1d283 100644 --- a/.github/workflows/TestSuite.yml +++ b/.github/workflows/TestSuite.yml @@ -7,12 +7,12 @@ on: branches: [ main 
] env: - DEVELOPER_DIR: /Applications/Xcode_14.0.app + DEVELOPER_DIR: /Applications/Xcode_16.1.app jobs: macOS: - runs-on: macos-12 - + runs-on: macos-15 + steps: - uses: actions/checkout@v3 - name: Run macOS Tests @@ -25,6 +25,6 @@ jobs: - uses: actions/checkout@v3 - uses: swift-actions/setup-swift@v1 with: - swift-version: '5.7' + swift-version: '5.9' - name: Run Linux Tests run: swift test -c release --parallel diff --git a/Local Test Helpers/docker-qa.yml b/Local Test Helpers/docker-qa.yml deleted file mode 100644 index aa93424..0000000 --- a/Local Test Helpers/docker-qa.yml +++ /dev/null @@ -1,17 +0,0 @@ -version: "3.8" -services: - linux_5_7: - container_name: linux_5_7 - image: swift:5.7.0-amazonlinux2 - volumes: - - ./:/statkit - working_dir: /statkit - command: swift test -c release --parallel - - linter: - container_name: linter - image: norionomura/swiftlint:0.46.5_swift-5.5.1 - volumes: - - ./:/statkit - working_dir: /statkit - command: swiftlint lint --strict diff --git a/Local Test Helpers/run-tests.sh b/Local Test Helpers/run-tests.sh deleted file mode 100755 index 52f0c41..0000000 --- a/Local Test Helpers/run-tests.sh +++ /dev/null @@ -1,7 +0,0 @@ -if [ -x "$(command -v docker)" ]; then - docker-compose -f docker-qa.yml up -else - echo "Install Docker to run Linux tests and linting..." -fi - -swift test -c release --parallel diff --git a/Package.swift b/Package.swift index 7a2931e..545bd6b 100644 --- a/Package.swift +++ b/Package.swift @@ -1,13 +1,13 @@ -// swift-tools-version:5.7 +// swift-tools-version:5.9 import PackageDescription let package = Package( name: "StatKit", platforms: [ - .macOS(.v12), - .iOS(.v15), - .tvOS(.v15) + .macOS(.v14), + .iOS(.v17), + .tvOS(.v17) ], products: [ .library( diff --git a/README.md b/README.md index 6d4ded0..bfd12f7 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@

- + Swift PM Compatible

@@ -113,7 +113,7 @@ let normalRandomVariables = normal.sample(10) StatKit is documented using Swift-DocC, which means that the documentation pages can be built by Xcode and viewed in the Developer Documentation panel. Build it by clicking `Product > Build Documentation` or hitting `Shift + Ctrl + Cmd + D`. ## System Requirements -To use StatKit, make sure that your system has Swift 5.7 (or later) installed. If you’re using a Mac, also make sure that `xcode-select` points at an Xcode installation that includes a valid version of Swift and that you’re running macOS Monterey (12.5) or later. +To use StatKit, make sure that your system has Swift 5.9 (or later) installed. If you’re using a Mac, also make sure that `xcode-select` points at an Xcode installation that includes a valid version of Swift and that you’re running macOS 14 or later. **IMPORTANT** StatKit **does not** officially support any beta software, including beta versions of Xcode and macOS, or unreleased versions of Swift. diff --git a/Sources/StatKit/BetaFunctions.swift b/Sources/StatKit/BetaFunctions.swift index 46da23a..f748c5a 100644 --- a/Sources/StatKit/BetaFunctions.swift +++ b/Sources/StatKit/BetaFunctions.swift @@ -31,7 +31,6 @@ public func beta( /// - parameter x: The value for which to evaluate the incomplete Beta function. /// - parameter alpha: The first shape argument. /// - parameter beta: The second shape argument. -/// - parameter logarithmic: Whether to return the natural logarithm of the function. /// /// The Beta function only supports positive numbers `alpha` and `beta`. /// `x` is a value in the range [0, 1]. 
diff --git a/Sources/StatKit/Descriptive Statistics/Association/Correlation.swift b/Sources/StatKit/Descriptive Statistics/Association/Correlation.swift index 79185db..5be976e 100644 --- a/Sources/StatKit/Descriptive Statistics/Association/Correlation.swift +++ b/Sources/StatKit/Descriptive Statistics/Association/Correlation.swift @@ -1,30 +1,244 @@ +import RealModule + public extension Collection { - /// Calculates the specified correlation coefficient for a collection. + /// Calculates Pearson's correlation coefficient for a collection. /// - parameter X: The first variable. /// - parameter Y: The second variable. - /// - parameter composition: The composition of the collection. - /// - parameter method: The calculation method to use. - /// - returns: The correlation coefficient for the specified variables in the collection. + /// - returns: Pearson's correlation coefficient. /// /// Since there is no notion of correlation in collections with less than two elements, /// this method returns NaN if the array count is less than two. /// The time complexity of this method is O(n). 
@inlinable - func correlation( + func pearsonR( of X: KeyPath, - and Y: KeyPath, - for composition: DataSetComposition, - method: CorrelationMethod = .pearsonsProductMoment + and Y: KeyPath ) -> Double where T: Comparable & Hashable & ConvertibleToReal, U: Comparable & Hashable & ConvertibleToReal { + typealias RComponents = (xSum: Double, ySum: Double, xySum: Double, xSquareSum: Double, ySquareSum: Double) guard self.count > 1 else { return .signalingNaN } - return method.calculator.compute( - for: X, - and: Y, - in: self, - as: composition + + guard X != Y else { return 1 } + + let n = self.count.realValue + + let rComponents: RComponents = self.reduce(into: (0, 0, 0, 0, 0)) { partialResult, element in + let x = element[keyPath: X].realValue + let y = element[keyPath: Y].realValue + + partialResult.xSum += x + partialResult.ySum += y + partialResult.xySum += x * y + partialResult.xSquareSum += x * x + partialResult.ySquareSum += y * y + } + + let numerator = n * rComponents.xySum - rComponents.xSum * rComponents.ySum + let denominator = ( + (n * rComponents.xSquareSum - rComponents.xSum * rComponents.xSum) * + (n * rComponents.ySquareSum - rComponents.ySum * rComponents.ySum) + ).squareRoot() + + guard denominator != 0 else { return .signalingNaN } + + return numerator / denominator + } + + /// Calculates Spearman's rank-order correlation coefficient for a collection. + /// - parameter X: The first variable. + /// - parameter Y: The second variable. + /// - returns: Spearman's rank-order correlation coefficient. + /// + /// Since there is no notion of correlation in collections with less than two elements, + /// this method returns NaN if the array count is less than two. + /// The time complexity of this method is O(n). 
+ @inlinable + func spearmanR( + of X: KeyPath, + and Y: KeyPath + ) -> Double + where T: Comparable & Hashable & ConvertibleToReal, + U: Comparable & Hashable & ConvertibleToReal + { + guard X != Y else { return 1 } + + let XRanks = self.rank( + variable: X, + by: >, + strategy: .fractional + ) + let YRanks = self.rank( + variable: Y, + by: >, + strategy: .fractional ) + let ranks: [(X: Double, Y: Double)] = Array(zip(XRanks, YRanks)) + + return ranks.pearsonR(of: \.X, and: \.Y) + } + + /// Calculates Kendall's rank correlation coefficient for a collection. + /// - parameter X: The first variable. + /// - parameter Y: The second variable. + /// - parameter variant: Which variant of the Tau coefficient to compute. + /// - returns: Kendall's rank correlation coefficient. + /// + /// Since there is no notion of correlation in collections with less than two elements, + /// this method returns NaN if the array count is less than two. + /// The time complexity of this method is O(n * log(n)). + func kendallTau( + of X: KeyPath, + and Y: KeyPath, + variant: KendallTauVariant = .b + ) -> Double + where T: Comparable & Hashable & ConvertibleToReal, + U: Comparable & Hashable & ConvertibleToReal + { + guard X != Y else { return 1 } + + let tiesX = self.countTieRanks(of: X) + let tiesY = self.countTieRanks(of: Y) + + let count = self.count + let discordant = self.discordantPairs(of: X, and: Y) + let combinations = count * (count - 1) / 2 + let concordant = combinations - discordant - tiesX - tiesY + + switch variant { + case .a: + let numerator = (concordant - discordant).realValue + let denominator = combinations.realValue + return numerator / denominator + case .b: + let numerator = (concordant - discordant).realValue + let tieProduct = (combinations - tiesX) * (combinations - tiesY) + let denominator = tieProduct.realValue.squareRoot() + guard !denominator.isZero else { return .signalingNaN } + + return numerator / denominator + } + } +} + +/// The different supported variants of 
the Kendall Tau coefficient. +public enum KendallTauVariant { + /// The original Tau statistic defined in 1938. + /// Tau-a does not make adjustments for rank ties. + case a + + /// The Tau-b statistic (originally named Tau-w) is an extension of Tau-a which makes adjustments for tie rank pairs. + case b +} + +private extension Collection { + /// Counts the number of tied variables within a collection of measurements. + /// - parameter X : The variable under investigation. + /// - returns: The number of tied measurements. + func countTieRanks(of X: KeyPath) -> Int { + + let elementCount = reduce(into: [T: Int]()) { dictionary, element in + let x = element[keyPath: X] + dictionary[x, default: 0] += 1 + } + + return elementCount.values.reduce(into: 0) { tiesX, count in + guard count > 1 else { return } + + tiesX += count * (count - 1) / 2 + } + } + + /// Counts the number of discordant pairs inside a collection. + /// - parameter X: The first variable. + /// - parameter Y: The second variable. + /// - returns: The number of discordant pairs contained in the collection. + func discordantPairs( + of X: KeyPath, + and Y: KeyPath + ) -> Int { + + var sortedCopy = self.sorted { lhs, rhs in + if lhs[keyPath: X] == rhs[keyPath: X] { + return lhs[keyPath: Y] < rhs[keyPath: Y] + } else { + return lhs[keyPath: X] < rhs[keyPath: X] + } + } + return sortedCopy[...].computeDiscordance(sorting: Y) + } +} + +private extension ArraySlice { + /// Sorts the measurements and counts the number of discordant pairs contained in it. + /// - parameter X: The first variable under investigation. + /// - parameter Y: The second variable under investigation. + /// - returns: The number of discordant pairs found in the collection. + /// + /// This method assumes that the collection is sorted, in ascending order, + /// by the variable that acts as the basis of discordance measurements against `Y`. 
+ mutating func computeDiscordance( + sorting Y: KeyPath + ) -> Int { + + if count < 2 { + return 0 + } else { + let midPoint = (endIndex + startIndex) / 2 + + var discordants = self[startIndex ..< midPoint].computeDiscordance(sorting: Y) + discordants += self[midPoint ..< endIndex].computeDiscordance(sorting: Y) + + return discordants + self.countDiscordantPairs(sorting: Y) + } + } + + /// Sorts the collection and counts the number of discordant pairs. + /// - parameter Y: The variable to sort by. + /// - returns: The number of discordant pairs found in the collection. + private mutating func countDiscordantPairs( + sorting Y: KeyPath + ) -> Int { + + let pivot = (startIndex + endIndex) / 2 + var sorted = self + var discordant = 0 + var mergeIndex = startIndex + var lhsIndex = startIndex + var rhsIndex = pivot + + while lhsIndex < pivot && rhsIndex < endIndex { + + if self[lhsIndex][keyPath: Y] <= self[rhsIndex][keyPath: Y] { + discordant += Swift.max(0, mergeIndex - lhsIndex) + sorted[mergeIndex] = self[lhsIndex] + lhsIndex += 1 + } else { + discordant += Swift.max(0, mergeIndex - rhsIndex) + sorted[mergeIndex] = self[rhsIndex] + rhsIndex += 1 + } + + mergeIndex += 1 + } + + for index in lhsIndex ..< pivot { + discordant += Swift.max(0, mergeIndex - index) + sorted[mergeIndex] = self[index] + mergeIndex += 1 + } + + for index in rhsIndex ..< endIndex { + discordant += Swift.max(0, mergeIndex - index) + sorted[mergeIndex] = self[index] + mergeIndex += 1 + } + + for index in startIndex ..< endIndex { + self[index] = sorted[index] + } + + return discordant } } diff --git a/Sources/StatKit/Descriptive Statistics/Association/CorrelationMethod.swift b/Sources/StatKit/Descriptive Statistics/Association/CorrelationMethod.swift deleted file mode 100644 index b28dc75..0000000 --- a/Sources/StatKit/Descriptive Statistics/Association/CorrelationMethod.swift +++ /dev/null @@ -1,49 +0,0 @@ -/// An internal protocol defining the methods required by association measure 
calculator types. -@usableFromInline -internal protocol CorrelationCalculator { - /// Computes the measure of association for two variables in a collection. - /// - parameter X: The first variable. - /// - parameter Y: The second variable. - /// - parameter collection: The data set containing the measurements. - /// - parameter composition: The composition of the data set. - /// - returns: The measure of assocation coefficient for the specified variables. - func compute( - for X: KeyPath, - and Y: KeyPath, - in collection: C, - as composition: DataSetComposition - ) -> Double - where T: Comparable & Hashable & ConvertibleToReal, - U: Comparable & Hashable & ConvertibleToReal, - C: Collection -} - -/// Different methods of calculating the association measure between arbitrary comparable variables. -public enum CorrelationMethod { - /// Pearson's product-moment correlation coefficient. - case pearsonsProductMoment - - /// Spearman's Rho coefficient. - case spearmansRho - - /// Kendall's Tau coefficient. - /// - /// This method calculates the Tau-B coefficient, which takes ties into account. - /// The time complexity is O(n * log(n)). - case kendallsTau - - /// A calculator object that can be used to compute the specified measure of association. - @usableFromInline - internal var calculator: any CorrelationCalculator { - switch self { - case .pearsonsProductMoment: - return PearsonsProductMomentCalculator() - - case .spearmansRho: - return SpearmansRhoCalculator() - - case .kendallsTau: - return KendallsTauCalculator() - } - } -} diff --git a/Sources/StatKit/Descriptive Statistics/Association/KendallsTauCalculator.swift b/Sources/StatKit/Descriptive Statistics/Association/KendallsTauCalculator.swift deleted file mode 100644 index f85bf6a..0000000 --- a/Sources/StatKit/Descriptive Statistics/Association/KendallsTauCalculator.swift +++ /dev/null @@ -1,151 +0,0 @@ -import RealModule - -/// A helper object for calculating Kendall's Tau Coefficient. 
-internal struct KendallsTauCalculator: CorrelationCalculator { - internal func compute< - T: Comparable & Hashable & ConvertibleToReal, - U: Comparable & Hashable & ConvertibleToReal, - C: Collection - >( - for X: KeyPath, - and Y: KeyPath, - in collection: C, - as composition: DataSetComposition - ) -> Double { - - guard X != Y else { return 1 } - - let tiesX = collection.countTieRanks(of: X) - let tiesY = collection.countTieRanks(of: Y) - - let count = collection.count - let discordant = collection.discordantPairs(of: X, and: Y) - let combinations = count * (count - 1) / 2 - let concordant = combinations - discordant - tiesX - tiesY - - switch composition { - case .population: - let numerator = concordant.realValue - discordant.realValue - let denominator = combinations.realValue - return numerator / denominator - case .sample: - let numerator = (concordant - discordant).realValue - let tieProduct = (combinations - tiesX) * (combinations - tiesY) - let denominator = tieProduct.realValue.squareRoot() - guard !denominator.isZero else { return .signalingNaN } - - return numerator / denominator - } - } -} - -private extension Collection { - /// Counts the number of tied variables within a collection of measurements. - /// - parameter X : The variable under investigation. - /// - returns: The number of tied measurements. - func countTieRanks(of X: KeyPath) -> Int { - - let elementCount = reduce(into: [T: Int]()) { dictionary, element in - let x = element[keyPath: X] - dictionary[x, default: 0] += 1 - } - - return elementCount.values.reduce(into: 0) { tiesX, count in - guard count > 1 else { return } - - tiesX += count * (count - 1) / 2 - } - } - - /// Counts the number of discordant pairs inside a collection. - /// - parameter X: The first variable. - /// - parameter Y: The second variable. - /// - returns: The number of discordant pairs contained in the collection. 
- func discordantPairs( - of X: KeyPath, - and Y: KeyPath - ) -> Int { - - var sortedCopy = self.sorted { lhs, rhs in - if lhs[keyPath: X] == rhs[keyPath: X] { - return lhs[keyPath: Y] < rhs[keyPath: Y] - } else { - return lhs[keyPath: X] < rhs[keyPath: X] - } - } - return sortedCopy[...].computeDiscordance(sorting: Y) - } -} - -private extension ArraySlice { - /// Sorts the measurements and counts the number of discordant pairs contained in it. - /// - parameter X: The first variable under investigation. - /// - parameter Y: The second variable under investigation. - /// - returns: The number of discordant pairs found in the collection. - /// - /// This method assumes that the collection is sorted, in ascending order, - /// by the variable that acts as the basis of discordance measurements against `Y`. - mutating func computeDiscordance( - sorting Y: KeyPath - ) -> Int { - - if count < 2 { - return 0 - } else { - let midPoint = (endIndex + startIndex) / 2 - - var discordants = self[startIndex ..< midPoint].computeDiscordance(sorting: Y) - discordants += self[midPoint ..< endIndex].computeDiscordance(sorting: Y) - - return discordants + self.countDiscordantPairs(sorting: Y) - } - } - - /// Sorts the collection and counts the number of discordant pairs. - /// - parameter Y: The variable to sort by. - /// - returns: The number of discordant pairs found in the collection. 
- private mutating func countDiscordantPairs( - sorting Y: KeyPath - ) -> Int { - - let pivot = (startIndex + endIndex) / 2 - var sorted = self - var discordant = 0 - var mergeIndex = startIndex - var lhsIndex = startIndex - var rhsIndex = pivot - - while lhsIndex < pivot && rhsIndex < endIndex { - - if self[lhsIndex][keyPath: Y] <= self[rhsIndex][keyPath: Y] { - discordant += Swift.max(0, mergeIndex - lhsIndex) - sorted[mergeIndex] = self[lhsIndex] - lhsIndex += 1 - } else { - discordant += Swift.max(0, mergeIndex - rhsIndex) - sorted[mergeIndex] = self[rhsIndex] - rhsIndex += 1 - } - - mergeIndex += 1 - } - - for index in lhsIndex ..< pivot { - discordant += Swift.max(0, mergeIndex - index) - sorted[mergeIndex] = self[index] - mergeIndex += 1 - } - - for index in rhsIndex ..< endIndex { - discordant += Swift.max(0, mergeIndex - index) - sorted[mergeIndex] = self[index] - mergeIndex += 1 - } - - for index in startIndex ..< endIndex { - self[index] = sorted[index] - } - - return discordant - } -} diff --git a/Sources/StatKit/Descriptive Statistics/Association/PearsonsProductMomentCalculator.swift b/Sources/StatKit/Descriptive Statistics/Association/PearsonsProductMomentCalculator.swift deleted file mode 100644 index 70d5e5d..0000000 --- a/Sources/StatKit/Descriptive Statistics/Association/PearsonsProductMomentCalculator.swift +++ /dev/null @@ -1,47 +0,0 @@ -/// A helper object for calculating the Pearson Correlation Coefficient. 
-internal struct PearsonsProductMomentCalculator: CorrelationCalculator { - internal func compute( - for X: KeyPath, - and Y: KeyPath, - in collection: C, - as composition: DataSetComposition) -> Double - where T: Comparable & Hashable & ConvertibleToReal, - U: Comparable & Hashable & ConvertibleToReal, - C: Collection - { - - guard X != Y else { return 1 } - - let XStdDev = collection.standardDeviation( - variable: X, - from: composition - ) - let YStdDev = collection.standardDeviation( - variable: Y, - from: composition - ) - - let stdDevProduct = XStdDev * YStdDev - if stdDevProduct.isZero { - return .signalingNaN - } - - switch composition { - case .population: - return collection.covariance( - of: X, - and: Y, - from: composition - ) / stdDevProduct - - case .sample: - let sumOfProducts = collection.reduce(into: 0) { result, element in - result += element[keyPath: X].realValue * element[keyPath: Y].realValue - } - let term = collection.mean(variable: X) * collection.mean(variable: Y) - let numerator = sumOfProducts - collection.count.realValue * term - let denominator = (collection.count - 1).realValue * stdDevProduct - return numerator / denominator - } - } -} diff --git a/Sources/StatKit/Descriptive Statistics/Association/SpearmansRhoCalculator.swift b/Sources/StatKit/Descriptive Statistics/Association/SpearmansRhoCalculator.swift deleted file mode 100644 index b72ed0c..0000000 --- a/Sources/StatKit/Descriptive Statistics/Association/SpearmansRhoCalculator.swift +++ /dev/null @@ -1,31 +0,0 @@ -/// A helper object for calculating Spearman's Rho Coefficient. 
-internal struct SpearmansRhoCalculator: CorrelationCalculator { - internal func compute( - for X: KeyPath, - and Y: KeyPath, - in collection: C, - as composition: DataSetComposition) -> Double - where T: Comparable & Hashable, U: Comparable & Hashable, C: Collection { - - guard X != Y else { return 1 } - - let XRanks = collection.rank( - variable: X, - by: >, - strategy: .fractional - ) - let YRanks = collection.rank( - variable: Y, - by: >, - strategy: .fractional - ) - let ranks: [(X: Double, Y: Double)] = Array(zip(XRanks, YRanks)) - - return ranks.correlation( - of: \.X, - and: \.Y, - for: composition, - method: .pearsonsProductMoment - ) - } -} diff --git a/Sources/StatKit/Descriptive Statistics/Ranking/Ranking.swift b/Sources/StatKit/Descriptive Statistics/Ranking/Ranking.swift index 82c4602..20ed920 100644 --- a/Sources/StatKit/Descriptive Statistics/Ranking/Ranking.swift +++ b/Sources/StatKit/Descriptive Statistics/Ranking/Ranking.swift @@ -2,8 +2,6 @@ public extension Collection { /// Ranks the specified variable according to a specified strategy. /// - parameter variable: The variable to investigate. /// - parameter order: The order by which the variables should be ranked. - /// - parameter lhs: The left hand element. - /// - parameter rhs: The right hand element. /// - parameter strategy: The calculation method to use. /// - returns: An array with the rank of each original element, /// where the index of a rank corresponds to the index of the element in the original array. diff --git a/Sources/StatKit/Protocols/Distributions/ContinuousDistribution.swift b/Sources/StatKit/Protocols/Distributions/ContinuousDistribution.swift index aed1238..f927339 100644 --- a/Sources/StatKit/Protocols/Distributions/ContinuousDistribution.swift +++ b/Sources/StatKit/Protocols/Distributions/ContinuousDistribution.swift @@ -1,6 +1,7 @@ public protocol ContinuousDistribution: Distribution { /// The Probability Density Function of the distribution. 
/// - parameter x: The value for which to calculate the relative likelihood of being sampled. + /// - parameter logarithmic: Whether to return the natural logarithm of the function. /// - returns: The relative likelihood that a sample from the distribution is exactly equal to x. func pdf(x: DomainType, logarithmic: Bool) -> Double } diff --git a/Sources/StatKit/Protocols/Distributions/DiscreteDistribution.swift b/Sources/StatKit/Protocols/Distributions/DiscreteDistribution.swift index 84f7143..66124bf 100644 --- a/Sources/StatKit/Protocols/Distributions/DiscreteDistribution.swift +++ b/Sources/StatKit/Protocols/Distributions/DiscreteDistribution.swift @@ -2,6 +2,7 @@ public protocol DiscreteDistribution: Distribution { /// The Probability Mass Function of the distribution. /// - parameter x: The value for which to calculate the probability. + /// - parameter logarithmic: Whether to return the natural logarithm of the function. /// - returns: The probability that a sample from the distribution is exactly equal to x. 
func pmf(x: DomainType, logarithmic: Bool) -> Double } diff --git a/Sources/StatKit/StatKit.docc/Association.md b/Sources/StatKit/StatKit.docc/Association.md index 53b9878..7d09aa3 100644 --- a/Sources/StatKit/StatKit.docc/Association.md +++ b/Sources/StatKit/StatKit.docc/Association.md @@ -10,12 +10,10 @@ One of the most common ways of measuring association is by the Pearson Product M ### Functions -- ``StatKit/Swift/Collection/correlation(of:and:for:method:)`` - -### Methods of Computation - -- ``CorrelationMethod`` +- ``StatKit/Swift/Collection/pearsonR(of:and:)`` +- ``StatKit/Swift/Collection/spearmanR(of:and:)`` +- ``StatKit/Swift/Collection/kendallTau(of:and:variant:)`` ### Enums -- ``DataSetComposition`` +- ``KendallTauVariant`` diff --git a/Sources/StatKit/StatKit.docc/Combinatorial Extensions.md b/Sources/StatKit/StatKit.docc/Combinatorial Extensions.md index aa0d8ee..14765d5 100644 --- a/Sources/StatKit/StatKit.docc/Combinatorial Extensions.md +++ b/Sources/StatKit/StatKit.docc/Combinatorial Extensions.md @@ -11,4 +11,4 @@ StatKit allows developers to compute different extensions of combinatorial numbe ### Functions - ``choose(n:k:)`` -- ``betaFunction(alpha:beta:log:)`` +- ``beta(alpha:beta:logarithmic:)`` diff --git a/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/LinearCorrelationTests.swift b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/LinearCorrelationTests.swift index f8d1eca..3947c98 100644 --- a/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/LinearCorrelationTests.swift +++ b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/LinearCorrelationTests.swift @@ -16,12 +16,7 @@ final class LinearCorrelationTests: XCTestCase { SIMD2(x: 9, y: 62), SIMD2(x: 10, y: 69)] - let calculatedCorrelation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .population, - method: .pearsonsProductMoment - ) + let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y) let 
expectedCorrelation = 0.99329456 XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6) @@ -39,12 +34,7 @@ final class LinearCorrelationTests: XCTestCase { SIMD2(x: 9, y: 62), SIMD2(x: 10, y: 69)] - let calculatedCorrelation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .sample, - method: .pearsonsProductMoment - ) + let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y) let expectedCorrelation = 0.99329456 XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6) @@ -62,12 +52,7 @@ final class LinearCorrelationTests: XCTestCase { SIMD2(x: 9, y: 62), SIMD2(x: 10, y: 69)] - let calculatedCorrelation = simd2Array.correlation( - of: \.x, - and: \.x, - for: .population, - method: .pearsonsProductMoment - ) + let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.x) let expectedCorrelation = 1.0 XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6) @@ -75,12 +60,7 @@ final class LinearCorrelationTests: XCTestCase { func testPearsonCorrelationWithEmptyCollection() { let simd2Array = [SIMD2]() - let calculatedCorrelation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .sample, - method: .pearsonsProductMoment - ) + let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y) XCTAssert(calculatedCorrelation.isNaN) } @@ -96,12 +76,7 @@ final class LinearCorrelationTests: XCTestCase { SIMD2(x: 8, y: 6), SIMD2(x: 9, y: 6), SIMD2(x: 10, y: 6)] - let calculatedCorrelation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .sample, - method: .pearsonsProductMoment - ) + let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y) XCTAssert(calculatedCorrelation.isNaN) } diff --git a/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/PearsonRTests.swift b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/PearsonRTests.swift new file mode 100644 index 0000000..e81ac7e --- /dev/null +++ b/Tests/StatKitTests/Descriptive Statistics 
Tests/Association Tests/PearsonRTests.swift @@ -0,0 +1,67 @@ +#if !os(watchOS) + +import XCTest +import StatKit + +final class PearsonRTests: XCTestCase { + func testPearsonR() { + let simd2Array = [SIMD2(x: 1, y: 10), + SIMD2(x: 2, y: 20), + SIMD2(x: 3, y: 27), + SIMD2(x: 4, y: 30), + SIMD2(x: 5, y: 35), + SIMD2(x: 6, y: 38), + SIMD2(x: 7, y: 49), + SIMD2(x: 8, y: 56), + SIMD2(x: 9, y: 62), + SIMD2(x: 10, y: 69)] + + let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y) + let expectedCorrelation = 0.99329456 + + XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6) + } + + func testPearsonRWithSingleVariable() { + let simd2Array = [SIMD2(x: 1, y: 10), + SIMD2(x: 2, y: 20), + SIMD2(x: 3, y: 27), + SIMD2(x: 4, y: 30), + SIMD2(x: 5, y: 35), + SIMD2(x: 6, y: 38), + SIMD2(x: 7, y: 49), + SIMD2(x: 8, y: 56), + SIMD2(x: 9, y: 62), + SIMD2(x: 10, y: 69)] + + let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.x) + let expectedCorrelation = 1.0 + + XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6) + } + + func testPearsonRWithEmptyCollection() { + let simd2Array = [SIMD2]() + let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y) + + XCTAssert(calculatedCorrelation.isNaN) + } + + func testPearsonRWithConstantValueArray() { + let simd2Array = [SIMD2(x: 1, y: 6), + SIMD2(x: 2, y: 6), + SIMD2(x: 3, y: 6), + SIMD2(x: 4, y: 6), + SIMD2(x: 5, y: 6), + SIMD2(x: 6, y: 6), + SIMD2(x: 7, y: 6), + SIMD2(x: 8, y: 6), + SIMD2(x: 9, y: 6), + SIMD2(x: 10, y: 6)] + let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y) + + XCTAssertTrue(calculatedCorrelation.isNaN) + } +} + +#endif diff --git a/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/RankCorrelationTests.swift b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/RankCorrelationTests.swift index f3ef8ee..3925e27 100644 --- a/Tests/StatKitTests/Descriptive Statistics Tests/Association 
Tests/RankCorrelationTests.swift +++ b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/RankCorrelationTests.swift @@ -16,12 +16,7 @@ final class RankCorrelationTests: XCTestCase { SIMD2(x: 9, y: 62), SIMD2(x: 10, y: 69)] - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .population, - method: .spearmansRho - ) + let calculatedAssociation = simd2Array.spearmanR(of: \.x, and: \.y) let expectedAssociation = 1.0 XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6) @@ -39,12 +34,7 @@ final class RankCorrelationTests: XCTestCase { SIMD2(x: 9, y: 7), SIMD2(x: 10, y: 1)] - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .population, - method: .spearmansRho - ) + let calculatedAssociation = simd2Array.spearmanR(of: \.x, and: \.y) let expectedAssociation = -0.165144564768954 XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6) @@ -62,12 +52,7 @@ final class RankCorrelationTests: XCTestCase { SIMD2(x: 9, y: 62), SIMD2(x: 10, y: 69)] - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .population, - method: .spearmansRho - ) + let calculatedAssociation = simd2Array.spearmanR(of: \.x, and: \.y) let expectedAssociation = 1.0 XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6) @@ -75,25 +60,15 @@ final class RankCorrelationTests: XCTestCase { func testPearsonCorrelationWithEmptyCollection() { let simd2Array = [SIMD2]() - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .population, - method: .spearmansRho - ) - + let calculatedAssociation = simd2Array.spearmanR(of: \.x, and: \.y) + XCTAssertTrue(calculatedAssociation.isNaN) } func testPearsonCorrelationWithSingleEntryCollection() { let simd2Array = [SIMD2]() - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .population, - method: .spearmansRho - ) - + let calculatedAssociation = 
simd2Array.spearmanR(of: \.x, and: \.y) + XCTAssertTrue(calculatedAssociation.isNaN) } @@ -103,12 +78,7 @@ final class RankCorrelationTests: XCTestCase { SIMD2(x: 3, y: 27), SIMD2(x: 4, y: 30)] - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .population, - method: .kendallsTau - ) + let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .a) let expectedAssociation = 0.666666666 XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6) @@ -126,12 +96,7 @@ final class RankCorrelationTests: XCTestCase { SIMD2(x: 9, y: -2), SIMD2(x: 10, y: -6)] - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .population, - method: .kendallsTau - ) + let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .a) let expectedAssociation = -0.244444444 XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6) @@ -144,12 +109,7 @@ final class RankCorrelationTests: XCTestCase { SIMD2(x: 2, y: 2), SIMD2(x: 5, y: 7)] - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .sample, - method: .kendallsTau - ) + let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b) let expectedAssociation = 0.119522861 XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6) @@ -162,12 +122,7 @@ final class RankCorrelationTests: XCTestCase { SIMD2(x: 2, y: 2), SIMD2(x: 5, y: 7)] - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .population, - method: .kendallsTau - ) + let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .a) let expectedAssociation = 0.1 XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6) @@ -180,13 +135,8 @@ final class RankCorrelationTests: XCTestCase { SIMD2(x: 1, y: 2), SIMD2(x: 1, y: 7)] - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .sample, - method: 
.kendallsTau - ) - + let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b) + XCTAssertTrue(calculatedAssociation.isNaN) } @@ -202,12 +152,7 @@ final class RankCorrelationTests: XCTestCase { SIMD2(x: 9, y: 62), SIMD2(x: 10, y: 69)] - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .sample, - method: .kendallsTau - ) + let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b) let expectedAssociation = 1.0 XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6) @@ -215,25 +160,15 @@ final class RankCorrelationTests: XCTestCase { func testKendallsTauWithEmptyCollection() { let simd2Array = [SIMD2]() - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .sample, - method: .kendallsTau - ) - + let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b) + XCTAssertTrue(calculatedAssociation.isNaN) } func testKendallsTauBWithSingleEntryCollection() { let simd2Array = [SIMD2]() - let calculatedAssociation = simd2Array.correlation( - of: \.x, - and: \.y, - for: .sample, - method: .kendallsTau - ) - + let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b) + XCTAssertTrue(calculatedAssociation.isNaN) } } diff --git a/run-tests.sh b/run-tests.sh new file mode 100755 index 0000000..bf8ea38 --- /dev/null +++ b/run-tests.sh @@ -0,0 +1,9 @@ +#!/bin/zsh + +if [ -x "$(command -v swiftlint)" ]; then + swiftlint lint --strict +else + echo "Install Swiftlint to run linting..." +fi + +swift test -c release --parallel