From d8b0f7501db96bb752c66db29a33f41cf1def680 Mon Sep 17 00:00:00 2001
From: Jimmy M Andersson
Date: Tue, 26 Nov 2024 19:52:15 +0100
Subject: [PATCH] feat!: Use separate methods for different correlation
coefficients
---
.github/workflows/TestSuite.yml | 8 +-
Local Test Helpers/docker-qa.yml | 17 --
Local Test Helpers/run-tests.sh | 7 -
Package.swift | 8 +-
README.md | 4 +-
Sources/StatKit/BetaFunctions.swift | 1 -
.../Association/Correlation.swift | 240 +++++++++++++++++-
.../Association/CorrelationMethod.swift | 49 ----
.../Association/KendallsTauCalculator.swift | 151 -----------
.../PearsonsProductMomentCalculator.swift | 47 ----
.../Association/SpearmansRhoCalculator.swift | 31 ---
.../Ranking/Ranking.swift | 2 -
.../ContinuousDistribution.swift | 1 +
.../Distributions/DiscreteDistribution.swift | 1 +
Sources/StatKit/StatKit.docc/Association.md | 10 +-
.../StatKit.docc/Combinatorial Extensions.md | 2 +-
.../LinearCorrelationTests.swift | 35 +--
.../Association Tests/PearsonRTests.swift | 67 +++++
.../RankCorrelationTests.swift | 101 ++------
run-tests.sh | 9 +
20 files changed, 343 insertions(+), 448 deletions(-)
delete mode 100644 Local Test Helpers/docker-qa.yml
delete mode 100755 Local Test Helpers/run-tests.sh
delete mode 100644 Sources/StatKit/Descriptive Statistics/Association/CorrelationMethod.swift
delete mode 100644 Sources/StatKit/Descriptive Statistics/Association/KendallsTauCalculator.swift
delete mode 100644 Sources/StatKit/Descriptive Statistics/Association/PearsonsProductMomentCalculator.swift
delete mode 100644 Sources/StatKit/Descriptive Statistics/Association/SpearmansRhoCalculator.swift
create mode 100644 Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/PearsonRTests.swift
create mode 100755 run-tests.sh
diff --git a/.github/workflows/TestSuite.yml b/.github/workflows/TestSuite.yml
index 1b43960..eb1d283 100644
--- a/.github/workflows/TestSuite.yml
+++ b/.github/workflows/TestSuite.yml
@@ -7,12 +7,12 @@ on:
branches: [ main ]
env:
- DEVELOPER_DIR: /Applications/Xcode_14.0.app
+ DEVELOPER_DIR: /Applications/Xcode_16.1.app
jobs:
macOS:
- runs-on: macos-12
-
+ runs-on: macos-15
+
steps:
- uses: actions/checkout@v3
- name: Run macOS Tests
@@ -25,6 +25,6 @@ jobs:
- uses: actions/checkout@v3
- uses: swift-actions/setup-swift@v1
with:
- swift-version: '5.7'
+ swift-version: '5.9'
- name: Run Linux Tests
run: swift test -c release --parallel
diff --git a/Local Test Helpers/docker-qa.yml b/Local Test Helpers/docker-qa.yml
deleted file mode 100644
index aa93424..0000000
--- a/Local Test Helpers/docker-qa.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-version: "3.8"
-services:
- linux_5_7:
- container_name: linux_5_7
- image: swift:5.7.0-amazonlinux2
- volumes:
- - ./:/statkit
- working_dir: /statkit
- command: swift test -c release --parallel
-
- linter:
- container_name: linter
- image: norionomura/swiftlint:0.46.5_swift-5.5.1
- volumes:
- - ./:/statkit
- working_dir: /statkit
- command: swiftlint lint --strict
diff --git a/Local Test Helpers/run-tests.sh b/Local Test Helpers/run-tests.sh
deleted file mode 100755
index 52f0c41..0000000
--- a/Local Test Helpers/run-tests.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-if [ -x "$(command -v docker)" ]; then
- docker-compose -f docker-qa.yml up
-else
- echo "Install Docker to run Linux tests and linting..."
-fi
-
-swift test -c release --parallel
diff --git a/Package.swift b/Package.swift
index 7a2931e..545bd6b 100644
--- a/Package.swift
+++ b/Package.swift
@@ -1,13 +1,13 @@
-// swift-tools-version:5.7
+// swift-tools-version:5.9
import PackageDescription
let package = Package(
name: "StatKit",
platforms: [
- .macOS(.v12),
- .iOS(.v15),
- .tvOS(.v15)
+ .macOS(.v14),
+ .iOS(.v17),
+ .tvOS(.v17)
],
products: [
.library(
diff --git a/README.md b/README.md
index 6d4ded0..bfd12f7 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@
-
+
@@ -113,7 +113,7 @@ let normalRandomVariables = normal.sample(10)
StatKit is documented using Swift-DocC, which means that the documentation pages can be built by Xcode and viewed in the Developer Documentation panel. Build it by clicking `Product > Build Documentation` or hitting `Shift + Ctrl + Cmd + D`.
## System Requirements
-To use StatKit, make sure that your system has Swift 5.7 (or later) installed. If you’re using a Mac, also make sure that `xcode-select` points at an Xcode installation that includes a valid version of Swift and that you’re running macOS Monterey (12.5) or later.
+To use StatKit, make sure that your system has Swift 5.9 (or later) installed. If you’re using a Mac, also make sure that `xcode-select` points at an Xcode installation that includes a valid version of Swift and that you’re running macOS 14 or later.
**IMPORTANT**
StatKit **does not** officially support any beta software, including beta versions of Xcode and macOS, or unreleased versions of Swift.
diff --git a/Sources/StatKit/BetaFunctions.swift b/Sources/StatKit/BetaFunctions.swift
index 46da23a..f748c5a 100644
--- a/Sources/StatKit/BetaFunctions.swift
+++ b/Sources/StatKit/BetaFunctions.swift
@@ -31,7 +31,6 @@ public func beta(
/// - parameter x: The value for which to evaluate the incomplete Beta function.
/// - parameter alpha: The first shape argument.
/// - parameter beta: The second shape argument.
-/// - parameter logarithmic: Whether to return the natural logarithm of the function.
///
/// The Beta function only supports positive numbers `alpha` and `beta`.
/// `x` is a value in the range [0, 1].
diff --git a/Sources/StatKit/Descriptive Statistics/Association/Correlation.swift b/Sources/StatKit/Descriptive Statistics/Association/Correlation.swift
index 79185db..5be976e 100644
--- a/Sources/StatKit/Descriptive Statistics/Association/Correlation.swift
+++ b/Sources/StatKit/Descriptive Statistics/Association/Correlation.swift
@@ -1,30 +1,244 @@
+import RealModule
+
public extension Collection {
- /// Calculates the specified correlation coefficient for a collection.
+ /// Calculates Pearson's correlation coefficient for a collection.
/// - parameter X: The first variable.
/// - parameter Y: The second variable.
- /// - parameter composition: The composition of the collection.
- /// - parameter method: The calculation method to use.
- /// - returns: The correlation coefficient for the specified variables in the collection.
+ /// - returns: Pearson's correlation coefficient.
///
/// Since there is no notion of correlation in collections with less than two elements,
/// this method returns NaN if the array count is less than two.
/// The time complexity of this method is O(n).
@inlinable
- func correlation(
+ func pearsonR(
of X: KeyPath,
- and Y: KeyPath,
- for composition: DataSetComposition,
- method: CorrelationMethod = .pearsonsProductMoment
+ and Y: KeyPath
) -> Double
where T: Comparable & Hashable & ConvertibleToReal,
U: Comparable & Hashable & ConvertibleToReal
{
+ typealias RComponents = (xSum: Double, ySum: Double, xySum: Double, xSquareSum: Double, ySquareSum: Double)
guard self.count > 1 else { return .signalingNaN }
- return method.calculator.compute(
- for: X,
- and: Y,
- in: self,
- as: composition
+
+ guard X != Y else { return 1 }
+
+ let n = self.count.realValue
+
+ let rComponents: RComponents = self.reduce(into: (0, 0, 0, 0, 0)) { partialResult, element in
+ let x = element[keyPath: X].realValue
+ let y = element[keyPath: Y].realValue
+
+ partialResult.xSum += x
+ partialResult.ySum += y
+ partialResult.xySum += x * y
+ partialResult.xSquareSum += x * x
+ partialResult.ySquareSum += y * y
+ }
+
+ let numerator = n * rComponents.xySum - rComponents.xSum * rComponents.ySum
+ let denominator = (
+ (n * rComponents.xSquareSum - rComponents.xSum * rComponents.xSum) *
+ (n * rComponents.ySquareSum - rComponents.ySum * rComponents.ySum)
+ ).squareRoot()
+
+ guard denominator != 0 else { return .signalingNaN }
+
+ return numerator / denominator
+ }
+
+ /// Calculates Spearman's rank-order correlation coefficient for a collection.
+ /// - parameter X: The first variable.
+ /// - parameter Y: The second variable.
+ /// - returns: Spearman's rank-order correlation coefficient.
+ ///
+ /// Since there is no notion of correlation in collections with less than two elements,
+ /// this method returns NaN if the array count is less than two.
+ /// The time complexity is that of ranking both variables, followed by an O(n) pass over the ranks.
+ @inlinable
+ func spearmanR(
+ of X: KeyPath,
+ and Y: KeyPath
+ ) -> Double
+ where T: Comparable & Hashable & ConvertibleToReal,
+ U: Comparable & Hashable & ConvertibleToReal
+ {
+ guard X != Y else { return 1 }
+
+ let XRanks = self.rank(
+ variable: X,
+ by: >,
+ strategy: .fractional
+ )
+ let YRanks = self.rank(
+ variable: Y,
+ by: >,
+ strategy: .fractional
)
+ let ranks: [(X: Double, Y: Double)] = Array(zip(XRanks, YRanks))
+
+ return ranks.pearsonR(of: \.X, and: \.Y)
+ }
+
+ /// Calculates Kendall's rank correlation coefficient for a collection.
+ /// - parameter X: The first variable.
+ /// - parameter Y: The second variable.
+ /// - parameter variant: Which variant of the Tau coefficient to compute.
+ /// - returns: Kendall's rank correlation coefficient.
+ ///
+ /// Since there is no notion of correlation in collections with less than two elements,
+ /// this method returns NaN if the array count is less than two.
+ /// The time complexity of this method is O(n * log(n)).
+ func kendallTau(
+ of X: KeyPath,
+ and Y: KeyPath,
+ variant: KendallTauVariant = .b
+ ) -> Double
+ where T: Comparable & Hashable & ConvertibleToReal,
+ U: Comparable & Hashable & ConvertibleToReal
+ {
+ guard X != Y else { return 1 }
+
+ let tiesX = self.countTieRanks(of: X)
+ let tiesY = self.countTieRanks(of: Y)
+
+ let count = self.count
+ let discordant = self.discordantPairs(of: X, and: Y)
+ let combinations = count * (count - 1) / 2
+ let concordant = combinations - discordant - tiesX - tiesY
+
+ switch variant {
+ case .a:
+ let numerator = (concordant - discordant).realValue
+ let denominator = combinations.realValue
+ return numerator / denominator
+ case .b:
+ let numerator = (concordant - discordant).realValue
+ let tieProduct = (combinations - tiesX) * (combinations - tiesY)
+ let denominator = tieProduct.realValue.squareRoot()
+ guard !denominator.isZero else { return .signalingNaN }
+
+ return numerator / denominator
+ }
+ }
+}
+
+/// The different supported variants of the Kendall Tau coefficient.
+public enum KendallTauVariant {
+ /// The original Tau statistic defined in 1938.
+ /// Tau-a does not make adjustments for rank ties.
+ case a
+
+ /// The Tau-b statistic (originally named Tau-w) is an extension of Tau-a which makes adjustments for tie rank pairs.
+ case b
+}
+
+private extension Collection {
+ /// Counts the number of tied variables within a collection of measurements.
+ /// - parameter X : The variable under investigation.
+ /// - returns: The number of tied measurements.
+ func countTieRanks(of X: KeyPath) -> Int {
+
+ let elementCount = reduce(into: [T: Int]()) { dictionary, element in
+ let x = element[keyPath: X]
+ dictionary[x, default: 0] += 1
+ }
+
+ return elementCount.values.reduce(into: 0) { tiesX, count in
+ guard count > 1 else { return }
+
+ tiesX += count * (count - 1) / 2
+ }
+ }
+
+ /// Counts the number of discordant pairs inside a collection.
+ /// - parameter X: The first variable.
+ /// - parameter Y: The second variable.
+ /// - returns: The number of discordant pairs contained in the collection.
+ func discordantPairs(
+ of X: KeyPath,
+ and Y: KeyPath
+ ) -> Int {
+
+ var sortedCopy = self.sorted { lhs, rhs in
+ if lhs[keyPath: X] == rhs[keyPath: X] {
+ return lhs[keyPath: Y] < rhs[keyPath: Y]
+ } else {
+ return lhs[keyPath: X] < rhs[keyPath: X]
+ }
+ }
+ return sortedCopy[...].computeDiscordance(sorting: Y)
+ }
+}
+
+private extension ArraySlice {
+ /// Sorts the measurements and counts the number of discordant pairs contained in it.
+ /// - parameter Y: The variable to count discordant pairs for; as a side effect,
+ /// the slice ends up sorted by this variable.
+ /// - returns: The number of discordant pairs found in the collection.
+ ///
+ /// This method assumes that the collection is sorted, in ascending order,
+ /// by the variable that acts as the basis of discordance measurements against `Y`.
+ mutating func computeDiscordance(
+ sorting Y: KeyPath
+ ) -> Int {
+
+ if count < 2 {
+ return 0
+ } else {
+ let midPoint = (endIndex + startIndex) / 2
+
+ var discordants = self[startIndex ..< midPoint].computeDiscordance(sorting: Y)
+ discordants += self[midPoint ..< endIndex].computeDiscordance(sorting: Y)
+
+ return discordants + self.countDiscordantPairs(sorting: Y)
+ }
+ }
+
+ /// Sorts the collection and counts the number of discordant pairs.
+ /// - parameter Y: The variable to sort by.
+ /// - returns: The number of discordant pairs found in the collection.
+ private mutating func countDiscordantPairs(
+ sorting Y: KeyPath
+ ) -> Int {
+
+ let pivot = (startIndex + endIndex) / 2
+ var sorted = self
+ var discordant = 0
+ var mergeIndex = startIndex
+ var lhsIndex = startIndex
+ var rhsIndex = pivot
+
+ while lhsIndex < pivot && rhsIndex < endIndex {
+
+ if self[lhsIndex][keyPath: Y] <= self[rhsIndex][keyPath: Y] {
+ discordant += Swift.max(0, mergeIndex - lhsIndex)
+ sorted[mergeIndex] = self[lhsIndex]
+ lhsIndex += 1
+ } else {
+ discordant += Swift.max(0, mergeIndex - rhsIndex)
+ sorted[mergeIndex] = self[rhsIndex]
+ rhsIndex += 1
+ }
+
+ mergeIndex += 1
+ }
+
+ for index in lhsIndex ..< pivot {
+ discordant += Swift.max(0, mergeIndex - index)
+ sorted[mergeIndex] = self[index]
+ mergeIndex += 1
+ }
+
+ for index in rhsIndex ..< endIndex {
+ discordant += Swift.max(0, mergeIndex - index)
+ sorted[mergeIndex] = self[index]
+ mergeIndex += 1
+ }
+
+ for index in startIndex ..< endIndex {
+ self[index] = sorted[index]
+ }
+
+ return discordant
}
}
diff --git a/Sources/StatKit/Descriptive Statistics/Association/CorrelationMethod.swift b/Sources/StatKit/Descriptive Statistics/Association/CorrelationMethod.swift
deleted file mode 100644
index b28dc75..0000000
--- a/Sources/StatKit/Descriptive Statistics/Association/CorrelationMethod.swift
+++ /dev/null
@@ -1,49 +0,0 @@
-/// An internal protocol defining the methods required by association measure calculator types.
-@usableFromInline
-internal protocol CorrelationCalculator {
- /// Computes the measure of association for two variables in a collection.
- /// - parameter X: The first variable.
- /// - parameter Y: The second variable.
- /// - parameter collection: The data set containing the measurements.
- /// - parameter composition: The composition of the data set.
- /// - returns: The measure of assocation coefficient for the specified variables.
- func compute(
- for X: KeyPath,
- and Y: KeyPath,
- in collection: C,
- as composition: DataSetComposition
- ) -> Double
- where T: Comparable & Hashable & ConvertibleToReal,
- U: Comparable & Hashable & ConvertibleToReal,
- C: Collection
-}
-
-/// Different methods of calculating the association measure between arbitrary comparable variables.
-public enum CorrelationMethod {
- /// Pearson's product-moment correlation coefficient.
- case pearsonsProductMoment
-
- /// Spearman's Rho coefficient.
- case spearmansRho
-
- /// Kendall's Tau coefficient.
- ///
- /// This method calculates the Tau-B coefficient, which takes ties into account.
- /// The time complexity is O(n * log(n)).
- case kendallsTau
-
- /// A calculator object that can be used to compute the specified measure of association.
- @usableFromInline
- internal var calculator: any CorrelationCalculator {
- switch self {
- case .pearsonsProductMoment:
- return PearsonsProductMomentCalculator()
-
- case .spearmansRho:
- return SpearmansRhoCalculator()
-
- case .kendallsTau:
- return KendallsTauCalculator()
- }
- }
-}
diff --git a/Sources/StatKit/Descriptive Statistics/Association/KendallsTauCalculator.swift b/Sources/StatKit/Descriptive Statistics/Association/KendallsTauCalculator.swift
deleted file mode 100644
index f85bf6a..0000000
--- a/Sources/StatKit/Descriptive Statistics/Association/KendallsTauCalculator.swift
+++ /dev/null
@@ -1,151 +0,0 @@
-import RealModule
-
-/// A helper object for calculating Kendall's Tau Coefficient.
-internal struct KendallsTauCalculator: CorrelationCalculator {
- internal func compute<
- T: Comparable & Hashable & ConvertibleToReal,
- U: Comparable & Hashable & ConvertibleToReal,
- C: Collection
- >(
- for X: KeyPath,
- and Y: KeyPath,
- in collection: C,
- as composition: DataSetComposition
- ) -> Double {
-
- guard X != Y else { return 1 }
-
- let tiesX = collection.countTieRanks(of: X)
- let tiesY = collection.countTieRanks(of: Y)
-
- let count = collection.count
- let discordant = collection.discordantPairs(of: X, and: Y)
- let combinations = count * (count - 1) / 2
- let concordant = combinations - discordant - tiesX - tiesY
-
- switch composition {
- case .population:
- let numerator = concordant.realValue - discordant.realValue
- let denominator = combinations.realValue
- return numerator / denominator
- case .sample:
- let numerator = (concordant - discordant).realValue
- let tieProduct = (combinations - tiesX) * (combinations - tiesY)
- let denominator = tieProduct.realValue.squareRoot()
- guard !denominator.isZero else { return .signalingNaN }
-
- return numerator / denominator
- }
- }
-}
-
-private extension Collection {
- /// Counts the number of tied variables within a collection of measurements.
- /// - parameter X : The variable under investigation.
- /// - returns: The number of tied measurements.
- func countTieRanks(of X: KeyPath) -> Int {
-
- let elementCount = reduce(into: [T: Int]()) { dictionary, element in
- let x = element[keyPath: X]
- dictionary[x, default: 0] += 1
- }
-
- return elementCount.values.reduce(into: 0) { tiesX, count in
- guard count > 1 else { return }
-
- tiesX += count * (count - 1) / 2
- }
- }
-
- /// Counts the number of discordant pairs inside a collection.
- /// - parameter X: The first variable.
- /// - parameter Y: The second variable.
- /// - returns: The number of discordant pairs contained in the collection.
- func discordantPairs(
- of X: KeyPath,
- and Y: KeyPath
- ) -> Int {
-
- var sortedCopy = self.sorted { lhs, rhs in
- if lhs[keyPath: X] == rhs[keyPath: X] {
- return lhs[keyPath: Y] < rhs[keyPath: Y]
- } else {
- return lhs[keyPath: X] < rhs[keyPath: X]
- }
- }
- return sortedCopy[...].computeDiscordance(sorting: Y)
- }
-}
-
-private extension ArraySlice {
- /// Sorts the measurements and counts the number of discordant pairs contained in it.
- /// - parameter X: The first variable under investigation.
- /// - parameter Y: The second variable under investigation.
- /// - returns: The number of discordant pairs found in the collection.
- ///
- /// This method assumes that the collection is sorted, in ascending order,
- /// by the variable that acts as the basis of discordance measurements against `Y`.
- mutating func computeDiscordance(
- sorting Y: KeyPath
- ) -> Int {
-
- if count < 2 {
- return 0
- } else {
- let midPoint = (endIndex + startIndex) / 2
-
- var discordants = self[startIndex ..< midPoint].computeDiscordance(sorting: Y)
- discordants += self[midPoint ..< endIndex].computeDiscordance(sorting: Y)
-
- return discordants + self.countDiscordantPairs(sorting: Y)
- }
- }
-
- /// Sorts the collection and counts the number of discordant pairs.
- /// - parameter Y: The variable to sort by.
- /// - returns: The number of discordant pairs found in the collection.
- private mutating func countDiscordantPairs(
- sorting Y: KeyPath
- ) -> Int {
-
- let pivot = (startIndex + endIndex) / 2
- var sorted = self
- var discordant = 0
- var mergeIndex = startIndex
- var lhsIndex = startIndex
- var rhsIndex = pivot
-
- while lhsIndex < pivot && rhsIndex < endIndex {
-
- if self[lhsIndex][keyPath: Y] <= self[rhsIndex][keyPath: Y] {
- discordant += Swift.max(0, mergeIndex - lhsIndex)
- sorted[mergeIndex] = self[lhsIndex]
- lhsIndex += 1
- } else {
- discordant += Swift.max(0, mergeIndex - rhsIndex)
- sorted[mergeIndex] = self[rhsIndex]
- rhsIndex += 1
- }
-
- mergeIndex += 1
- }
-
- for index in lhsIndex ..< pivot {
- discordant += Swift.max(0, mergeIndex - index)
- sorted[mergeIndex] = self[index]
- mergeIndex += 1
- }
-
- for index in rhsIndex ..< endIndex {
- discordant += Swift.max(0, mergeIndex - index)
- sorted[mergeIndex] = self[index]
- mergeIndex += 1
- }
-
- for index in startIndex ..< endIndex {
- self[index] = sorted[index]
- }
-
- return discordant
- }
-}
diff --git a/Sources/StatKit/Descriptive Statistics/Association/PearsonsProductMomentCalculator.swift b/Sources/StatKit/Descriptive Statistics/Association/PearsonsProductMomentCalculator.swift
deleted file mode 100644
index 70d5e5d..0000000
--- a/Sources/StatKit/Descriptive Statistics/Association/PearsonsProductMomentCalculator.swift
+++ /dev/null
@@ -1,47 +0,0 @@
-/// A helper object for calculating the Pearson Correlation Coefficient.
-internal struct PearsonsProductMomentCalculator: CorrelationCalculator {
- internal func compute(
- for X: KeyPath,
- and Y: KeyPath,
- in collection: C,
- as composition: DataSetComposition) -> Double
- where T: Comparable & Hashable & ConvertibleToReal,
- U: Comparable & Hashable & ConvertibleToReal,
- C: Collection
- {
-
- guard X != Y else { return 1 }
-
- let XStdDev = collection.standardDeviation(
- variable: X,
- from: composition
- )
- let YStdDev = collection.standardDeviation(
- variable: Y,
- from: composition
- )
-
- let stdDevProduct = XStdDev * YStdDev
- if stdDevProduct.isZero {
- return .signalingNaN
- }
-
- switch composition {
- case .population:
- return collection.covariance(
- of: X,
- and: Y,
- from: composition
- ) / stdDevProduct
-
- case .sample:
- let sumOfProducts = collection.reduce(into: 0) { result, element in
- result += element[keyPath: X].realValue * element[keyPath: Y].realValue
- }
- let term = collection.mean(variable: X) * collection.mean(variable: Y)
- let numerator = sumOfProducts - collection.count.realValue * term
- let denominator = (collection.count - 1).realValue * stdDevProduct
- return numerator / denominator
- }
- }
-}
diff --git a/Sources/StatKit/Descriptive Statistics/Association/SpearmansRhoCalculator.swift b/Sources/StatKit/Descriptive Statistics/Association/SpearmansRhoCalculator.swift
deleted file mode 100644
index b72ed0c..0000000
--- a/Sources/StatKit/Descriptive Statistics/Association/SpearmansRhoCalculator.swift
+++ /dev/null
@@ -1,31 +0,0 @@
-/// A helper object for calculating Spearman's Rho Coefficient.
-internal struct SpearmansRhoCalculator: CorrelationCalculator {
- internal func compute(
- for X: KeyPath,
- and Y: KeyPath,
- in collection: C,
- as composition: DataSetComposition) -> Double
- where T: Comparable & Hashable, U: Comparable & Hashable, C: Collection {
-
- guard X != Y else { return 1 }
-
- let XRanks = collection.rank(
- variable: X,
- by: >,
- strategy: .fractional
- )
- let YRanks = collection.rank(
- variable: Y,
- by: >,
- strategy: .fractional
- )
- let ranks: [(X: Double, Y: Double)] = Array(zip(XRanks, YRanks))
-
- return ranks.correlation(
- of: \.X,
- and: \.Y,
- for: composition,
- method: .pearsonsProductMoment
- )
- }
-}
diff --git a/Sources/StatKit/Descriptive Statistics/Ranking/Ranking.swift b/Sources/StatKit/Descriptive Statistics/Ranking/Ranking.swift
index 82c4602..20ed920 100644
--- a/Sources/StatKit/Descriptive Statistics/Ranking/Ranking.swift
+++ b/Sources/StatKit/Descriptive Statistics/Ranking/Ranking.swift
@@ -2,8 +2,6 @@ public extension Collection {
/// Ranks the specified variable according to a specified strategy.
/// - parameter variable: The variable to investigate.
/// - parameter order: The order by which the variables should be ranked.
- /// - parameter lhs: The left hand element.
- /// - parameter rhs: The right hand element.
/// - parameter strategy: The calculation method to use.
/// - returns: An array with the rank of each original element,
/// where the index of a rank corresponds to the index of the element in the original array.
diff --git a/Sources/StatKit/Protocols/Distributions/ContinuousDistribution.swift b/Sources/StatKit/Protocols/Distributions/ContinuousDistribution.swift
index aed1238..f927339 100644
--- a/Sources/StatKit/Protocols/Distributions/ContinuousDistribution.swift
+++ b/Sources/StatKit/Protocols/Distributions/ContinuousDistribution.swift
@@ -1,6 +1,7 @@
public protocol ContinuousDistribution: Distribution {
/// The Probability Density Function of the distribution.
/// - parameter x: The value for which to calculate the relative likelihood of being sampled.
+ /// - parameter logarithmic: Whether to return the natural logarithm of the function.
/// - returns: The relative likelihood that a sample from the distribution is exactly equal to x.
func pdf(x: DomainType, logarithmic: Bool) -> Double
}
diff --git a/Sources/StatKit/Protocols/Distributions/DiscreteDistribution.swift b/Sources/StatKit/Protocols/Distributions/DiscreteDistribution.swift
index 84f7143..66124bf 100644
--- a/Sources/StatKit/Protocols/Distributions/DiscreteDistribution.swift
+++ b/Sources/StatKit/Protocols/Distributions/DiscreteDistribution.swift
@@ -2,6 +2,7 @@
public protocol DiscreteDistribution: Distribution {
/// The Probability Mass Function of the distribution.
/// - parameter x: The value for which to calculate the probability.
+ /// - parameter logarithmic: Whether to return the natural logarithm of the function.
/// - returns: The probability that a sample from the distribution is exactly equal to x.
func pmf(x: DomainType, logarithmic: Bool) -> Double
}
diff --git a/Sources/StatKit/StatKit.docc/Association.md b/Sources/StatKit/StatKit.docc/Association.md
index 53b9878..7d09aa3 100644
--- a/Sources/StatKit/StatKit.docc/Association.md
+++ b/Sources/StatKit/StatKit.docc/Association.md
@@ -10,12 +10,10 @@ One of the most common ways of measuring association is by the Pearson Product M
### Functions
-- ``StatKit/Swift/Collection/correlation(of:and:for:method:)``
-
-### Methods of Computation
-
-- ``CorrelationMethod``
+- ``StatKit/Swift/Collection/pearsonR(of:and:)``
+- ``StatKit/Swift/Collection/spearmanR(of:and:)``
+- ``StatKit/Swift/Collection/kendallTau(of:and:variant:)``
### Enums
-- ``DataSetComposition``
+- ``KendallTauVariant``
diff --git a/Sources/StatKit/StatKit.docc/Combinatorial Extensions.md b/Sources/StatKit/StatKit.docc/Combinatorial Extensions.md
index aa0d8ee..14765d5 100644
--- a/Sources/StatKit/StatKit.docc/Combinatorial Extensions.md
+++ b/Sources/StatKit/StatKit.docc/Combinatorial Extensions.md
@@ -11,4 +11,4 @@ StatKit allows developers to compute different extensions of combinatorial numbe
### Functions
- ``choose(n:k:)``
-- ``betaFunction(alpha:beta:log:)``
+- ``beta(alpha:beta:logarithmic:)``
diff --git a/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/LinearCorrelationTests.swift b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/LinearCorrelationTests.swift
index f8d1eca..3947c98 100644
--- a/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/LinearCorrelationTests.swift
+++ b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/LinearCorrelationTests.swift
@@ -16,12 +16,7 @@ final class LinearCorrelationTests: XCTestCase {
SIMD2(x: 9, y: 62),
SIMD2(x: 10, y: 69)]
- let calculatedCorrelation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .population,
- method: .pearsonsProductMoment
- )
+ let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y)
let expectedCorrelation = 0.99329456
XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6)
@@ -39,12 +34,7 @@ final class LinearCorrelationTests: XCTestCase {
SIMD2(x: 9, y: 62),
SIMD2(x: 10, y: 69)]
- let calculatedCorrelation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .sample,
- method: .pearsonsProductMoment
- )
+ let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y)
let expectedCorrelation = 0.99329456
XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6)
@@ -62,12 +52,7 @@ final class LinearCorrelationTests: XCTestCase {
SIMD2(x: 9, y: 62),
SIMD2(x: 10, y: 69)]
- let calculatedCorrelation = simd2Array.correlation(
- of: \.x,
- and: \.x,
- for: .population,
- method: .pearsonsProductMoment
- )
+ let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.x)
let expectedCorrelation = 1.0
XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6)
@@ -75,12 +60,7 @@ final class LinearCorrelationTests: XCTestCase {
func testPearsonCorrelationWithEmptyCollection() {
let simd2Array = [SIMD2]()
- let calculatedCorrelation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .sample,
- method: .pearsonsProductMoment
- )
+ let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y)
XCTAssert(calculatedCorrelation.isNaN)
}
@@ -96,12 +76,7 @@ final class LinearCorrelationTests: XCTestCase {
SIMD2(x: 8, y: 6),
SIMD2(x: 9, y: 6),
SIMD2(x: 10, y: 6)]
- let calculatedCorrelation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .sample,
- method: .pearsonsProductMoment
- )
+ let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y)
XCTAssert(calculatedCorrelation.isNaN)
}
diff --git a/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/PearsonRTests.swift b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/PearsonRTests.swift
new file mode 100644
index 0000000..e81ac7e
--- /dev/null
+++ b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/PearsonRTests.swift
@@ -0,0 +1,67 @@
+#if !os(watchOS)
+
+import XCTest
+import StatKit
+
+final class PearsonRTests: XCTestCase {
+ func testPearsonR() {
+ let simd2Array = [SIMD2(x: 1, y: 10),
+ SIMD2(x: 2, y: 20),
+ SIMD2(x: 3, y: 27),
+ SIMD2(x: 4, y: 30),
+ SIMD2(x: 5, y: 35),
+ SIMD2(x: 6, y: 38),
+ SIMD2(x: 7, y: 49),
+ SIMD2(x: 8, y: 56),
+ SIMD2(x: 9, y: 62),
+ SIMD2(x: 10, y: 69)]
+
+ let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y)
+ let expectedCorrelation = 0.99329456
+
+ XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6)
+ }
+
+ func testPearsonRWithSingleVariable() {
+ let simd2Array = [SIMD2(x: 1, y: 10),
+ SIMD2(x: 2, y: 20),
+ SIMD2(x: 3, y: 27),
+ SIMD2(x: 4, y: 30),
+ SIMD2(x: 5, y: 35),
+ SIMD2(x: 6, y: 38),
+ SIMD2(x: 7, y: 49),
+ SIMD2(x: 8, y: 56),
+ SIMD2(x: 9, y: 62),
+ SIMD2(x: 10, y: 69)]
+
+ let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.x)
+ let expectedCorrelation = 1.0
+
+ XCTAssertEqual(calculatedCorrelation, expectedCorrelation, accuracy: 1e-6)
+ }
+
+ func testPearsonRWithEmptyCollection() {
+ let simd2Array = [SIMD2]()
+ let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y)
+
+ XCTAssert(calculatedCorrelation.isNaN)
+ }
+
+ func testPearsonRWithConstantValueArray() {
+ let simd2Array = [SIMD2(x: 1, y: 6),
+ SIMD2(x: 2, y: 6),
+ SIMD2(x: 3, y: 6),
+ SIMD2(x: 4, y: 6),
+ SIMD2(x: 5, y: 6),
+ SIMD2(x: 6, y: 6),
+ SIMD2(x: 7, y: 6),
+ SIMD2(x: 8, y: 6),
+ SIMD2(x: 9, y: 6),
+ SIMD2(x: 10, y: 6)]
+ let calculatedCorrelation = simd2Array.pearsonR(of: \.x, and: \.y)
+
+ XCTAssertTrue(calculatedCorrelation.isNaN)
+ }
+}
+
+#endif
diff --git a/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/RankCorrelationTests.swift b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/RankCorrelationTests.swift
index f3ef8ee..3925e27 100644
--- a/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/RankCorrelationTests.swift
+++ b/Tests/StatKitTests/Descriptive Statistics Tests/Association Tests/RankCorrelationTests.swift
@@ -16,12 +16,7 @@ final class RankCorrelationTests: XCTestCase {
SIMD2(x: 9, y: 62),
SIMD2(x: 10, y: 69)]
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .population,
- method: .spearmansRho
- )
+ let calculatedAssociation = simd2Array.spearmanR(of: \.x, and: \.y)
let expectedAssociation = 1.0
XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6)
@@ -39,12 +34,7 @@ final class RankCorrelationTests: XCTestCase {
SIMD2(x: 9, y: 7),
SIMD2(x: 10, y: 1)]
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .population,
- method: .spearmansRho
- )
+ let calculatedAssociation = simd2Array.spearmanR(of: \.x, and: \.y)
let expectedAssociation = -0.165144564768954
XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6)
@@ -62,12 +52,7 @@ final class RankCorrelationTests: XCTestCase {
SIMD2(x: 9, y: 62),
SIMD2(x: 10, y: 69)]
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .population,
- method: .spearmansRho
- )
+ let calculatedAssociation = simd2Array.spearmanR(of: \.x, and: \.y)
let expectedAssociation = 1.0
XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6)
@@ -75,25 +60,15 @@ final class RankCorrelationTests: XCTestCase {
func testPearsonCorrelationWithEmptyCollection() {
let simd2Array = [SIMD2]()
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .population,
- method: .spearmansRho
- )
-
+ let calculatedAssociation = simd2Array.spearmanR(of: \.x, and: \.y)
+
XCTAssertTrue(calculatedAssociation.isNaN)
}
func testPearsonCorrelationWithSingleEntryCollection() {
let simd2Array = [SIMD2]()
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .population,
- method: .spearmansRho
- )
-
+ let calculatedAssociation = simd2Array.spearmanR(of: \.x, and: \.y)
+
XCTAssertTrue(calculatedAssociation.isNaN)
}
@@ -103,12 +78,7 @@ final class RankCorrelationTests: XCTestCase {
SIMD2(x: 3, y: 27),
SIMD2(x: 4, y: 30)]
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .population,
- method: .kendallsTau
- )
+ let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .a)
let expectedAssociation = 0.666666666
XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6)
@@ -126,12 +96,7 @@ final class RankCorrelationTests: XCTestCase {
SIMD2(x: 9, y: -2),
SIMD2(x: 10, y: -6)]
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .population,
- method: .kendallsTau
- )
+ let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .a)
let expectedAssociation = -0.244444444
XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6)
@@ -144,12 +109,7 @@ final class RankCorrelationTests: XCTestCase {
SIMD2(x: 2, y: 2),
SIMD2(x: 5, y: 7)]
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .sample,
- method: .kendallsTau
- )
+ let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b)
let expectedAssociation = 0.119522861
XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6)
@@ -162,12 +122,7 @@ final class RankCorrelationTests: XCTestCase {
SIMD2(x: 2, y: 2),
SIMD2(x: 5, y: 7)]
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .population,
- method: .kendallsTau
- )
+ let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .a)
let expectedAssociation = 0.1
XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6)
@@ -180,13 +135,8 @@ final class RankCorrelationTests: XCTestCase {
SIMD2(x: 1, y: 2),
SIMD2(x: 1, y: 7)]
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .sample,
- method: .kendallsTau
- )
-
+ let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b)
+
XCTAssertTrue(calculatedAssociation.isNaN)
}
@@ -202,12 +152,7 @@ final class RankCorrelationTests: XCTestCase {
SIMD2(x: 9, y: 62),
SIMD2(x: 10, y: 69)]
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .sample,
- method: .kendallsTau
- )
+ let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b)
let expectedAssociation = 1.0
XCTAssertEqual(calculatedAssociation, expectedAssociation, accuracy: 1e-6)
@@ -215,25 +160,15 @@ final class RankCorrelationTests: XCTestCase {
func testKendallsTauWithEmptyCollection() {
let simd2Array = [SIMD2]()
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .sample,
- method: .kendallsTau
- )
-
+ let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b)
+
XCTAssertTrue(calculatedAssociation.isNaN)
}
func testKendallsTauBWithSingleEntryCollection() {
let simd2Array = [SIMD2]()
- let calculatedAssociation = simd2Array.correlation(
- of: \.x,
- and: \.y,
- for: .sample,
- method: .kendallsTau
- )
-
+ let calculatedAssociation = simd2Array.kendallTau(of: \.x, and: \.y, variant: .b)
+
XCTAssertTrue(calculatedAssociation.isNaN)
}
}
diff --git a/run-tests.sh b/run-tests.sh
new file mode 100755
index 0000000..bf8ea38
--- /dev/null
+++ b/run-tests.sh
@@ -0,0 +1,9 @@
+#!/bin/zsh
+# Lints the package with SwiftLint when it is installed, then runs the test suite in release mode.
+if [ -x "$(command -v swiftlint)" ]; then  # true only when `command -v` resolves swiftlint to an executable path
+ swiftlint lint --strict  # NOTE(review): --strict presumably promotes warnings to errors; its status is not checked here
+else
+ echo "Install Swiftlint to run linting..."  # linting is skipped with a hint rather than treated as a failure
+fi
+
+swift test -c release --parallel  # always runs; as the last command, its status becomes the script's exit status