Merge pull request #212 from Forced-Alignment-and-Vowel-Extraction/dev

v0.7.5
Forced-Alignment-and-Vowel-Extraction · Nov 12, 2024 · fe370b4 · fe370b4
2 parents b2bd399 + 67a96a0
commit fe370b4
Show file tree

Hide file tree

Showing 13 changed files with 322 additions and 39 deletions.
diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
@@ -18,17 +18,8 @@ jobs:
         with:
             virtualenvs-create: true
             virtualenvs-in-project: true
-            installer-parallel: true
-
-      - name: Load cached venv
-        id: cached-poetry-dependencies
-        uses: actions/cache@v3
-        with:
-            path: .venv
-            key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}          
-
+            installer-parallel: true    
       - name: Install dependencies
-        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
         run: poetry install --no-interaction --with docs
       - name: Quarto Doc Build
         run: |
@@ -37,6 +28,12 @@ jobs:
             poetry run python objects.py            
             poetry run quartodoc interlinks
       - uses: quarto-dev/quarto-actions/setup@v2
+      - name: Load cached quarto freeze
+        id: cached-quarto-freeze
+        uses: actions/cache@v3
+        with:
+            path: docs/_freeze
+            key: ${{ runner.os }}-freeze
       - name: Render and publish to gh pages
         run: |
           git config --global user.email "[email protected]"

diff --git a/docs/.gitignore b/docs/.gitignore
@@ -2,4 +2,5 @@
 _site/
 reference/
 _inv/
-objects.txt
+objects.txt
+_freeze
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
@@ -3,6 +3,7 @@ project:
   output-dir: _site
 
 license: GPLv3
+freeze: auto
 
 website:
   image: assets/logo.png

diff --git a/docs/cookbook/building_atg.qmd b/docs/cookbook/building_atg.qmd
@@ -0,0 +1,206 @@
+---
+title: Building an AlignedTextGrid
+author: Josef Fruehwald
+date: last-modified
+---
+
+In this recipe, we'll build an AlignedTextGrid "by hand." The principles will be similar if 
+you were building one programmatically from other kinds of timing and label data.
+
+## Use append
+
+The safest way to go about building an AlignedTextGrid so that you don't
+inadvertently start creating copies of your intervals and tiers is to stick to
+the `.append()` methods for these various objects.
+
+## Interval-first approach
+
+One way you could go about doing things is to 
+
+- build up some intervals and set up their super/subset relationships
+
+- then add the intervals to tiers
+
+- add the tiers to tiergroups
+
+- add the tiergroups to textgrids
+
+First, we'll set up with our imports.
+```{python}
+from aligned_textgrid import AlignedTextGrid, \
+    TierGroup, \
+    SequenceTier, \
+    custom_classes
+
+Word, Phone = custom_classes(["Word", "Phone"])
+```
+
+### Creating the intervals
+
+Next, we'll build some words and their phones.
+
+```{python}
+the = Word((0, 10, "the"))
+dog = Word((10, 25, "dog"))
+
+DH  = Phone((0, 5, "DH"))
+AH0 = Phone((5, 10, "AH0"))
+
+D   = Phone((10, 15, "D"))
+AO1 = Phone((15, 20, "AO1"))
+G   = Phone((20, 25, "G"))
+
+```
+
+### Setting subset membership
+
+Now, we can append the appropriate phones to their words.
+
+```{python}
+for phone in [DH, AH0]:
+    the.append(phone)
+
+for phone in [D, AO1, G]:
+    dog.append(phone)
+```
+
+We can check to make sure everything got appended right.
+
+```{python}
+# the first phone of 'the' is DH
+the.first is DH
+```
+
+```{python}
+# the last phone of 'dog' is G
+dog.last is G
+```
+
+### Set up the tiers
+
+Now, we'll create an empty TierGroup that has a Word and Phone tier.
+
+```{python}
+tier_group = TierGroup([
+    SequenceTier(entry_class=Word),
+    SequenceTier(entry_class=Phone)
+])
+```
+
+### Appending the intervals
+
+If we append `the` and `dog` to the Word tier, their phones will now be automatically added to the phone tier.
+
+```{python}
+tier_group.Word.append(the)
+tier_group.Word.append(dog)
+```
+
+We'll double check that the phones were automatically appended.
+
+```{python}
+tier_group.Phone.labels
+```
+
+### Creating the AlignedTextGrid
+
+Now, we can wrap this in an AlignedTextGrid so we can save it to a new TextGrid file, or use it for any other analysis purpose.
+
+```{python}
+atg = AlignedTextGrid([tier_group])
+atg
+```
+
+### Reference is maintained
+
+Just to confirm that the reference to all objects has been maintained, let's double check that our original words and phones are in the textgrid.
+
+```{python}
+the in atg.group_0.Word
+```
+
+```{python}
+AO1 in atg.group_0.Phone
+```
+
+
+## TextGrid first approach
+
+We could also take a TextGrid first approach, and then add each component piece by piece.
+
+### Initializing the TextGrid
+
+```{python}
+#| warning: false
+atg = AlignedTextGrid()
+atg
+```
+
+
+### Adding the TierGroup
+
+```{python}
+tier_group = TierGroup([
+    SequenceTier(entry_class=Word),
+    SequenceTier(entry_class=Phone)
+])
+
+atg.append(tier_group)
+atg
+```
+
+### Adding the Intervals
+
+For this part, I'm going to use some pythony tricks to make things a little easier. First the words.
+
+```{python}
+word_times = [0,10,25]
+word_labels = ["the", "dog"]
+
+word_generator = zip(
+    word_times[0:-1], 
+    word_times[1:], 
+    word_labels)
+
+for start, end, label in word_generator:
+    atg.group_0.Word.append(
+        Word((start, end, label))
+    )
+```
+
+Now the phones
+
+```{python}
+#| warning: false
+phone_times = [0, 5, 10, 15, 20, 25]
+phone_labels = ["DH", "AH0", "D", "AO1", "G"]
+
+phone_generator = zip(
+    phone_times[0:-1],
+    phone_times[1:],
+    phone_labels
+)
+
+for start, end, label in phone_generator:
+    atg.group_0.Phone.append(
+        Phone((start, end, label))
+    )
+```
+
+### Double checking
+
+We can double check that everything is properly related.
+
+```{python}
+new_the = atg.group_0.Word.first
+new_dog = atg.group_0.Word.last
+
+new_the.sub_labels
+```
+
+```{python}
+new_dog.sub_labels
+```
+
+
+{{< include includes/_session_info.qmd >}}
diff --git a/docs/cookbook/includes/_session_info.qmd b/docs/cookbook/includes/_session_info.qmd
@@ -0,0 +1,18 @@
+
+---------
+
+#### Session Info {.unnumbered .unlisted}
+
+```{python}
+#| code-fold: true
+
+import sys
+import aligned_textgrid
+
+print(
+    (
+        f"Python version: {sys.version}\n"
+        f"aligned-textgrid version: {aligned_textgrid.__version__}"
+    )
+)
+```
diff --git a/docs/cookbook/overlaps.qmd b/docs/cookbook/overlaps.qmd
@@ -20,7 +20,7 @@ atg = AlignedTextGrid(
 
 ## Overlap Detection
 
-We'll get all phones that aren't silences and create a SequenceList from them.
+We'll get all phones that aren't silences and create a [](`~aligned_textgrid.SequenceList`) from them.
 SequenceLists have convenience attributes that return arrays of the start
 and end times of SequenceIntervals within them.
 
@@ -116,4 +116,6 @@ print(
 print(
     f"Overlapper words: {[x.within.label for x in one_phone.overlapper]}"
 )
-```
+```
+
+{{< include includes/_session_info.qmd >}}
diff --git a/docs/cookbook/phrase_creation.qmd b/docs/cookbook/phrase_creation.qmd
@@ -121,4 +121,6 @@ pause_durs = np.array([
 ])
 
 pause_durs
-```
+```
+
+{{< include includes/_session_info.qmd >}}
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "aligned_textgrid"
-version = "0.7.4"
+version = "0.7.5"
 description = "Classes for defining sequential information from TextGrids"
 authors = ["JoFrhwld <[email protected]>", "chrisbrickhouse <[email protected]>"]
 license = "GPL-3.0-or-later"
@@ -20,6 +20,7 @@ praatio = "^6.0.0"
 numpy = "^1.24.2"
 polars = "^0.20.18"
 cloudpickle = "^3.0.0"
+toml = "^0.10.2"
 
 [tool.poetry.group.dev.dependencies]
 jupyter = "^1.0.0"

diff --git a/src/aligned_textgrid/__init__.py b/src/aligned_textgrid/__init__.py
@@ -7,6 +7,21 @@
 from aligned_textgrid.sequence_list import SequenceList
 from aligned_textgrid.custom_classes import custom_classes
 from aligned_textgrid.outputs.to_dataframe import to_df
+
+from importlib.metadata import version
+
+from pathlib import Path
+import toml
+
+__version__ = "unknown"
+# adapt path to your pyproject.toml
+pyproject_toml_file = Path(__file__).parent.parent.parent / "pyproject.toml"
+if pyproject_toml_file.exists() and pyproject_toml_file.is_file():
+    data = toml.load(pyproject_toml_file)
+    # check tool.poetry.version
+    if "tool" in data and "poetry" in data["tool"] and "version" in data["tool"]["poetry"]:
+        __version__ = data["tool"]["poetry"]["version"]
+
 __all__ = [
     "SequenceInterval",
     "SequencePoint",
@@ -21,5 +36,6 @@
     "SequenceList",
     "AlignedTextGrid",
     "custom_classes",
-    "to_df"
+    "to_df",
+    "__version__"
 ]
-Original file line number
+Diff line change
@@ Expand Up / @@ -121,4 +121,6 @@ pause_durs = np.array([ @@
     ])
     pause_durs
-    ```
+    ```
+    {{< include includes/_session_info.qmd >}}