diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 12fe2efe..dc5b5228 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -46,4 +46,24 @@ jobs:
     - name: Run pytest
       run: |
-        pytest -v
\ No newline at end of file
+        pytest -v
+
+  commit-hooks:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v3
+        with:
+          python-version: 3.6.8
+          cache: 'pip'
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pre-commit install
+
+      - name: Check commit hooks
+        run: |
+          pre-commit run --all-files
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 44b4d541..81880b61 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,6 +30,15 @@ repos:
     language: script
     stages: [commit]

+#works
+- repo: local
+  hooks:
+  - id: mixed-line-endings
+    entry: pre-commits/mixed_line_endings.py
+    name: Check for consistent line endings, preferring LF to CRLF to CR (auto-fixes)
+    language: script
+    stages: [commit]
+
 #works
 #if using on different file types, it will need a separate hook per file type
 - repo: local
@@ -76,18 +85,17 @@ repos:
     stages: [commit]


-# #needs to remove the password in hello_world.py
-# - repo: local
-#   hooks:
-#   - id: detect-secrets
-#     entry: detect-secrets
-#     name: detect-secrets - Detect secrets in staged code
-#     #args: [ "--baseline", ".secrets.baseline" ]
-#     args: [scan, audit]
-#     language: system
-#     types: [python]
-#     stages: [commit]
-#     exclude: .*/tests/.*|^\.cruft\.json$
+# works in testing
+- repo: local
+  hooks:
+  - id: detect-secrets
+    entry: detect-secrets-hook
+    name: detect-secrets - Detect secrets in staged code
+    args: [ "--baseline", ".secrets.baseline" ]
+    #args: [scan, audit]
+    language: system
+    types: [python]
+    stages: [commit]
diff --git a/docs/contributor_guide/CONTRIBUTING.md b/docs/contributor_guide/CONTRIBUTING.md
index ee56bcde..dee48d43 100644
--- a/docs/contributor_guide/CONTRIBUTING.md
+++ b/docs/contributor_guide/CONTRIBUTING.md
@@ -36,14 +36,14 @@ documentation][docs-pre-commit-hooks].

 ## Code conventions

-Code written for this project should follow [PEP 8 coding conventions](pep8), [project naming conventions](docs-naming) and the guidance on [quality assurance of code for analysis and research](duck-book) (also known as the Duck Book).
+Code written for this project should follow [PEP 8 coding conventions][pep8], [project naming conventions][docs-naming] and the guidance on [quality assurance of code for analysis and research][duck-book] (also known as the Duck Book).

 ### Git and GitHub

-We use Git to version control the source code and out source code is stored on GitHub.
+We use Git to version control the source code, and our source code is stored on GitHub.

-We follow the [GitHub flow](github-flow) workflow. This means that we create
+We follow the [GitHub flow][github-flow] workflow. This means that we create
 feature branches of the `main` branch and merge them back to `main` once they
 meet the definition of done. We give our branches short but informative names,
 in lowercase and separated with hyphens. Where applicable, we start branch names
@@ -53,16 +53,20 @@ with the respective Jira ticket number. For example,

 We commit regularly, with distinct chunks of work where possible. We write
 short but informative commit messages, starting with a capitalised
 present-tense verb, for example `Add`, `Fix`. When pair-programming, we
-[add co-authors to the commit](git-coauthor). We add
-[longer commit messages](long-commit) for larger or more complex commits, for
+[add co-authors to the commit][git-coauthor]. We add
+[longer commit messages][long-commit] for larger or more complex commits, for
 example (squash) merge commits.

 We open a pull request to `main` once we have working code that meets a user
 need, for example meets the definition of done on the Jira ticket. Pull
 requests must be reviewed by at least one member of the team before merging.
-Reviews should follow the [pull request template](pr-template). If we want review on code that does not yet meet the definition of done, we open a draft
+Reviews should follow the [pull request template][pr-template]. If we want review on code that does not yet meet the definition of done, we open a draft
 pull request. Once a branch has been reviewed, it can be merged. We prefer to
 use squash merges in order to simplify the `main` branch commit history. After
 merging, the feature branch should be deleted.
+
+### Functions
+
+We prefer writing functions over classes to make it easier for beginners to understand the code. [Type hints][typing] should be used when writing functions. We prefer functions to return `pandas.DataFrame` rather than `pandas.Series`, for example when deriving new (temporary) variables.

 ### Markdown

 Local links can be written as normal, but external links should be referenced at the
@@ -83,6 +87,10 @@ tests, enter the following command in your terminal:

 ```shell
 pytest
 ```
+Our testing approach is:
+- use `.csv` files containing simple, minimal input and output data for the function to be tested
+- individual test cases should be separated into different `.csv` files and grouped into folders
+- the name of each test data `.csv` file should reflect the test case, and the folder name should match the module/function

 ### Code coverage

@@ -139,3 +147,4 @@ build the documentation into an accessible, searchable website.
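The CSV-driven testing approach described in the contributing guide might look like the following in practice. A minimal sketch, assuming a hypothetical `add_one` function in `src/add_one.py` and a fixture at `tests/data/add_one/basic.csv` that holds both the input column and the expected `value_plus_one` output column:

```python
from pathlib import Path

import pandas as pd
from pandas.testing import assert_frame_equal

from src.add_one import add_one  # hypothetical module under test


def test_add_one_basic():
    # the CSV fixture holds minimal input and expected output for one test case
    expected_output = pd.read_csv(Path("tests") / "data" / "add_one" / "basic.csv")
    input_data = expected_output.drop(columns=["value_plus_one"])

    actual_output = add_one(input_data, "value")

    assert_frame_equal(actual_output, expected_output)
```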
 [github-flow]: https://docs.github.com/en/get-started/using-github/github-flow
 [git-coauthor]: https://docs.github.com/en/pull-requests/committing-changes-to-your-project/creating-and-editing-commits/creating-a-commit-with-multiple-authors
 [long-commit]: https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html
+[typing]: https://docs.python.org/3/library/typing.html
diff --git a/pre-commits/check_added_large_files.py b/pre-commits/check_added_large_files.py
index 41fa69b3..59c0353a 100755
--- a/pre-commits/check_added_large_files.py
+++ b/pre-commits/check_added_large_files.py
@@ -4,24 +4,20 @@
 import json
 import math
 import os
-from typing import Optional
-from typing import Sequence
-from typing import Set
+from typing import Optional, Sequence, Set

-from pre_commit_hooks.util import added_files
-from pre_commit_hooks.util import CalledProcessError
-from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import CalledProcessError, added_files, cmd_output


 def _lfs_files() -> Set[str]:
     """Private function."""
     try:
         # Introduced in git-lfs 2.2.0, first working in 2.2.1
-        lfs_ret = cmd_output('git', 'lfs', 'status', '--json')
+        lfs_ret = cmd_output("git", "lfs", "status", "--json")
     except CalledProcessError:  # pragma: no cover (with git-lfs)
         lfs_ret = '{"files":{}}'
-    return set(json.loads(lfs_ret)['files'])
+    return set(json.loads(lfs_ret)["files"])


 def _find_large_added_files(filenames: Sequence[str], maxkb: int) -> int:
@@ -32,7 +28,7 @@ def _find_large_added_files(filenames: Sequence[str], maxkb: int) -> int:
     for filename in (added_files() & set(filenames)) - _lfs_files():
         kb = int(math.ceil(os.stat(filename).st_size / 1024))
         if kb > maxkb:
-            print(f'{filename} ({kb} KB) exceeds {maxkb} KB.')
+            print(f"{filename} ({kb} KB) exceeds {maxkb} KB.")
             retv = 1

     return retv
@@ -42,17 +38,20 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
     """Entry function for script."""
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        'filenames', nargs='*',
-        help='Filenames pre-commit believes are changed.',
+        "filenames",
+        nargs="*",
+        help="Filenames pre-commit believes are changed.",
     )
     parser.add_argument(
-        '--maxkb', type=int, default=500,
-        help='Maxmimum allowable KB for added files',
+        "--maxkb",
+        type=int,
+        default=500,
+        help="Maximum allowable KB for added files",
     )
     args = parser.parse_args(argv)

     return _find_large_added_files(args.filenames, args.maxkb)


-if __name__ == '__main__':
+if __name__ == "__main__":
     exit(main())
diff --git a/pre-commits/check_merge_conflict.py b/pre-commits/check_merge_conflict.py
index 85a29255..e6c67007 100755
--- a/pre-commits/check_merge_conflict.py
+++ b/pre-commits/check_merge_conflict.py
@@ -2,35 +2,30 @@
 """Pre commit hook to check for merge conflict flags in file."""
 import argparse
 import os.path
-from typing import Optional
-from typing import Sequence
-
+from typing import Optional, Sequence

 CONFLICT_PATTERNS = [
-    b'<<<<<<< ',
-    b'======= ',
-    b'=======\n',
-    b'>>>>>>> ',
+    b"<<<<<<< ",
+    b"======= ",
+    b"=======\n",
+    b">>>>>>> ",
 ]


 def _is_in_merge() -> int:
     """Private function."""
-    return (
-        os.path.exists(os.path.join('.git', 'MERGE_MSG')) and
-        (
-            os.path.exists(os.path.join('.git', 'MERGE_HEAD')) or
-            os.path.exists(os.path.join('.git', 'rebase-apply')) or
-            os.path.exists(os.path.join('.git', 'rebase-merge'))
-        )
+    return os.path.exists(os.path.join(".git", "MERGE_MSG")) and (
+        os.path.exists(os.path.join(".git", "MERGE_HEAD"))
+        or os.path.exists(os.path.join(".git", "rebase-apply"))
+        or os.path.exists(os.path.join(".git", "rebase-merge"))
     )


 def main(argv: Optional[Sequence[str]] = None) -> int:
     """Entry function for script."""
     parser = argparse.ArgumentParser()
-    parser.add_argument('filenames', nargs='*')
-    parser.add_argument('--assume-in-merge', action='store_true')
+    parser.add_argument("filenames", nargs="*")
+    parser.add_argument("--assume-in-merge", action="store_true")
     args = parser.parse_args(argv)

     if not _is_in_merge() and not args.assume_in_merge:
@@ -38,18 +33,18 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
     retcode = 0
     for filename in args.filenames:
-        with open(filename, 'rb') as inputfile:
+        with open(filename, "rb") as inputfile:
             for i, line in enumerate(inputfile):
                 for pattern in CONFLICT_PATTERNS:
                     if line.startswith(pattern):
                         print(
                             f'Merge conflict string "{pattern.decode()}" '
-                            f'found in {filename}:{i + 1}',
+                            f"found in {filename}:{i + 1}",
                         )
                         retcode = 1

     return retcode


-if __name__ == '__main__':
+if __name__ == "__main__":
     exit(main())
diff --git a/pre-commits/commit_msg.py b/pre-commits/commit_msg.py
deleted file mode 100755
index e478166a..00000000
--- a/pre-commits/commit_msg.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python3
-"""Git hook to check git commit message has appropriate length subject line.
-
-After removing the jira issue number from the subject line we check that the
-message is longer than 20 characters and shorter than 65.
-"""
-import sys
-
-# Collect the parameters
-commit_msg_filepath = sys.argv[1]
-
-with open(commit_msg_filepath, 'r') as f:
-    lines = f.readlines()
-
-    # The subject is the first line of the message, but we don't count any
-    # Jira issue note
-    commit_subject = lines[0].split(']')[-1]
-
-    if (len(commit_subject) < 20):
-        print(
-            f'''
-            commit-msg: ERROR! The commit subject is too short!
-            subject length = {len(commit_subject)} < 20 characters'
-            '''
-        )
-        sys.exit(1)
-
-    elif (len(commit_subject) > 65):
-        # We check if messages are greater than 65 char, but warn as if
-        # longer than 50
-        print(
-            f'''
-            commit-msg: ERROR!
-            The commit subject is too long!
-            subject length = {len(commit_subject)} > 50 characters'
-            '''
-        )
-        sys.exit(1)
-
-# for line in lines[2:]:
-#     print(line)
diff --git a/pre-commits/end_of_line_fixer.py b/pre-commits/end_of_line_fixer.py
index 8f39b8c1..eb85f62e 100755
--- a/pre-commits/end_of_line_fixer.py
+++ b/pre-commits/end_of_line_fixer.py
@@ -2,9 +2,7 @@
 """Pre commit hook to ensure single blank line at end of python file."""
 import argparse
 import os
-from typing import IO
-from typing import Optional
-from typing import Sequence
+from typing import IO, Optional, Sequence


 def _fix_file(file_obj: IO[bytes]) -> int:
@@ -17,13 +15,13 @@ def _fix_file(file_obj: IO[bytes]) -> int:
         return 0
     last_character = file_obj.read(1)
     # last_character will be '' for an empty file
-    if last_character not in {b'\n', b'\r'} and last_character != b'':
+    if last_character not in {b"\n", b"\r"} and last_character != b"":
         # Needs this seek for windows, otherwise IOError
         file_obj.seek(0, os.SEEK_END)
-        file_obj.write(b'\n')
+        file_obj.write(b"\n")
         return 1

-    while last_character in {b'\n', b'\r'}:
+    while last_character in {b"\n", b"\r"}:
         # Deal with the beginning of the file
         if file_obj.tell() == 1:
             # If we've reached the beginning of the file and it is all
@@ -40,7 +38,7 @@ def _fix_file(file_obj: IO[bytes]) -> int:
     # newlines. If we find extraneous newlines, then backtrack and trim them.
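     # Everything from the current position to EOF is the trailing run of
     # newline bytes: keep exactly one end-of-line sequence and trim the rest.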
position = file_obj.tell() remaining = file_obj.read() - for sequence in (b'\n', b'\r\n', b'\r'): + for sequence in (b"\n", b"\r\n", b"\r"): if remaining == sequence: return 0 elif remaining.startswith(sequence): @@ -54,21 +52,21 @@ def _fix_file(file_obj: IO[bytes]) -> int: def main(argv: Optional[Sequence[str]] = None) -> int: """Entry function for script.""" parser = argparse.ArgumentParser() - parser.add_argument('filenames', nargs='*', help='Filenames to fix') + parser.add_argument("filenames", nargs="*", help="Filenames to fix") args = parser.parse_args(argv) retv = 0 for filename in args.filenames: # Read as binary so we can read byte-by-byte - with open(filename, 'rb+') as file_obj: + with open(filename, "rb+") as file_obj: ret_for_file = _fix_file(file_obj) if ret_for_file: - print(f'Fixing {filename}') + print(f"Fixing {filename}") retv |= ret_for_file return retv -if __name__ == '__main__': +if __name__ == "__main__": exit(main()) diff --git a/pre-commits/mixed_line_endings.py b/pre-commits/mixed_line_endings.py index 48afc2e6..8ae44909 100755 --- a/pre-commits/mixed_line_endings.py +++ b/pre-commits/mixed_line_endings.py @@ -2,31 +2,28 @@ """Pre commit hook to ensure all EOL characters are the same.""" import argparse import collections -from typing import Dict -from typing import Optional -from typing import Sequence +from typing import Dict, Optional, Sequence - -CRLF = b'\r\n' -LF = b'\n' -CR = b'\r' +CRLF = b"\r\n" +LF = b"\n" +CR = b"\r" # Prefer LF to CRLF to CR, but detect CRLF before LF ALL_ENDINGS = (CR, CRLF, LF) -FIX_TO_LINE_ENDING = {'cr': CR, 'crlf': CRLF, 'lf': LF} +FIX_TO_LINE_ENDING = {"cr": CR, "crlf": CRLF, "lf": LF} def _fix(filename: str, contents: bytes, ending: bytes) -> None: """Private function.""" - new_contents = b''.join( - line.rstrip(b'\r\n') + ending for line in contents.splitlines(True) + new_contents = b"".join( + line.rstrip(b"\r\n") + ending for line in contents.splitlines(True) ) - with open(filename, 'wb') as f: + with open(filename, "wb") as f: f.write(new_contents) def fix_filename(filename: str, fix: str) -> int: """Private function.""" - with open(filename, 'rb') as f: + with open(filename, "rb") as f: contents = f.read() counts: Dict[bytes, int] = collections.defaultdict(int) @@ -40,10 +37,10 @@ def fix_filename(filename: str, fix: str) -> int: # Some amount of mixed line endings mixed = sum(bool(x) for x in counts.values()) > 1 - if fix == 'no' or (fix == 'auto' and not mixed): + if fix == "no" or (fix == "auto" and not mixed): return mixed - if fix == 'auto': + if fix == "auto": max_ending = LF max_lines = 0 # ordering is important here such that lf > crlf > cr @@ -70,24 +67,25 @@ def main(argv: Optional[Sequence[str]] = None) -> int: """Entry function for script.""" parser = argparse.ArgumentParser() parser.add_argument( - '-f', '--fix', - choices=('auto', 'no') + tuple(FIX_TO_LINE_ENDING), - default='auto', + "-f", + "--fix", + choices=("auto", "no") + tuple(FIX_TO_LINE_ENDING), + default="auto", help='Replace line ending with the specified. 
Default is "auto"', ) - parser.add_argument('filenames', nargs='*', help='Filenames to fix') + parser.add_argument("filenames", nargs="*", help="Filenames to fix") args = parser.parse_args(argv) retv = 0 for filename in args.filenames: if fix_filename(filename, args.fix): - if args.fix == 'no': - print(f'{filename}: mixed line endings') + if args.fix == "no": + print(f"{filename}: mixed line endings") else: - print(f'{filename}: fixed mixed line endings') + print(f"{filename}: fixed mixed line endings") retv = 1 return retv -if __name__ == '__main__': +if __name__ == "__main__": exit(main()) diff --git a/pre-commits/prepare_commit_msg.py b/pre-commits/prepare_commit_msg.py deleted file mode 100755 index 7cc97878..00000000 --- a/pre-commits/prepare_commit_msg.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -"""Git hook to automatically prefix git commit message with Jira issue number. - -The issue number (e.g. Jira ticket number) from the current branch name. Works -with or without specifying -m option at commit time. -""" -import re -import sys -from subprocess import check_output - - -commit_msg_filepath = sys.argv[1] -branch = ( - check_output(["git", "symbolic-ref", "--short", "HEAD"]) - .decode("utf-8").strip() -) - -# If branch name contains /'s we only want the final part of the branch name -branch_end = branch.split('/')[-1] - -# Regex pattern for matching to Jira issues -regex = r"[Jj]\d+" - -if re.search(regex, branch_end): - # Create list of all matches to regex pattern - issue_number_matches = re.findall(regex, branch_end) - - # If mutiple issues in branch name we join them together - commit_issue = f'{"_".join(issue_number_matches)}' - - with open(commit_msg_filepath, "r+") as f: - commit_msg = f.read() - f.seek(0, 0) # correctly position issue_number when writing commit msg - f.write(f"[{commit_issue}] {commit_msg}") - -else: - # If branch does not contain a jira issue number, reject the commit - print( - f''' - prepare-commit-msg: Error! 
- Branch name is {branch} - Does not match branch name strategy \'*/jxxx\' - ''' - ) - sys.exit(1) diff --git a/pre-commits/remove_whitespace.py b/pre-commits/remove_whitespace.py index d890b18c..61e5803f 100755 --- a/pre-commits/remove_whitespace.py +++ b/pre-commits/remove_whitespace.py @@ -2,21 +2,20 @@ """Pre commit hook to remove any trailing whitespace.""" import argparse import os -from typing import Optional -from typing import Sequence +from typing import Optional, Sequence def _fix_file( - filename: str, - is_markdown: bool, - chars: Optional[bytes], + filename: str, + is_markdown: bool, + chars: Optional[bytes], ) -> bool: """Private function.""" - with open(filename, mode='rb') as file_processed: + with open(filename, mode="rb") as file_processed: lines = file_processed.readlines() newlines = [_process_line(line, is_markdown, chars) for line in lines] if newlines != lines: - with open(filename, mode='wb') as file_processed: + with open(filename, mode="wb") as file_processed: for line in newlines: file_processed.write(line) return True @@ -25,22 +24,22 @@ def _fix_file( def _process_line( - line: bytes, - is_markdown: bool, - chars: Optional[bytes], + line: bytes, + is_markdown: bool, + chars: Optional[bytes], ) -> bytes: """Private function.""" - if line[-2:] == b'\r\n': - eol = b'\r\n' + if line[-2:] == b"\r\n": + eol = b"\r\n" line = line[:-2] - elif line[-1:] == b'\n': - eol = b'\n' + elif line[-1:] == b"\n": + eol = b"\n" line = line[:-1] else: - eol = b'' + eol = b"" # preserve trailing two-space for non-blank lines in markdown files - if is_markdown and (not line.isspace()) and line.endswith(b' '): - return line[:-2].rstrip(chars) + b' ' + eol + if is_markdown and (not line.isspace()) and line.endswith(b" "): + return line[:-2].rstrip(chars) + b" " + eol return line.rstrip(chars) + eol @@ -48,48 +47,46 @@ def main(argv: Optional[Sequence[str]] = None) -> int: """Entry function for script.""" parser = argparse.ArgumentParser() parser.add_argument( - '--no-markdown-linebreak-ext', - action='store_true', + "--no-markdown-linebreak-ext", + action="store_true", help=argparse.SUPPRESS, ) parser.add_argument( - '--markdown-linebreak-ext', - action='append', + "--markdown-linebreak-ext", + action="append", default=[], - metavar='*|EXT[,EXT,...]', + metavar="*|EXT[,EXT,...]", help=( - 'Markdown extensions (or *) to not strip linebreak spaces. ' - 'default: %(default)s' + "Markdown extensions (or *) to not strip linebreak spaces. " + "default: %(default)s" ), ) parser.add_argument( - '--chars', + "--chars", help=( - 'The set of characters to strip from the end of lines. ' - 'Defaults to all whitespace characters.' + "The set of characters to strip from the end of lines. " + "Defaults to all whitespace characters." ), ) - parser.add_argument('filenames', nargs='*', help='Filenames to fix') + parser.add_argument("filenames", nargs="*", help="Filenames to fix") args = parser.parse_args(argv) if args.no_markdown_linebreak_ext: - print('--no-markdown-linebreak-ext now does nothing!') + print("--no-markdown-linebreak-ext now does nothing!") md_args = args.markdown_linebreak_ext - if '' in md_args: - parser.error('--markdown-linebreak-ext requires a non-empty argument') - all_markdown = '*' in md_args + if "" in md_args: + parser.error("--markdown-linebreak-ext requires a non-empty argument") + all_markdown = "*" in md_args # normalize extensions; split at ',', lowercase, and force 1 leading '.' - md_exts = [ - '.' 
+ x.lower().lstrip('.') for x in ','.join(md_args).split(',')
-    ]
+    md_exts = ["." + x.lower().lstrip(".") for x in ",".join(md_args).split(",")]

     # reject probable "eaten" filename as extension: skip leading '.' with [1:]
     for ext in md_exts:
-        if any(c in ext[1:] for c in r'./\:'):
+        if any(c in ext[1:] for c in r"./\:"):
             parser.error(
-                f'bad --markdown-linebreak-ext extension '
-                f'{ext!r} (has . / \\ :)\n'
+                f"bad --markdown-linebreak-ext extension "
+                f"{ext!r} (has . / \\ :)\n"
                 f"  (probably filename; use '--markdown-linebreak-ext=EXT')",
             )
     chars = None if args.chars is None else args.chars.encode()
@@ -98,10 +95,10 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
         _, extension = os.path.splitext(filename.lower())
         md = all_markdown or extension in md_exts
         if _fix_file(filename, md, chars):
-            print(f'Fixing {filename}')
+            print(f"Fixing {filename}")
             return_code = 1
     return return_code


-if __name__ == '__main__':
+if __name__ == "__main__":
     exit(main())
diff --git a/requirements.txt b/requirements.txt
index bd9b2879..e26789b2 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,4 @@ nbqa
 pre_commit_hooks
 flake8
 pandas==1.1.5
-numpy
\ No newline at end of file
+numpy
diff --git a/src/apply_imputation_link.py b/src/apply_imputation_link.py
new file mode 100755
index 00000000..e04104fb
--- /dev/null
+++ b/src/apply_imputation_link.py
@@ -0,0 +1,161 @@
+def create_and_merge_imputation_values(
+    df,
+    imputation_class,
+    reference,
+    period,
+    marker,
+    combined_imputation,
+    target,
+    cumulative_forward_link,
+    cumulative_backward_link,
+    auxiliary,
+    construction_link,
+    imputation_types=("c", "fir", "bir", "fic"),
+):
+    """
+    Loop through different imputation types and merge the results according
+    to an imputation marker column.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+    imputation_class : str
+        column name for the variable that defines the imputation class
+    reference : str
+        column name for the reference
+    period : str
+        column name for the period
+    marker : str
+        column name containing a marker to indicate the type of imputation required
+    combined_imputation : str
+        column name for the combined imputation types according to the imputation marker
+    target : str
+        column name for the target variable for imputation
+    cumulative_forward_link : str
+        column name for the cumulative forward imputation link
+    cumulative_backward_link : str
+        column name for the cumulative backward imputation link
+    auxiliary : str
+        column name for the auxiliary variable
+    construction_link : str
+        column name for the construction link
+    imputation_types : tuple
+        types of imputation to run and add to the combined_imputation column,
+        stored in a tuple. If 'fic' is selected, 'c' must also be selected and
+        must precede 'fic'. For 'fic' to produce the correct result, the C
+        marker must be in the first period for a given reference.
+
+    Returns
+    -------
+    pandas.DataFrame
+        dataframe with imputation values defined by the imputation marker
+    """
+
+    # constructed has to come first so its result can be used for forward
+    # imputation from constructed
+    imputation_config = {
+        "c": {
+            "intermediate_column": "constructed",
+            "marker": "C",
+            # doesn't actually apply a fill so can be forward or back
+            "fill_column": auxiliary,
+            "fill_method": "ffill",
+            "link_column": construction_link,
+        },
+        "fir": {
+            "intermediate_column": "fir",
+            "marker": "FIR",
+            "fill_column": target,
+            "fill_method": "ffill",
+            "link_column": cumulative_forward_link,
+        },
+        "bir": {
+            "intermediate_column": "bir",
+            "marker": "BIR",
+            "fill_column": target,
+            "fill_method": "bfill",
+            "link_column": cumulative_backward_link,
+        },
+        "fic": {
+            # FIC only works if the C is in the first period of the business
+            # being sampled. This is fine for automatic imputation, but care
+            # is needed if manual construction imputation is done
+            "intermediate_column": "fic",
+            "marker": "FIC",
+            # this has to have the same name as the intermediate column for constructed
+            "fill_column": "constructed",
+            "fill_method": "ffill",
+            "link_column": cumulative_forward_link,
+        },
+    }
+
+    df.sort_values([imputation_class, reference, period], inplace=True)
+
+    intermediate_columns = []
+
+    for imp_type in imputation_types:
+        df = create_impute(
+            df, [imputation_class, reference], imputation_config[imp_type]
+        )
+        df = merge_imputation_type(
+            df, imputation_config[imp_type], marker, combined_imputation
+        )
+
+        intermediate_columns.append(imputation_config[imp_type]["intermediate_column"])
+
+    return df.drop(columns=intermediate_columns)
+
+
+def create_impute(df, group, imputation_spec):
+    """
+    Add a new column of imputed values to a dataframe, using ratio imputation.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+    group : str or list
+        variables that define the imputation class
+    imputation_spec : dict
+        dictionary defining the details of the imputation type
+
+    Returns
+    -------
+    pandas.DataFrame
+        dataframe with an added imputation column defined by the imputation_spec
+    """
+    column_name = imputation_spec["intermediate_column"]
+    fill_column = imputation_spec["fill_column"]
+    fill_method = imputation_spec["fill_method"]
+    link_column = imputation_spec["link_column"]
+
+    df[column_name] = (
+        df.groupby(group)[fill_column].fillna(method=fill_method) * df[link_column]
+    )
+    return df
+
+
+def merge_imputation_type(df, imputation_spec, marker, combined_imputation):
+    """
+    Use an existing column of imputed values and an imputation marker to merge
+    values into a single column.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+    imputation_spec : dict
+        dictionary defining the details of the imputation type
+    marker : str
+        column name containing a marker to indicate the type of imputation required
+    combined_imputation : str
+        column name for the combined imputation types according to the imputation marker
+
+    Returns
+    -------
+    pandas.DataFrame
+        dataframe with combined_imputation
+    """
+
+    imputation_marker = imputation_spec["marker"]
+    imputation_column = imputation_spec["intermediate_column"]
+
+    df.loc[df[marker] == imputation_marker, combined_imputation] = df[imputation_column]
+    return df
diff --git a/src/construction_matches.py b/src/construction_matches.py
index dc947e0b..41ab2590 100644
--- a/src/construction_matches.py
+++ b/src/construction_matches.py
@@ -1,5 +1,6 @@
 import pandas as pd

+
 def flag_construction_matches(dataframe, target, period, auxiliary):
     """
     Add flag to indicate whether the record has non-null target, period and
@@ -22,6 +23,8 @@
         dataframe with additional flag_construction_matches column
     """

-    dataframe["flag_construction_matches"] = pd.notna(dataframe[[target, period, auxiliary]]).all(axis="columns")
+    dataframe["flag_construction_matches"] = pd.notna(
+        dataframe[[target, period, auxiliary]]
+    ).all(axis="columns")

     return dataframe
diff --git a/src/cumulative_imputation_links.py b/src/cumulative_imputation_links.py
new file mode 100755
index 00000000..91dfbed9
--- /dev/null
+++ b/src/cumulative_imputation_links.py
@@ -0,0 +1,72 @@
+import numpy as np
+
+
+def get_cumulative_links(
+    dataframe,
+    forward_or_backward,
+    strata,
+    reference,
+    target,
+    period,
+    imputation_link,
+    time_difference=1,
+):
+    """
+    Create cumulative imputation links for multiple consecutive periods
+    without a return.
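+
+    Links are chained by taking the cumulative product of the period-on-period
+    imputation links within each imputation_group, i.e. within each run of
+    consecutive missing periods for the same strata and reference.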
+
+    Parameters
+    ----------
+    dataframe : pandas.DataFrame
+    forward_or_backward : str
+        either f or b for forward or backward method
+    strata : str
+        column name containing strata information (sic)
+    reference : str
+        column name containing business reference id
+    target : str
+        column name containing target variable
+    period : str
+        column name containing time period
+    imputation_link : str
+        column name containing imputation links
+    time_difference : int
+        time difference between predictive and target period in months
+
+    Returns
+    -------
+    pandas.DataFrame
+        dataframe with imputation_group and
+        cumulative_forward/backward_imputation_link column
+    """
+
+    dataframe.sort_values([strata, reference, period], inplace=True)
+    dataframe["missing_value"] = np.where(dataframe[target].isnull(), True, False)
+
+    dataframe["imputation_group"] = (
+        (
+            (dataframe["missing_value"].diff(time_difference) != 0)
+            | (dataframe[strata].diff(time_difference) != 0)
+            | (dataframe[reference].diff(time_difference) != 0)
+        )
+        .astype("int")
+        .cumsum()
+    )
+
+    if forward_or_backward == "f":
+        dataframe["cumulative_" + imputation_link] = dataframe.groupby(
+            "imputation_group"
+        )[imputation_link].cumprod()
+    elif forward_or_backward == "b":
+        dataframe["cumulative_" + imputation_link] = (
+            dataframe[::-1].groupby("imputation_group")[imputation_link].cumprod()[::-1]
+        )
+
+    dataframe["cumulative_" + imputation_link] = np.where(
+        ~dataframe[target].isnull(),
+        np.nan,
+        dataframe["cumulative_" + imputation_link],
+    )
+
+    return dataframe[["imputation_group", "cumulative_" + imputation_link]]
diff --git a/src/flag_and_count_matched_pairs.py b/src/flag_and_count_matched_pairs.py
index 9ab4a480..36df9e75 100644
--- a/src/flag_and_count_matched_pairs.py
+++ b/src/flag_and_count_matched_pairs.py
@@ -6,31 +6,32 @@ def flag_matched_pair(
     df, forward_or_backward, target, period, reference, strata, time_difference=1
 ):
     """
-    function to flag matched pairs using the shift method
-
-    Parameters
-    ----------
-    df : pd.DataFrame
-        pandas dataframe of original data
-    forward_or_backward : str
-        number of rows to shift up or down
-    target : str
-        column name containing target variable
-    period : str
-        column name containing time period
-    reference : str
-        column name containing business reference id
-    strata : str
-        column name containing strata information (sic)
-    time_difference: int
-        lookup distance for matched pairs
-
-    Returns
-    -------
-    _type_
-    two pandas dataframes: the main dataframe with column added flagging
-    forward matched pairs and
-    predictive target variable data column
+    Function to flag matched pairs using the shift method.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        pandas dataframe of original data
+    forward_or_backward : str
+        direction of the lookup, either 'f' (forward) or 'b' (backward)
+    target : str
+        column name containing target variable
+    period : str
+        column name containing time period
+    reference : str
+        column name containing business reference id
+    strata : str
+        column name containing strata information (sic)
+    time_difference : int
+        lookup distance for matched pairs
+
+    Returns
+    -------
+    pd.DataFrame
+        the main dataframe with added columns flagging matched pairs and
+        holding the predictive target variable data
     """

     df = df.sort_values(by=[reference, period])
@@ -52,6 +53,7 @@ def count_matches(df, flag, period, strata):
     """
     function to count matched pairs

+
     Parameters
     ----------
     df : pd.DataFrame
diff --git a/src/forward_link.py b/src/forward_link.py
new file mode 100644
index 00000000..1ac97429
--- /dev/null
+++ b/src/forward_link.py
@@ -0,0 +1,76 @@
+import numpy as np
+import pandas as pd
+
+
+def calculate_imputation_link(
+    df: pd.DataFrame,
+    period: str,
+    strata: str,
+    match_col: str,
+    target_variable: str,
+    predictive_variable: str,
+) -> pd.DataFrame:
+    """
+    Calculate the link between target_variable and predictive_variable by strata;
+    a match_col must be supplied which indicates if target_variable
+    and predictive_variable can be linked.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Original dataframe.
+    period : str
+        Column name containing time period.
+    strata : str
+        Column name containing strata information (sic).
+    match_col : str
+        Column name of the matched pair links; this column should be bool.
+    target_variable : str
+        Column name of the target variable.
+    predictive_variable : str
+        Column name of the predictive target variable.
+
+    Returns
+    -------
+    df : pd.DataFrame
+        A pandas DataFrame with a new column containing either f_link or b_link
+        based on the input parameters.
+    """
+
+    df_intermediate = df.copy()
+
+    if match_col == "f_matched_pair" and predictive_variable == "f_predictive_question":
+        link_col_name = "f_link"
+
+    elif (
+        match_col == "b_matched_pair" and predictive_variable == "b_predictive_question"
+    ):
+        link_col_name = "b_link"
+
+    else:
+        raise ValueError(
+            f"""
+            {match_col} and {predictive_variable} do not have the same wildcard."""
+        )
+
+    df_intermediate[target_variable] = (
+        df_intermediate[target_variable] * df_intermediate[match_col]
+    )
+
+    df_intermediate[predictive_variable] = (
+        df_intermediate[predictive_variable] * df_intermediate[match_col]
+    )
+
+    numerator = df_intermediate.groupby([strata, period])[target_variable].transform(
+        "sum"
+    )
+
+    denominator = df_intermediate.groupby([strata, period])[
+        predictive_variable
+    ].transform("sum")
+
+    denominator.replace(0, np.nan, inplace=True)  # avoid division by zero
+
+    df[link_col_name] = numerator / denominator
+
+    return df
diff --git a/src/imputation_flags.py b/src/imputation_flags.py
new file mode 100644
index 00000000..91bc04ad
--- /dev/null
+++ b/src/imputation_flags.py
@@ -0,0 +1,137 @@
+import numpy as np
+import pandas as pd
+
+
+def create_impute_flags(
+    df: pd.DataFrame,
+    target: str,
+    reference: str,
+    strata: str,
+    auxiliary: str,
+    predictive_auxiliary: str,
+):
+    """
+    Function to create logical columns for each type of imputation.
+    The output columns are needed to create the string flag column for
+    imputation methods.
+    The function requires the f_predictive and b_predictive columns produced
+    by the `flag_matched_pair` function.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing forward and backward predictive period columns
+        (these columns are created by calling flag_matched_pair_merge forwards
+        and backwards)
+    target : str
+        Column name containing target variable.
+    reference : str
+        Column name containing business reference id.
+    strata : str
+        Column name containing strata information (sic).
+    auxiliary : str
+        Column name containing auxiliary data.
+    predictive_auxiliary : str
+        Column name containing predictive auxiliary data; this column is
+        created by the flag_matched_pair_merge function.
+
+    Returns
+    -------
+    pd.DataFrame
+        Dataframe with five additional logical columns determining if the
+        target is a return (r_flag) or can be imputed by forward imputation
+        (fir_flag), backward imputation (bir_flag), construction (c_flag)
+        or forward imputation from construction (fic_flag).
+    """
+    for direction in ["f", "b"]:
+        try:
+            df["{}_predictive_{}".format(direction, target)]
+        except KeyError:
+            raise KeyError(
+                "Dataframe needs column '{}_predictive_{}',".format(direction, target)
+                + " run flag_matched_pair function first"
+            )
+    forward_target_roll = "f_predictive_" + target + "_roll"
+    backward_target_roll = "b_predictive_" + target + "_roll"
+    forward_aux_roll = "f_predictive_" + auxiliary + "_roll"
+
+    df[forward_target_roll] = df.groupby([reference, strata])[
+        "f_predictive_" + target
+    ].ffill()
+
+    df[backward_target_roll] = df.groupby([reference, strata])[
+        "b_predictive_" + target
+    ].bfill()
+
+    df["r_flag"] = df[target].notna()
+
+    df["fir_flag"] = np.where(
+        df[forward_target_roll].notna() & df[target].isna(), True, False
+    )
+
+    df["bir_flag"] = np.where(
+        df[backward_target_roll].notna() & df[target].isna(), True, False
+    )
+
+    construction_conditions = df[target].isna() & df[auxiliary].notna()
+    df["c_flag"] = np.where(construction_conditions, True, False)
+
+    df[forward_aux_roll] = df.groupby([reference, strata])[predictive_auxiliary].ffill()
+
+    fic_conditions = df[target].isna() & df[forward_aux_roll].notna()
+    df["fic_flag"] = np.where(fic_conditions, True, False)
+
+    df.drop(
+        [
+            forward_target_roll,
+            backward_target_roll,
+            forward_aux_roll,
+            predictive_auxiliary,
+        ],
+        axis=1,
+        inplace=True,
+    )
+
+    return df
+
+
+def generate_imputation_marker(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Function to add a column containing a string indicating the method of
+    imputation to use, following the hierarchy in the specifications.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing the logical columns produced by
+        `create_impute_flags` (r_flag, fir_flag, bir_flag, fic_flag and c_flag).
+
+    Returns
+    -------
+    pd.DataFrame
+        Dataframe with an additional column containing the imputation marker,
+        i.e. the type of imputation method that should be used to fill
+        missing returns.
+    """
+
+    imputation_markers_and_conditions = {
+        "r": df["r_flag"],
+        "fir": ~df["r_flag"] & df["fir_flag"],
+        "bir": ~df["r_flag"] & ~df["fir_flag"] & df["bir_flag"],
+        "fic": ~df["r_flag"] & ~df["fir_flag"] & ~df["bir_flag"] & df["fic_flag"],
+        "c": ~df["r_flag"]
+        & ~df["fir_flag"]
+        & ~df["bir_flag"]
+        & ~df["fic_flag"]
+        & df["c_flag"],
+    }
+
+    df["imputation_marker"] = np.select(
+        imputation_markers_and_conditions.values(),
+        imputation_markers_and_conditions.keys(),
+        default="error",
+    )
+
+    return df
diff --git a/src/link_filter.py b/src/link_filter.py
new file mode 100644
index 00000000..f5ff6383
--- /dev/null
+++ b/src/link_filter.py
@@ -0,0 +1,49 @@
+import pandas as pd
+
+# TODO: Extend function to receive multiple df with *df_with_filters
+
+
+def flag_rows_to_ignore(
+    df: pd.DataFrame, df_with_filters: pd.DataFrame
+) -> pd.DataFrame:
+    """
+    Add a new bool column named ignore_from_link to df, set to True for the
+    observations defined in df_with_filters.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Original dataframe.
+    df_with_filters : pd.DataFrame
+        Dataframe with observations which should be flagged in the original
+        dataframe.
+
+    Returns
+    -------
+    df : pd.DataFrame
+        Original dataframe with a bool column containing the flags.
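+
+    Examples
+    --------
+    An illustrative call; the column names follow the test fixtures and the
+    values are hypothetical:
+
+    >>> filters = pd.DataFrame({"identifier": [70001], "date": [202003]})
+    >>> flagged = flag_rows_to_ignore(df, filters)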
+ + """ + + if not set(df_with_filters.columns).issubset(df.columns): + + raise ValueError( + f"""df_with_filters has these columns {list(df_with_filters)} while + df has these columns {list(df)}, please + double check the column names.""" + ) + + # TODO: Check if values to be ignored exist + + df = df.set_index(list(df_with_filters)) + + df_with_filters = df_with_filters.set_index(list(df_with_filters)) + + df["ignore_from_link"] = df.index.isin(df_with_filters.index) + + df = df.reset_index() + + # TODO: Consider what should be logged and reroute print to logs + print("These values were flagged:\n", df.loc[df["ignore_from_link"]]) + + return df diff --git a/tests/apply_imputation_link.csv b/tests/apply_imputation_link.csv new file mode 100644 index 00000000..c81711cd --- /dev/null +++ b/tests/apply_imputation_link.csv @@ -0,0 +1,10 @@ +strata,reference,target,period,forward_imputation_link,backward_imputation_link,imputation_group,cumulative_forward_imputation_link,cumulative_backward_imputation_link,imputation_marker,imputed_value,auxiliary_variable,construction_link +100,100000,200,202402,1,2,1,,,R,,, +100,100000,,202403,2,0.6,2,2,0.6,FIR,400,, +100,100000,,202404,3,1,2,6,1,FIR,1200,, +200,100001,,202402,1,4,3,1,2,BIR,600,, +200,100001,,202403,3,0.5,3,3,0.5,BIR,150,, +200,100001,300,202404,0.5,1,4,,,R,,, +300,100002,,202402,1,4,5,1,2,C,600,40,0.1 +300,100002,,202403,3,0.5,5,3,0.5,FIC,150,, +300,100002,,202404,0.5,1,5,2,,FIC,,, diff --git a/tests/calculate_links_test_data.csv b/tests/calculate_links_test_data.csv new file mode 100755 index 00000000..72e6408d --- /dev/null +++ b/tests/calculate_links_test_data.csv @@ -0,0 +1,16 @@ +,identifier,period,group,question,f_predictive_question,b_predictive_question,f_matched_pair,b_matched_pair,f_link,b_link +0,10001,202001,1,547.0,,362.0,False,True,,0.9925133689839573 +1,10001,202002,1,362.0,547.0,895.0,True,True,1.0075431034482758,0.8431018935978359 +2,10001,202003,1,895.0,362.0,,True,False,1.186096256684492, +3,10002,202001,1,381.0,,573.0,False,True,,0.9925133689839573 +4,10002,202002,1,573.0,381.0,214.0,True,True,1.0075431034482758,0.8431018935978359 +5,10002,202003,1,214.0,573.0,,True,False,1.186096256684492, +6,10001,202001,2,961.0,,267.0,False,True,,1.693854748603352 +7,10001,202002,2,267.0,961.0,314.0,True,True,0.5903693931398417,0.8523809523809524 +8,10001,202003,2,314.0,267.0,,True,False,1.1731843575418994, +9,10002,202001,2,555.0,,628.0,False,True,,1.693854748603352 +10,10002,202002,2,628.0,555.0,736.0,True,True,0.5903693931398417,0.8523809523809524 +11,10002,202003,2,736.0,628.0,,True,False,1.1731843575418994, +12,10005,202001,1,,,,False,False,,0.9925133689839573 +13,10005,202002,2,,,100.0,False,False,0.5903693931398417,0.8523809523809524 +14,10005,202003,2,100.0,,,False,False,1.1731843575418994, diff --git a/tests/cumulative_links.csv b/tests/cumulative_links.csv new file mode 100755 index 00000000..bef347a5 --- /dev/null +++ b/tests/cumulative_links.csv @@ -0,0 +1,7 @@ +strata,reference,target,period,forward_imputation_link,backward_imputation_link,imputation_group,cumulative_forward_imputation_link,cumulative_backward_imputation_link +100,100000,200,202402,1,2,1,, +100,100000,,202403,2,0.6,2,2,0.6 +100,100000,,202404,3,1,2,6,1 +200,100001,,202402,1,4,3,1,2 +200,100001,,202403,3,0.5,3,3,0.5 +200,100001,300,202404,0.5,1,4,, diff --git a/tests/data/apply_imputation_link/BIR.csv b/tests/data/apply_imputation_link/BIR.csv new file mode 100755 index 00000000..954700c4 --- /dev/null +++ b/tests/data/apply_imputation_link/BIR.csv @@ -0,0 
+1,4 @@ +imputation_class,reference,target,period,backward_imputation_link,cumulative_backward_imputation_link,imputation_marker,imputed_value +200,100001,,202402,4,2,BIR,600 +200,100001,,202403,0.5,0.5,BIR,150 +200,100001,300,202404,1,,R, diff --git a/tests/data/apply_imputation_link/C_FIC.csv b/tests/data/apply_imputation_link/C_FIC.csv new file mode 100755 index 00000000..7d2424b2 --- /dev/null +++ b/tests/data/apply_imputation_link/C_FIC.csv @@ -0,0 +1,4 @@ +imputation_class,reference,target,period,forward_imputation_link,cumulative_forward_imputation_link,construction_link,auxiliary_variable,imputation_marker,imputed_value +300,100002,,202402,1,,0.1,1000,C,100 +300,100002,,202403,3,3,,,FIC,300 +300,100002,,202404,0.5,1.5,,,FIC,150 diff --git a/tests/data/apply_imputation_link/FIR.csv b/tests/data/apply_imputation_link/FIR.csv new file mode 100755 index 00000000..341ece76 --- /dev/null +++ b/tests/data/apply_imputation_link/FIR.csv @@ -0,0 +1,4 @@ +imputation_class,reference,target,period,forward_imputation_link,cumulative_forward_imputation_link,imputation_marker,imputed_value +100,100000,200,202402,1,,R, +100,100000,,202403,2,2,FIR,400 +100,100000,,202404,3,6,FIR,1200 diff --git a/tests/data/apply_imputation_link/FIR_BIR_C_FIC.csv b/tests/data/apply_imputation_link/FIR_BIR_C_FIC.csv new file mode 100755 index 00000000..91ec36ec --- /dev/null +++ b/tests/data/apply_imputation_link/FIR_BIR_C_FIC.csv @@ -0,0 +1,10 @@ +imputation_class,reference,target,period,forward_imputation_link,backward_imputation_link,auxiliary_variable,construction_link,cumulative_forward_link,cumulative_backward_link,imputation_marker,imputed_value +100,100000,200,202402,1,2,,,,,R, +100,100000,,202403,2,0.6,,,2,0.6,FIR,400 +100,100000,,202404,3,1,,,6,1,FIR,1200 +200,100001,,202402,1,4,,,1,2,BIR,600 +200,100001,,202403,3,0.5,,,3,0.5,BIR,150 +200,100001,300,202404,0.5,1,,,,,R, +300,100002,,202402,1,4,1000,0.1,,2,C,100 +300,100002,,202403,3,0.5,,,3,0.5,FIC,300 +300,100002,,202404,0.5,1,,,1.5,,FIC,150 diff --git a/tests/helper_functions.py b/tests/helper_functions.py index b9006376..83bce07d 100644 --- a/tests/helper_functions.py +++ b/tests/helper_functions.py @@ -1,7 +1,8 @@ import pandas as pd + def load_and_format(filename): """Load csv as pandas dataframe and cast period column to datetime type""" df_loaded = pd.read_csv(filename) - df_loaded['period'] = pd.to_datetime(df_loaded['period'], format='%Y%m') + df_loaded["period"] = pd.to_datetime(df_loaded["period"], format="%Y%m") return df_loaded diff --git a/tests/imputation_flag_data.csv b/tests/imputation_flag_data.csv new file mode 100644 index 00000000..31b56aa8 --- /dev/null +++ b/tests/imputation_flag_data.csv @@ -0,0 +1,28 @@ +reference,strata,period,target_variable,auxiliary,f_predictive_target_variable,b_predictive_target_variable,r_flag,fir_flag,bir_flag,c_flag,fic_flag,f_predictive_auxiliary,imputation_marker +1,100,202001,8444.0,51.0,,,True,False,False,False,False,,r +1,100,202002,,51.0,8444.0,2003.0,False,True,True,True,True,51.0,fir +1,100,202003,2003.0,51.0,,1003.0,True,False,False,False,False,51.0,r +1,100,202004,1003.0,51.0,2003.0,,True,False,False,False,False,51.0,r +2,100,202001,,72.0,,,False,False,True,True,False,,bir +2,100,202002,,,,,False,False,True,False,True,72.0,bir +2,100,202003,,72.0,,3251.0,False,False,True,True,True,,bir +2,100,202004,3251.0,72.0,,,True,False,False,False,False,72.0,r +3,100,202001,,7.0,,7511.0,False,False,True,True,False,,bir +3,100,202002,7511.0,7.0,,1234.0,True,False,False,False,False,7.0,r 
+3,100,202003,1234.0,7.0,7511.0,1214.0,True,False,False,False,False,7.0,r +3,100,202004,1214.0,7.0,1234.0,,True,False,False,False,False,7.0,r +4,100,202001,64.0,81.0,,,True,False,False,False,False,,r +4,100,202002,,81.0,64.0,,False,True,True,True,True,81.0,fir +4,100,202003,,81.0,,254.0,False,True,True,True,True,81.0,fir +4,100,202004,254.0,81.0,,,True,False,False,False,False,81.0,r +5,100,202001,65.0,81.0,,342.0,True,False,False,False,False,,r +5,100,202002,342.0,81.0,65.0,634.0,True,False,False,False,False,81.0,r +5,100,202003,634.0,81.0,342.0,254.0,True,False,False,False,False,81.0,r +5,100,202004,254.0,81.0,634.0,,True,False,False,False,False,81.0,r +6,100,202001,64.0,81.0,,,True,False,False,False,False,,r +6,100,202002,,81.0,64.0,654.0,False,True,True,True,True,81.0,fir +6,100,202003,654.0,81.0,,,True,False,False,False,False,81.0,r +6,100,202004,,81.0,654.0,,False,True,False,True,True,81.0,fir +7,100,202001,,40.0,,,False,False,False,True,False,,c +7,100,202002,,,,,False,False,False,False,True,40.0,fic +7,100,202003,,,,,False,False,False,False,True,,fic diff --git a/tests/test_apply_imputation_link.py b/tests/test_apply_imputation_link.py new file mode 100755 index 00000000..568bfcec --- /dev/null +++ b/tests/test_apply_imputation_link.py @@ -0,0 +1,37 @@ +from pathlib import Path + +import pytest +from helper_functions import load_and_format +from pandas.testing import assert_frame_equal + +from src.apply_imputation_link import create_and_merge_imputation_values + + +@pytest.fixture(scope="class") +def fir_bir_c_fic_test_data(): + return load_and_format( + Path("tests") / "data" / "apply_imputation_link" / "FIR_BIR_C_FIC.csv" + ) + + +class TestApplyImputationLink: + def test_all_imputation_types(self, fir_bir_c_fic_test_data): + expected_output = fir_bir_c_fic_test_data + + input_data = expected_output.drop(columns=["imputed_value"]) + actual_output = create_and_merge_imputation_values( + input_data, + "imputation_class", + "reference", + "period", + "imputation_marker", + "imputed_value", + "target", + "cumulative_forward_link", + "cumulative_backward_link", + "auxiliary_variable", + "construction_link", + imputation_types=("c", "fir", "bir", "fic"), + ) + + assert_frame_equal(actual_output, expected_output) diff --git a/tests/test_construction_matches.py b/tests/test_construction_matches.py index 3daf0260..1378c6ba 100644 --- a/tests/test_construction_matches.py +++ b/tests/test_construction_matches.py @@ -1,25 +1,31 @@ -import pytest - from pathlib import Path + +import pytest +from helper_functions import load_and_format from pandas.testing import assert_frame_equal from src.construction_matches import flag_construction_matches -from helper_functions import load_and_format + @pytest.fixture(scope="class") def construction_test_data(): - return load_and_format(Path("tests")/"construction_matches.csv") - + return load_and_format(Path("tests") / "construction_matches.csv") + + class TestConstructionMatches: def test_construction_matches_flag(self, construction_test_data): - expected_output = construction_test_data[[ - "target", - "period", - "auxiliary", - "flag_construction_matches", - ]] + expected_output = construction_test_data[ + [ + "target", + "period", + "auxiliary", + "flag_construction_matches", + ] + ] input_data = expected_output.drop(columns=["flag_construction_matches"]) - actual_output = flag_construction_matches(input_data, "target", "period", "auxiliary") + actual_output = flag_construction_matches( + input_data, "target", "period", "auxiliary" + ) - 
assert_frame_equal(actual_output, expected_output)
\ No newline at end of file
+        assert_frame_equal(actual_output, expected_output)
diff --git a/tests/test_cumulative_imputation_links.py b/tests/test_cumulative_imputation_links.py
new file mode 100755
index 00000000..bf31094a
--- /dev/null
+++ b/tests/test_cumulative_imputation_links.py
@@ -0,0 +1,64 @@
+from pathlib import Path
+
+import pytest
+from helper_functions import load_and_format
+from pandas.testing import assert_frame_equal
+
+from src.cumulative_imputation_links import get_cumulative_links
+
+
+@pytest.fixture(scope="class")
+def cumulative_links_test_data():
+    return load_and_format(Path("tests") / "cumulative_links.csv")
+
+
+class TestCumulativeLinks:
+    def test_get_cumulative_links_forward(self, cumulative_links_test_data):
+        input_data = cumulative_links_test_data.drop(
+            columns=["cumulative_forward_imputation_link", "imputation_group"]
+        )
+
+        expected_output = cumulative_links_test_data[
+            [
+                "imputation_group",
+                "cumulative_forward_imputation_link",
+            ]
+        ]
+
+        actual_output = get_cumulative_links(
+            input_data,
+            "f",
+            "strata",
+            "reference",
+            "target",
+            "period",
+            "forward_imputation_link",
+            1,
+        )
+
+        assert_frame_equal(actual_output, expected_output)
+
+    def test_get_cumulative_links_backward(self, cumulative_links_test_data):
+        input_data = cumulative_links_test_data.drop(
+            columns=["cumulative_backward_imputation_link", "imputation_group"]
+        )
+
+        expected_output = cumulative_links_test_data[
+            [
+                "imputation_group",
+                "cumulative_backward_imputation_link",
+            ]
+        ]
+
+        actual_output = get_cumulative_links(
+            input_data,
+            "b",
+            "strata",
+            "reference",
+            "target",
+            "period",
+            "backward_imputation_link",
+            1,
+        )
+
+        assert_frame_equal(actual_output, expected_output)
diff --git a/tests/test_flag_data.csv b/tests/test_flag_data.csv
new file mode 100755
index 00000000..2f97b47f
--- /dev/null
+++ b/tests/test_flag_data.csv
@@ -0,0 +1,29 @@
+identifier,date,group,question,other,ignore_from_link
+70001,202001,100,5951.0,39,False
+70001,202002,100,1814.0,39,False
+70001,202003,100,734.0,39,True
+70001,202004,100,96.0,39,False
+70001,202005,100,9086.0,39,True
+70001,202006,100,3949.0,39,False
+70001,202007,100,49.0,39,False
+70002,202001,100,6705.0,94,False
+70002,202002,100,48.0,94,False
+70002,202003,100,5361.0,94,False
+70002,202004,100,8767.0,94,False
+70002,202005,100,9214.0,94,False
+70002,202006,100,7467.0,94,False
+70002,202007,100,3475.0,94,False
+70003,202001,100,6153.0,42,False
+70003,202002,100,7711.0,42,False
+70003,202003,100,5403.0,42,False
+70003,202004,100,7445.0,42,False
+70003,202005,100,7092.0,42,False
+70003,202006,100,2038.0,42,False
+70003,202007,100,8768.0,42,False
+70004,202001,100,,6,False
+70004,202002,100,,6,False
+70004,202003,100,6288.0,6,False
+70004,202004,100,,6,False
+70004,202005,100,,6,False
+70004,202006,100,5875.0,6,False
+70004,202007,100,,6,False
diff --git a/tests/test_flag_filters.csv b/tests/test_flag_filters.csv
new file mode 100755
index 00000000..abdfb4c8
--- /dev/null
+++ b/tests/test_flag_filters.csv
@@ -0,0 +1,3 @@
+identifier,date
+70001,202003
+70001,202005
diff --git a/tests/test_forward_link.py b/tests/test_forward_link.py
new file mode 100644
index 00000000..51fa63c8
--- /dev/null
+++ b/tests/test_forward_link.py
@@ -0,0 +1,75 @@
+import pytest
+from helper_functions import load_and_format
+from pandas.testing import assert_frame_equal
+
+from src.forward_link import calculate_imputation_link
+
+scenarios = ["calculate_links_test_data"]
+
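+# Each scenario name maps to a CSV fixture at tests/<scenario>.csv.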
+@pytest.mark.parametrize("scenario", scenarios)
+class TestLinks:
+    def test_forward_links(self, scenario):
+        """Test if function returns the f_link column"""
+
+        df_output = load_and_format("tests/" + scenario + ".csv")
+
+        df_input = df_output.drop(columns=["f_link"])
+
+        df_input = calculate_imputation_link(
+            df_input,
+            "period",
+            "group",
+            "f_matched_pair",
+            "question",
+            "f_predictive_question",
+        )
+
+        assert_frame_equal(df_input, df_output, check_like=True)
+
+    def test_back_links(self, scenario):
+        """Test if function returns the b_link column"""
+        df_output = load_and_format("tests/" + scenario + ".csv")
+
+        df_input = df_output.drop(columns=["b_link"])
+
+        df_input = calculate_imputation_link(
+            df_input,
+            "period",
+            "group",
+            "b_matched_pair",
+            "question",
+            "b_predictive_question",
+        )
+
+        assert_frame_equal(df_input, df_output, check_like=True)
+
+    def test_exception(self, scenario):
+        """
+        Test if the function raises a ValueError when called with mismatched
+        arguments, in particular with f_matched_pair and b_predictive_question
+        or with b_matched_pair and f_predictive_question.
+        """
+        df = load_and_format("tests/" + scenario + ".csv")
+
+        with pytest.raises(ValueError):
+            df = calculate_imputation_link(
+                df,
+                "period",
+                "group",
+                "f_matched_pair",
+                "question",
+                "b_predictive_question",
+            )
+        with pytest.raises(ValueError):
+            df = calculate_imputation_link(
+                df,
+                "period",
+                "group",
+                "b_matched_pair",
+                "question",
+                "f_predictive_question",
+            )
diff --git a/tests/test_imputation_flags.py b/tests/test_imputation_flags.py
new file mode 100644
index 00000000..315b5fa3
--- /dev/null
+++ b/tests/test_imputation_flags.py
@@ -0,0 +1,50 @@
+from pathlib import Path
+
+import pytest
+from helper_functions import load_and_format
+from pandas.testing import assert_frame_equal
+
+from src.imputation_flags import create_impute_flags, generate_imputation_marker
+
+
+@pytest.fixture(scope="class")
+def imputation_flag_test_data():
+    return load_and_format(Path("tests") / "imputation_flag_data.csv")
+
+
+class TestImputationFlags:
+    def test_create_impute_flags(self, imputation_flag_test_data):
+        df_expected_output = imputation_flag_test_data.copy()
+        df_expected_output.drop(["imputation_marker"], axis=1, inplace=True)
+        df_input = df_expected_output.copy()
+        df_input = df_input[
+            [
+                "reference",
+                "strata",
+                "period",
+                "target_variable",
+                "auxiliary",
+                "f_predictive_target_variable",
+                "b_predictive_target_variable",
+                "f_predictive_auxiliary",
+            ]
+        ]
+        df_output = create_impute_flags(
+            df=df_input,
+            target="target_variable",
+            reference="reference",
+            strata="strata",
+            auxiliary="auxiliary",
+            predictive_auxiliary="f_predictive_auxiliary",
+        )
+
+        df_expected_output.drop(["f_predictive_auxiliary"], axis=1, inplace=True)
+
+        assert_frame_equal(df_output, df_expected_output)
+
+    def test_imputation_marker(self, imputation_flag_test_data):
+        df_expected_output = imputation_flag_test_data.copy()
+        df_input = imputation_flag_test_data.copy()
+        df_input.drop("imputation_marker", axis=1, inplace=True)
+        df_output = generate_imputation_marker(df_input)
+        assert_frame_equal(df_output, df_expected_output)
diff --git a/tests/test_link_filter.py b/tests/test_link_filter.py
new file mode 100644
index 00000000..bbd5cc75
--- /dev/null
+++ b/tests/test_link_filter.py
@@ -0,0 +1,39 @@
+import pandas as pd
+import pytest
+from pandas.testing import assert_frame_equal
+
+from src.link_filter import flag_rows_to_ignore
+
+
+@pytest.mark.parametrize("scenario", ["test_flag_data"])
+@pytest.mark.parametrize("filters", ["test_flag_filters"])
+class TestFilters:
+    def test_basic_filter(self, scenario, filters):
+        """Test ignore_from_link is correct"""
+
+        df_output_expected = pd.read_csv("tests/" + scenario + ".csv")
+
+        df_filters = pd.read_csv("tests/" + filters + ".csv")
+
+        df_input = df_output_expected.drop(columns=["ignore_from_link"])
+
+        df_output = flag_rows_to_ignore(df_input, df_filters)
+
+        assert_frame_equal(df_output, df_output_expected)
+
+    def test_exception(self, scenario, filters):
+        """Test if function raises an exception when the columns in filters
+        do not exist in scenario."""
+
+        df_output_expected = pd.read_csv("tests/" + scenario + ".csv")
+
+        df_filters = pd.read_csv("tests/" + filters + ".csv")
+
+        df_input = df_output_expected.drop(columns=["ignore_from_link"])
+
+        with pytest.raises(ValueError):
+            df_filters.columns = df_filters.columns + "_fail"
+            flag_rows_to_ignore(df_input, df_filters)
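The chaining behaviour exercised by tests/cumulative_links.csv can be reproduced directly. A minimal sketch, with the link and target values copied from the first fixture rows (the inline DataFrame itself is illustrative):

```python
import pandas as pd

from src.cumulative_imputation_links import get_cumulative_links

# One business with a return in 202402 followed by two missing periods: the
# forward links are chained by cumulative product within the missing run.
df = pd.DataFrame(
    {
        "strata": [100, 100, 100],
        "reference": [100000, 100000, 100000],
        "target": [200, None, None],
        "period": [202402, 202403, 202404],
        "forward_imputation_link": [1.0, 2.0, 3.0],
    }
)

result = get_cumulative_links(
    df, "f", "strata", "reference", "target", "period", "forward_imputation_link"
)

# cumulative_forward_imputation_link: NaN (period with a return), 2.0, 6.0
print(result)
```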