-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #71 from Glitchy-Tozier/Small-Fixes
Add/improve scripts
- Loading branch information
Showing
4 changed files
with
153 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
""" | ||
This script checks your layout-config and removes all impossible ngrams from the specified ngrams-directory. | ||
""" | ||
|
||
import yaml | ||
import os | ||
import shutil | ||
|
||
|
||
def load_yaml_from_file(yaml_file):
    """Parse a YAML file and return its content as Python objects.

    Args:
        yaml_file: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default:
    # the script that uses this loader handles non-ASCII key symbols, which a
    # legacy locale encoding would fail to decode (or decode incorrectly).
    with open(yaml_file, "r", encoding="utf-8") as file:
        yaml_code = file.read()
    return yaml.safe_load(yaml_code)
|
||
|
||
def filter_ngrams(layout_chars, ngram_dir, output_dir):
    """Copy the n-gram files of a directory, keeping only typeable n-grams.

    Every ``*.txt`` file in ``ngram_dir`` is read line by line. Each line is
    expected to have the form ``<frequency> <ngram>``. A line is written,
    unchanged, to the file of the same name in ``output_dir`` only if every
    character of its n-gram is contained in ``layout_chars``. Lines without a
    space separator (for example blank lines) are dropped.

    Args:
        layout_chars: Collection of the characters that are allowed in n-grams.
        ngram_dir: Directory containing the n-gram files to filter.
        output_dir: Directory receiving the filtered files; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(ngram_dir):
        if not filename.endswith(".txt"):
            continue

        input_filepath = os.path.join(ngram_dir, filename)
        output_filepath = os.path.join(output_dir, filename)
        with open(input_filepath, "r", encoding="utf-8") as input_file, open(
            output_filepath, "w", encoding="utf-8"
        ) as output_file:
            print("Processing", input_filepath, "→", output_filepath)
            for line in input_file:
                # Split only at the first space: the n-gram itself may
                # contain spaces.
                _frequency, separator, ngram = line.partition(" ")
                if not separator:
                    # No "<frequency> <ngram>" structure, nothing to check.
                    continue

                # Remove the line terminator only. Blindly cutting the last
                # character would eat a real n-gram character on a final line
                # that has no trailing newline.
                if ngram.endswith("\n"):
                    ngram = ngram[:-1]

                if all(char in layout_chars for char in ngram):
                    output_file.write(line)
|
||
|
||
# Example usage
yaml_file = "config/keyboard/my_keyboard_config.yml"  # Specify the keyboard-config here
ignore_in_layout = "☒■⇩⇘⇧⇗♕⇇↜⇉↝♛"  # ♔
ngram_dir = "ngrams/made_up_dir"
output_dir = "ngrams/made_up_dir_reduced"

# Collect every single character that appears in any key of the base layout.
yaml_data = load_yaml_from_file(yaml_file)
layout_chars = set()
print("\nCharacters in Layout:")
for row in yaml_data["base_layout"]["keys"]:
    for key in row:
        print(key)
        layout_chars.update(key)

# Exclude the ignored symbols from the allowed characters. A set difference
# is used instead of `remove()` so that a layout which does not contain one of
# these symbols does not abort the script with a KeyError.
layout_chars.difference_update(ignore_in_layout)

filter_ngrams(layout_chars, ngram_dir, output_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import argparse | ||
import sys | ||
import os | ||
|
||
|
||
def main(ngrams_directory):
    """Rewrite the n-gram files of a directory with percentage frequencies.

    The files ``1-grams.txt``, ``2-grams.txt`` and ``3-grams.txt`` inside
    ``ngrams_directory`` are processed one after another. Each line must have
    the form ``<frequency> <ngram>``. Every frequency is replaced, in place,
    by its share (in percent) of the sum of all frequencies in that file.
    A file whose frequencies sum to zero is left untouched.

    Args:
        ngrams_directory: Path to the directory containing the n-gram files.

    Raises:
        OSError: If one of the three files cannot be opened.
        ValueError: If a line contains no space separator or its frequency
            is not a valid number.
    """
    filenames = [
        os.path.join(ngrams_directory, "1-grams.txt"),
        os.path.join(ngrams_directory, "2-grams.txt"),
        os.path.join(ngrams_directory, "3-grams.txt"),
    ]

    for filename in filenames:
        total = 0
        frequencies = []
        ngram_texts = []
        with open(filename, encoding="utf-8") as ngrams:
            for line in ngrams:
                # Split only once: the n-gram itself may contain spaces.
                freq_str, letters = line.split(" ", 1)
                freq = float(freq_str)

                frequencies.append(freq)
                ngram_texts.append(letters)
                total += freq

        if total == 0:
            # Nothing to normalize against. Bail out BEFORE the file is
            # opened for writing: opening truncates it, and the division
            # below would then fail and leave the file empty.
            continue

        with open(filename, "w", encoding="utf-8") as ngrams:
            for freq, letters in zip(frequencies, ngram_texts):
                # `letters` still carries its original line terminator.
                ngrams.write(str(100 * freq / total) + " " + letters)
|
||
|
||
# Command-line entry point: validate the directory argument, then normalize.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        # Adjacent string literals are concatenated verbatim, so each piece
        # needs its own trailing space to keep the sentences separated.
        description="Normalize n-gram frequencies in a directory of n-gram files. "
        "Normalization converts absolute frequencies into percentages of "
        "how often an n-gram occurs within the corpus."
    )
    parser.add_argument("ngrams_directory", help="Path to the directory containing the n-gram files.")
    args = parser.parse_args()

    if not os.path.isdir(args.ngrams_directory):
        print("Error: Invalid n-gram directory path. Please provide a valid directory path.", file=sys.stderr)
        sys.exit(1)

    main(args.ngrams_directory)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,53 @@ | ||
""" | ||
This script removes all the duplicate layouts from your `solutions.txt`-file. | ||
This script removes all the duplicate layouts from a specified file. | ||
""" | ||
|
||
def main(): | ||
import argparse | ||
import os | ||
|
||
|
||
def remove_duplicates(filename):
    """Remove duplicate lines (layouts) from a file, keeping the first of each.

    The file is read completely. If it contains duplicate lines, it is
    rewritten in place with only the first occurrence of every line, in the
    original order, and a short summary is printed. Otherwise the file is
    left untouched.

    Args:
        filename: Path of the file to deduplicate.
    """
    with open(filename, encoding="utf-8") as layouts:
        all_layouts = layouts.readlines()

    # A dict keeps insertion order and offers constant-time membership tests,
    # so the first occurrence of each line is preserved without the quadratic
    # cost of searching a list for every line.
    unique_layouts = list(dict.fromkeys(all_layouts))
    original_count = len(all_layouts)

    if original_count == len(unique_layouts):
        print("There are no duplicate Layouts.")
    else:
        # Write all unique layouts to the same file, replacing the old text.
        with open(filename, "w", encoding="utf-8") as layouts:
            layouts.writelines(unique_layouts)

        # Display results
        print("Updated file!")
        print("Original count:", original_count)
        print("New count: ", len(unique_layouts))
|
||
|
||
def main():
    """Parse the command line and deduplicate the layouts of the given file.

    Expects exactly one positional argument, the name of the file to process.
    If that path does not exist, the error message is written to stderr and
    the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Remove duplicate layouts from a file."
    )
    parser.add_argument("filename", help="Name of the file to process")
    args = parser.parse_args()

    # Check if the file exists
    if not os.path.exists(args.filename):
        # Raising SystemExit with a message prints it to stderr and exits
        # with status 1. Unlike the `exit()` helper, it does not depend on
        # the `site` module having been loaded.
        raise SystemExit(f"Error: The file '{args.filename}' does not exist.")

    # Call the function to remove duplicates
    remove_duplicates(args.filename)


if __name__ == "__main__":
    main()