Skip to content

Commit

Permalink
Merge pull request #1 from raspberrypilearning/scratchblocks
Browse files Browse the repository at this point in the history
Scratchblocks
  • Loading branch information
PhilipHarney authored Jul 10, 2019
2 parents 103e233 + 1fec595 commit ef683ab
Show file tree
Hide file tree
Showing 8 changed files with 319 additions and 4 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,20 @@ You can uninstall nttt using:
pip3 uninstall nttt
```

### scratchblocks translations - chrome driver

To use the scratchblocks auto translations you will also need to install the [selenium](https://pypi.org/project/selenium/) chrome driver.

1. Download the chrome driver from [sites.google.com/a/chromium.org/chromedriver/downloads](https://sites.google.com/a/chromium.org/chromedriver/downloads).

![chrome driver](images/chrome_driver1.png)

![chrome driver](images/chrome_driver2.png)

2. Open the zip file and copy the `chromedriver.exe` to `C:\Windows`.

![chrome driver](images/chrome_driver3.png)

## Usage

NTTT is a command line tool, called using `nttt`.
Expand Down
Binary file added images/allow_internet_access.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/chrome_driver1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/chrome_driver2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/chrome_driver3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
286 changes: 286 additions & 0 deletions nttt/scratchblocks_translate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
import codecs
import os.path
from re import match, split
from time import sleep
from urllib.parse import quote, urlparse, parse_qs, unquote

from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

SCRATCHBLOCKS_TRANSLATION_URL = "https://scratchblocks.github.io/translator/#?lang={}&script={}"

# The DOM ID of the element containing the translated output on the scratchblocks webpage
# Note that we have to pull it from the href of this link as it's not readily accessable from the textbox
OUTPUT_ELEMENT_ID = "home-link-2"

OUTPUT_CODE_QUERY_PARAM = "script"

TRANSLATIONS_DIRECTORY = "scratchblocks-translations"

CODE_START_INDICATOR_MARKDOWN = ["```blocks3"]

CODE_END_INDICATOR_MARKDOWN = ["```"]

LEADING_CHARACTERS_REGEX = "^[ +-]+"


# Create a selenium browser instance
def create_selenium_chrome_browser():
browser_options = Options()
browser = Chrome(options=browser_options)

return browser


# Find files with any of the provided extensions in a given directory
def find_files(directory, file_extensions=[]):
found_files = []

# Get a list of all the files in the directory, and any child directories
# Iterate over them and check for matches to the listed extensions

if len(file_extensions) == 0:
return file_extensions

for root, directories, filenames in os.walk(directory):

for filename in filenames:

file_path = os.path.join(root, filename)

file_extension = os.path.splitext(file_path)[1][1:]

if file_extension in file_extensions:
found_files.append(file_path)

return found_files


# Build project directory
def get_project_directory(base_directory, repository_name):
return os.path.join(base_directory, repository_name)


# Build language directory within project
def get_language_directory(project_directory, language_code):
return os.path.join(project_directory, language_code)


# Get markdown files from project
def get_markdown_files(language_directory):
return find_files(language_directory, ["md"])


# Get the code blocks from inside a markdown file
def get_code_blocks_from_markdown_file(file_path):
# The blocks of code without their surrounding markdown, for uploading to the translator
code_blocks = []

# The blocks of code with their surrounding markdown, for storing away
markdown_blocks = []

# Boolean to track if we're inside a code block as we iterate over the code line-by-line
in_code_block = False

# Temporary strings to hold the markdown and code blocks as they are built
current_code_block = ""

current_markdown_block = ""

with codecs.open(file_path, encoding='utf-8') as file:

# Look for the markdown indicators of a code block starting
for line in file.readlines():

clean_line = line.strip().replace(" ", "")

if clean_line in CODE_START_INDICATOR_MARKDOWN:
in_code_block = True

# Start a new markdown block
current_markdown_block = line

# Empty the temp code block out
current_code_block = ""

elif clean_line in CODE_END_INDICATOR_MARKDOWN and in_code_block:

current_markdown_block += line

markdown_blocks.append(current_markdown_block)

code_blocks.append(current_code_block)

in_code_block = False

elif in_code_block:

current_markdown_block += line

current_code_block += line

return code_blocks, markdown_blocks


# Clean code for translation and return a matching list of addition/removal markers and line numbers
def strip_modification_markers(code):
code_list = code.split("\n")

modifier_characters = []

for line_number in range(0, len(code_list) - 1):
modifiers_on_line = match(LEADING_CHARACTERS_REGEX, code_list[line_number])

modifiers = modifiers_on_line.group(0) if modifiers_on_line else ""

modifier_characters.append(modifiers)

code_list[line_number] = code_list[line_number][len(modifiers):]

return "\n".join(code_list), modifier_characters


# Restore the modifier characters to code, for use post-translation
def restore_modification_markers(code, modifier_characters):
code_list = code.split("\n")

# Cover for an un-closed loop by adding some spaces to the modifiers:
if len(modifier_characters) == len(code_list) - 1:
proxy_modifier = split(modifier_characters[0], "+ | -")[-1]

modifier_characters.append(proxy_modifier)

for line_number in range(0, len(code_list)):
code_list[line_number] = "{}{}".format(modifier_characters[line_number], code_list[line_number])

return "\n".join(code_list)


# Get translations from the web
def fetch_translated_code(browser, english_code, target_language_iso_code):
# Make the code URL safe
url_safe_code = quote(english_code)

# Construct destination URL for translation page
destination_url = SCRATCHBLOCKS_TRANSLATION_URL.format(target_language_iso_code, url_safe_code)

browser.get(destination_url)

sleep(2)

output_link = browser.find_element_by_id(OUTPUT_ELEMENT_ID).get_attribute("href")

parsed_output_link = urlparse(output_link)

return unquote(parse_qs(parsed_output_link.fragment)[OUTPUT_CODE_QUERY_PARAM][0])


def make_directory(directory_path):
try:
os.mkdir(directory_path)
except OSError as e:
print(e)
print("Creation of directory %s failed" % directory_path)


def make_translation_directories(base_language_directory):
# Create translations directory inside language directory
block_translations_directory = os.path.join(base_language_directory, TRANSLATIONS_DIRECTORY)

make_directory(block_translations_directory)

# Create a folder for the en blocks and a folder for the translated blocks inside that

source_blocks = os.path.join(block_translations_directory, "original")

translated_blocks = os.path.join(block_translations_directory, "translated")

make_directory(source_blocks)

make_directory(translated_blocks)

return {"source": source_blocks, "translated": translated_blocks}


def make_code_file(parent_directory, file_name, contents):
file_path = os.path.join(parent_directory, file_name)

with open(file_path, "w") as file:
file.write(contents)


def file_find_replace(file_path, text_pairs=[]):
with open(file_path, "r") as file:
file_text = file.read()

for pair in text_pairs:
file_text = file_text.replace(pair[0], pair[1])

with open(file_path, "w") as file:
file.write(file_text)


def get_markdown_filename(filepath, language_directory, snippet_number):
filename = str(filepath).replace(str(language_directory), "")

output_filename = filename.replace(".md", "").replace("/", "__")

return "{}_block_{}.txt".format(output_filename, snippet_number)


# Main translation method

def translate_blocks(language_directory, language):
translation_directories = make_translation_directories(language_directory)

files = get_markdown_files(language_directory)

my_browser = create_selenium_chrome_browser()

for file_number in range(0, len(files)):

blocks = get_code_blocks_from_markdown_file(files[file_number])

code = blocks[0]
markdown = blocks[1]

translated_pairs = []

for snippet_number in range(0, len(code)):
cleared_markers = strip_modification_markers(code[snippet_number])

translation = fetch_translated_code(my_browser, cleared_markers[0], language)

source_markdown = markdown[snippet_number]

markdown_list = source_markdown.split("\n")

translated_markdown = "{}\n{}\n{}\n".format(
markdown_list[0],
restore_modification_markers(
translation,
cleared_markers[1]
),
markdown_list[-2]
)

markdown_filename = get_markdown_filename(files[file_number], language_directory, snippet_number)

make_code_file(
translation_directories["source"],
markdown_filename,
source_markdown
)

make_code_file(
translation_directories["translated"],
markdown_filename,
translated_markdown
)

translated_pairs.append((source_markdown, translated_markdown))

# Find/replace in original file
file_find_replace(files[file_number], translated_pairs)

my_browser.close()
17 changes: 16 additions & 1 deletion nttt/tidyup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import re
from pathlib import Path

from .scratchblocks_translate import translate_blocks

def fix_meta(src, dst):
find_replace(src, dst, " - \r\n title:", " - title:")

Expand Down Expand Up @@ -85,7 +87,20 @@ def tidyup_translations(folder, output_folder):
else:
fix_step(source_file_path, output_file_path)

print("Complete")
print("About to translate scratchblocks:")

trasnlate_scratchblocks_yn = input("Continue (y/n):")

if trasnlate_scratchblocks_yn.casefold() == "y":

target_folder = folder
target_language = str(os.path.split(target_folder)[1]).split('-')[0]

translate_blocks(target_folder, target_language)

print("Translated scratchblocks")

print("Complete")

else:
print("No files found in '{}'".format(folder))
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@

__project__ = 'nttt'
__desc__ = 'A utility for Nina to clean up translated projects'
__version__ = '0.1.0'
__version__ = '0.2.0'
__author__ = "Martin O'Hanlon"
__author_email__ = '[email protected]'
__url__ = 'https://github.com/raspberrypilearning/nttt'
#__requires__ = []
__requires__ = ["selenium"]

if __name__ == '__main__':
setup(name='nttt',
Expand All @@ -25,7 +25,7 @@
author = __author__,
author_email = __author_email__,
packages = [__project__],
#install_requires = __requires__,
install_requires = __requires__,
entry_points={
'console_scripts': [
'nttt = nttt:main'
Expand Down

0 comments on commit ef683ab

Please sign in to comment.