From 96f794d628322daa8b268a807dfbfb931583324b Mon Sep 17 00:00:00 2001 From: NSoiffer Date: Sun, 7 Jul 2024 20:57:17 +0100 Subject: [PATCH] Add support for wasm with unzipping of language dirs Now all files and langauges are available --- src/prefs.rs | 76 ++++------ src/shim_filesystem.rs | 324 +++++++++++++++++++++++++---------------- src/speech.rs | 42 +----- 3 files changed, 232 insertions(+), 210 deletions(-) diff --git a/src/prefs.rs b/src/prefs.rs index 3c37d399..7d5ff113 100644 --- a/src/prefs.rs +++ b/src/prefs.rs @@ -29,7 +29,6 @@ use crate::speech::{as_str_checked, RulesFor, FileAndTime}; use std::collections::{HashMap, HashSet}; use phf::phf_set; use crate::shim_filesystem::*; -use zip::ZipArchive; use crate::errors::*; /// Use to indicate preference not found with Preference::to_string() @@ -259,6 +258,12 @@ impl PreferenceManager { /// /// If rules_dir is an empty PathBuf, the existing rules_dir is used (an error if it doesn't exist) pub fn initialize(&mut self, rules_dir: PathBuf) -> Result<()> { + #[cfg(not(target_family = "wasm"))] + let rules_dir = match rules_dir.canonicalize() { + Err(e) => bail!("set_rules_dir: could not canonicalize path {}: {}", rules_dir.display(), e.to_string()), + Ok(rules_dir) => rules_dir, + }; + self.set_rules_dir(&rules_dir)?; self.set_preference_files()?; self.set_all_files(&rules_dir)?; @@ -439,7 +444,7 @@ impl PreferenceManager { /// Returns true if it unzipped them pub fn unzip_files(path: &Path, language: &str) -> Result { thread_local!{ - /// when a language/braille code dir is unzipped, it is marked here + /// when a language/braille code dir is unzipped, it is recorded here static UNZIPPED_FILES: RefCell> = RefCell::new( HashSet::with_capacity(31)); } @@ -447,35 +452,19 @@ impl PreferenceManager { None => return Ok(false), Some(dir) => dir, }; - let zip_file = dir.join(language.to_string() + ".zip"); - let zip_file_string = zip_file.to_string_lossy().to_string(); - if UNZIPPED_FILES.with( |unzipped_files| unzipped_files.borrow().contains(&zip_file.to_string_lossy().to_string())) { + let zip_file_name = language.to_string() + ".zip"; + let zip_file_path = dir.join(&zip_file_name); + let zip_file_string = zip_file_path.to_string_lossy().to_string(); + if UNZIPPED_FILES.with( |unzipped_files| unzipped_files.borrow().contains(&zip_file_string)) { return Ok(false); } - // Fix: Need to add wasm version (can't use read_to_string because it wants UTF-8) - let archive = match std::fs::read(&zip_file) { - Err(e) => { - // maybe started out with all the files unzipped? - match read_dir_shim(path) { - Err(e) => bail!("unzip_files: couldn't read directory '{}'\n error is {}", path.display(), e.to_string()), - Ok(read_dir_iter) => { - if read_dir_iter.count() > 1 { - UNZIPPED_FILES.with( |unzipped_files| unzipped_files.borrow_mut().insert(zip_file_string) ); - return Ok(false); - } - bail!("Couldn't open zip file {}. {}", zip_file.to_str().unwrap(), e.to_string()); - } - } - }, - Ok(contents) => std::io::Cursor::new(contents), - }; - - let mut zip_archive = ZipArchive::new(archive).unwrap(); - // FIX: this needs to have a shim for wasm version - zip_archive.extract(dir).expect("Zip extraction failed"); + let result = match zip_extract_shim(&dir, &zip_file_name) { + Err(e) => bail!("Couldn't open zip file {}: {}.", zip_file_string, e), + Ok(result) => result, + }; UNZIPPED_FILES.with( |unzipped_files| unzipped_files.borrow_mut().insert(zip_file_string) ); - return Ok(true); + return Ok(result); } /// Set BlockSeparators and DecimalSeparators @@ -581,7 +570,7 @@ impl PreferenceManager { } if let Some(result) = alternative_style_file { - debug!("find_file: found alternative_style_file '{}'", result.to_string_lossy()); + // debug!("find_file: found alternative_style_file '{}'", result.to_string_lossy()); return Ok(result); // found an alternative style file in the same lang dir } @@ -600,14 +589,10 @@ impl PreferenceManager { fn find_any_style_file(path: &Path) -> Result { // try to find a xxx_Rules.yaml file // we find the first file because this is the deepest (most language specific) speech rule file - for entry in read_dir_shim(path)?.flatten() { - let alt_file_path = entry.path(); - let file_name = alt_file_path.to_string_lossy(); - if is_file_shim(&alt_file_path) && file_name.ends_with("_Rules.yaml") { - return Ok(alt_file_path); - } + match find_file_in_dir_that_ends_with_shim(path, "_Rules.yaml") { + None => bail!{"didn't find file"}, + Some(file_name) => return Ok(PathBuf::from(file_name)), } - bail!{"didn't find file"} } } @@ -615,20 +600,15 @@ impl PreferenceManager { // return 'Rules/Language/fr', 'Rules/Language/en/gb', etc, if they exist. // fall back to main language, and then to default_dir if language dir doesn't exist let mut full_path = rules_dir.to_path_buf(); - let lang_parts = lang.split('-'); - for part in lang_parts { - full_path.push(Path::new(part)); - if !is_dir_shim(&full_path) { - break; + full_path.push(lang.replace('-', "/")); + for parent in full_path.ancestors() { + if is_dir_shim(parent) { + return Some(parent.to_path_buf()); + } else if parent == rules_dir { + return None } } - - // make sure something got added... - if rules_dir == full_path { - return None; // didn't find a dir - } else { - return Some(full_path); - } + return None; // shouldn't happen } @@ -717,7 +697,7 @@ impl PreferenceManager { bail!("{} is an unknown MathCAT preference!", key); } - debug!("Setting ({}) {} to '{}'", if is_user_pref {"user"} else {"sys"}, key, value); + // debug!("Setting ({}) {} to '{}'", if is_user_pref {"user"} else {"sys"}, key, value); if is_user_pref { // a little messy about the DecimalSeparator due immutable and mutable borrows let current_decimal_separator = self.user_prefs.prefs.get("DecimalSeparator").unwrap().clone(); diff --git a/src/shim_filesystem.rs b/src/shim_filesystem.rs index 139fff39..731ce8e9 100644 --- a/src/shim_filesystem.rs +++ b/src/shim_filesystem.rs @@ -6,6 +6,8 @@ //! but changes are pretty rare and it didn't seem worth it (this may need to be revisited). use std::path::{Path, PathBuf}; +use crate::errors::*; + // The zipped files are needed by WASM builds. // However, they are also useful for other builds because there really isn't another good way to get at the rules. @@ -17,88 +19,136 @@ pub static ZIPPED_RULE_FILES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"),"/r cfg_if! { if #[cfg(target_family = "wasm")] { + // For the WASM build, we build a fake file system based on ZIPPED_RULE_FILES. + // That stream encodes other zip files that must be unzipped. + + // We have a problem in that ZIPPED_RULE_FILES has a static lifetime but the contained zip files, when unzipped, are on the stack with a different lifetime. + // One solution would be to introduce an enum that forks between the two. + // The slightly hacky but slightly less code solution that is adopted is to use Option<>, with None representing the static case + // Note: Rc is used because there are borrowing/lifetime issues without being able to clone the data that goes into the HashMap use std::cell::RefCell; + use std::rc::Rc; + use std::io::Cursor; + use std::io::Read; + use std::collections::{HashSet, HashMap}; - fn file_system_type_from(path: &Path) -> Option<&str> { - // Return "file" or "dir" if a match, otherwise None - use sxd_document::dom::*; - use std::path::Component; - use crate::interface::get_element; - use crate::canonicalize::name; + #[derive(Debug)] + struct FilesEntry { + data: Rc>>, + index: usize, + } + thread_local! { + // mapping the file names to whether they are are directory or a file (if a file, where to find it in the zip archive) + static DIRECTORIES: RefCell> = RefCell::new(HashSet::with_capacity(31)); + static FILES: RefCell> = RefCell::new(HashMap::with_capacity(511)); + } - return DIRECTORY_TREE.with(|files| { - let files = files.borrow(); - let files = get_element(&*files); - // the path should be "Rules/..." - // we don't use a for loop because if we hit a file, we need to check if there are further components (hence, no match) - let mut children = vec![ChildOfElement::Element(files)]; - let mut components = path.components(); - let mut next_component = components.next(); - let mut matched_dir = None; - // debug!("path='{}'", path.to_str().unwrap()); - while let Some(component) = next_component { - if let Component::Normal(os_str) = component { - let component_name = os_str.to_str().unwrap(); - matched_dir = None; - for child in &children { - if let ChildOfElement::Element(child) = child { - if child.attribute_value("name").unwrap() == component_name { - if name(&child) == "dir" { - matched_dir = Some("dir"); - children = child.children(); - break; - } else { // name = "file" - return if components.next().is_none() {Some("file")} else {None}; - } - } + fn read_zip_file(containing_dir: &Path, zip_file: Option>) -> Result<()> { + // Return "file" or "dir" if a match, otherwise None + let zip_file = Rc::new(zip_file); + FILES.with(|files| { + let mut files = files.borrow_mut(); + DIRECTORIES.with(|dirs| { + let mut dirs = dirs.borrow_mut(); + let mut archive = match zip_file.as_ref() { + None => { + let buf_reader = Cursor::new(ZIPPED_RULE_FILES); + zip::ZipArchive::new(buf_reader).unwrap() + }, + Some(zip_file) => { + let buf_reader = Cursor::new((zip_file).as_ref()); + match zip::ZipArchive::new(buf_reader) { + Err(e) => bail!("read_zip_file: failed to create ZipArchive in dir {}: {}", containing_dir.display(), e), + Ok(archive) => archive, + } + } + }; + for i in 0..archive.len() { + let file = archive.by_index(i).unwrap(); + // A little bit of safety/sanity checking + let path = match file.enclosed_name() { + Some(path) => containing_dir.to_path_buf().join(path), + None => { + bail!("Entry {} has a suspicious path (outside of archive)", file.name()); } }; - if matched_dir.is_none() { - return matched_dir; + // debug!("read_zip_file: file path='{}'", path.display()); + // add all the dirs up to the containing dir -- skip the first one as that is a file + // for files like unicode.yaml, this loop is a no-op, but for files in the Shared folder, it will go one time. + for parent in path.ancestors().skip(1) { + if parent == containing_dir { + break; + } + dirs.insert(parent.to_str().unwrap_or_default().to_string()); + } + if file.is_file() { + files.insert(path.to_str().unwrap_or_default().to_string(), FilesEntry{ data: zip_file.clone(), index: i}); + } else if file.is_dir() { + dirs.insert(path.to_str().unwrap_or_default().to_string()); + } else { + bail!("read_zip_file: {} is neither a file nor a directory", path.display()); } - } else { - error!("Expected Component::Normal, found {:?}", component); - return None; }; - next_component = components.next(); - }; - // ran out of components -- must be at a "dir" - return matched_dir; - }); + // debug!("files={:?}", files.keys()); + // debug!("dirs={:?}", dirs); + return Ok( () ); + }) + }) } pub fn is_file_shim(path: &Path) -> bool { - let fs = file_system_type_from(path); - return match fs { - None => false, - Some(fs) => fs == "file", - }; + if FILES.with(|files| files.borrow().is_empty()) { + let empty_path = PathBuf::new(); + read_zip_file(&empty_path, None).unwrap_or(()); + } + return FILES.with(|files| files.borrow().contains_key(path.to_str().unwrap_or_default()) ); } pub fn is_dir_shim(path: &Path) -> bool { - let fs = file_system_type_from(path); - return match fs { - None => false, - Some(fs) => fs == "dir", - }; + if FILES.with(|files| files.borrow().is_empty()) { + let empty_path = PathBuf::new(); + read_zip_file(&empty_path, None).unwrap_or(()); + } + return DIRECTORIES.with(|dirs| dirs.borrow().contains(path.to_str().unwrap_or_default()) ); } - pub fn read_dir_shim(path: &Path) -> Result>> { - return Ok(std::iter::empty::>()); + + pub fn find_file_in_dir_that_ends_with_shim(dir: &Path, ending: &str) -> Option { + // FIX: this is very inefficient -- maybe gather up all the info in read_zip_file()? + // look for files that have 'path' as a prefix + return FILES.with(|files| { + let files = files.borrow(); + + let dir_name = dir.to_str().unwrap_or_default(); + for file_name in files.keys() { + if file_name.strip_prefix(dir_name).is_some() && file_name.ends_with(ending) { + return Some(file_name.clone()); + }; + } + return None; + }); } pub fn canonicalize_shim(path: &Path) -> std::io::Result { - // FIX: need to deal with ".."??? - return Ok( path.to_path_buf() ); + use std::ffi::OsStr; + let dot_dot = OsStr::new(".."); + let mut result = PathBuf::new(); + for part in path.iter() { + if dot_dot == part { + result.pop(); + } else { + result.push(part); + } + } + return Ok(result); } - pub fn read_to_string_shim(path: &Path) -> Result { - use std::io::Cursor; - use std::io::Read; - - let file_name = path.to_str().unwrap().replace("/", "\\"); + pub fn read_to_string_shim(path: &Path) -> Result { + let path = canonicalize_shim(path).unwrap(); // can't fail + let file_name = path.to_str().unwrap_or_default(); + // Is this the debugging override? if let Some(contents) = OVERRIDE_FILE_NAME.with(|override_name| { - if file_name.as_str() == override_name.borrow().as_str() { + if file_name == override_name.borrow().as_str() { debug!("override read_to_string_shim: {}",file_name); return OVERRIDE_FILE_CONTENTS.with(|contents| return Some(contents.borrow().clone())); } else { @@ -107,22 +157,64 @@ cfg_if! { }) { return Ok(contents); }; + debug!("read_to_string_shim: {}",file_name); + + return FILES.with(|files| { + let files = files.borrow(); + let zip_file = match files.get(file_name) { + None => bail!("Didn't find file '{}'", file_name), + Some(data) => data, + }; + let mut archive = match zip_file.data.as_ref() { + None => { + let buf_reader = Cursor::new(ZIPPED_RULE_FILES); + zip::ZipArchive::new(buf_reader).unwrap() + }, + Some(zip_file) => { + let buf_reader = Cursor::new((zip_file).as_ref()); + zip::ZipArchive::new(buf_reader).unwrap() + } + }; + // for name in archive.file_names() { + // debug!(" File: {}", name); + // }; + let mut file = match archive.by_index(zip_file.index) { + Ok(file) => file, + Err(..) => { + panic!("Didn't find {} in zip archive", file_name); + } + }; + + let mut contents = String::new(); + if let Err(e) = file.read_to_string(&mut contents) { + bail!("read_to_string: {}", e); + } + return Ok(contents); + }); + } + + pub fn zip_extract_shim(dir: &Path, zip_file_name: &str) -> Result { + let zip_file_path = dir.join(zip_file_name); + let full_zip_file_name = zip_file_path.to_str().unwrap_or_default(); + + // first, extract full_zip_file_name from ZIPPED_RULE_FILES let buf_reader = Cursor::new(ZIPPED_RULE_FILES); let mut archive = zip::ZipArchive::new(buf_reader).unwrap(); - // for name in archive.file_names() { - // debug!(" File: {}", name); - // }; - let mut file = match archive.by_name(&file_name) { + let mut file = match archive.by_name(full_zip_file_name) { Ok(file) => file, Err(..) => { - panic!("Didn't find {} in zip archive", file_name); + bail!("Didn't find {} in dir {} in zip archive", zip_file_name, dir.display()); } }; - let mut contents = String::new(); - file.read_to_string(&mut contents).unwrap(); - return Ok(contents); + // now add them to FILES + let mut zip_file_bytes: Vec = Vec::with_capacity(file.size() as usize); + if let Err(e) = file.read_to_end(&mut zip_file_bytes) { + bail!("Failed to extract file {} (size={}): {}", zip_file_path.display(), file.size(), e); + } + read_zip_file(dir, Some(zip_file_bytes))?; + return Ok(true); } thread_local! { @@ -134,62 +226,9 @@ cfg_if! { // file_name should be path name starting at Rules dir: e.g, "Rules/en/navigate.yaml" OVERRIDE_FILE_NAME.with(|name| *name.borrow_mut() = file_name.to_string().replace("/", "\\")); OVERRIDE_FILE_CONTENTS.with(|contents| *contents.borrow_mut() = file_contents.to_string()); - crate::speech::SpeechRules::invalidate(); + crate::interface::set_rules_dir("Rules".to_string()).unwrap(); // force reinitialization after the change } - - use sxd_document::parser; - use sxd_document::Package; - thread_local! { - // FIX: use include! macro (static DIRECTORY_TREE: ... = include!(...)) - // The file to include would be the result of something added to build.rs to create directory.xml that mimics what's below - static DIRECTORY_TREE: RefCell = RefCell::new( - parser::parse(r" - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ") - .expect("Internal error in creating web assembly files: didn't parse initializer string") - ); - } } else { - use crate::errors::*; - pub fn is_file_shim(path: &Path) -> bool { return path.is_file(); } @@ -198,8 +237,22 @@ cfg_if! { return path.is_dir(); } - pub fn read_dir_shim(path: &Path) -> Result>> { - return path.read_dir().chain_err(|| format!("while trying to read directory {}", path.to_str().unwrap())); + pub fn find_file_in_dir_that_ends_with_shim(dir: &Path, ending: &str) -> Option { + match dir.read_dir() { + Err(_) => return None, // empty + Ok(read_dir) => { + for dir_entry in read_dir { + if let Ok(entry) = dir_entry { + let file_name = entry.file_name(); + let file_name = file_name.to_string_lossy(); // avoid temp value being dropped + if file_name.ends_with(ending) { + return Some(file_name.to_string()); + } + } + } + return None; + } + } } pub fn canonicalize_shim(path: &Path) -> std::io::Result { @@ -216,6 +269,25 @@ cfg_if! { Ok(str) => return Ok(str), Err(e) => bail!("Read error while trying to read {}: {}", &path.display(), e), } - } + } + + pub fn zip_extract_shim(dir: &Path, zip_file_name: &str) -> Result { + let zip_file = dir.join(zip_file_name); + return match std::fs::read(&zip_file) { + Err(e) => { + // no zip file? -- maybe started out with all the files unzipped? See if there is a .yaml file + match find_file_in_dir_that_ends_with_shim(&dir, ".yaml") { + None => bail!("{}", e), + Some(_file_name) => Ok(false), + } + }, + Ok(contents) => { + let archive = std::io::Cursor::new(contents); + let mut zip_archive = zip::ZipArchive::new(archive).unwrap(); + zip_archive.extract(dir).expect("Zip extraction failed"); + Ok(true) + }, + }; + } } } diff --git a/src/speech.rs b/src/speech.rs index b25bd54b..b95e7331 100644 --- a/src/speech.rs +++ b/src/speech.rs @@ -254,7 +254,7 @@ pub fn process_include(current_file: &Path, new_file_name: &str, mut read_new // get the subdir ...Rules/Braille/en/... // could have ...Rules/Braille/definitions.yaml, so 'next()' doesn't exist in this case, but the file wasn't zipped up if let Some(subdir) = new_file.strip_prefix(unzip_dir).unwrap().iter().next() { - PreferenceManager::unzip_files(unzip_dir, subdir.to_str().unwrap())?; + PreferenceManager::unzip_files(unzip_dir, subdir.to_str().unwrap()).ok(); // ok if err/doesn't exist } } } @@ -1943,11 +1943,14 @@ impl FilesAndTimes { pub fn is_file_up_to_date(&self, pref_path: &Path, should_ignore_file_time: bool) -> bool { // if the time isn't set or the path is different from the prefernce (which might have changed), return false - if self.ft.is_empty() || self.ft[0].time == SystemTime::UNIX_EPOCH || self.as_path() != pref_path { + if self.ft.is_empty() || self.as_path() != pref_path { return false; } if should_ignore_file_time || cfg!(target_family = "wasm") { - return !self.ft.is_empty(); + return true; + } + if self.ft[0].time == SystemTime::UNIX_EPOCH { + return false; } @@ -2226,39 +2229,6 @@ impl SpeechRules { } -cfg_if! { - if #[cfg(target_family = "wasm")] { - pub fn invalidate_all() { - SPEECH_RULES.with( |rules| { - let mut rules = rules.borrow_mut(); - rules.rule_files.invalidate(); - rules.unicode_short_files.borrow_mut().invalidate(); - rules.unicode_full_files.borrow_mut().invalidate(); - rules.definitions_files.borrow_mut().invalidate(); - }); - BRAILLE_RULES.with( |rules| { - let mut rules = rules.borrow_mut(); - rules.rule_files.invalidate(); - rules.unicode_short_files.borrow_mut().invalidate(); - rules.unicode_full_files.borrow_mut().invalidate(); - rules.definitions_files.borrow_mut().invalidate(); - }); - - // these share the unicode and def files with SPEECH_RULES, so need to invalidate them - NAVIGATION_RULES.with( |rules| { - rules.borrow_mut().rule_files.invalidate(); - }); - OVERVIEW_RULES.with( |rules| { - rules.borrow_mut().rule_files.invalidate(); - }); - INTENT_RULES.with( |rules| { - rules.borrow_mut().rule_files.invalidate(); - }); - } - } -} - - /// We track three different lifetimes: /// 'c -- the lifetime of the context and mathml /// 's -- the lifetime of the speech rules (which is static)