From bce695d81e4c9e26075111bd6a31f67897a6c941 Mon Sep 17 00:00:00 2001 From: Benjamin Stern Date: Fri, 2 Oct 2020 06:53:15 +0300 Subject: [PATCH] Various Tweaks and Enhancements - Added Parsing of Links for the Gemara - Added Index for Chapter on TopicTextId and Path - Fixed Id not setting correctly - Added Chapter TopicTextId --- Converter/MainWindow.xaml.cs | 13 ++++- Converter/Model/SQLite/Chapter.cs | 4 ++ Converter/Model/SQLite/Text.cs | 4 ++ Converter/SefariaMongoDBService.cs | 53 +++++++++++++++++---- Converter/SefariaSQLiteConversionContext.cs | 4 ++ 5 files changed, 68 insertions(+), 10 deletions(-) diff --git a/Converter/MainWindow.xaml.cs b/Converter/MainWindow.xaml.cs index 574fd07..bf1711c 100644 --- a/Converter/MainWindow.xaml.cs +++ b/Converter/MainWindow.xaml.cs @@ -138,11 +138,20 @@ private void ConversionLogic() { textParseList.RemoveAt(0); var txt = _serviceMongo.ParseText(raw, _serviceSQLite); - //var test = _serviceSQLite.Texts.Local; - _serviceSQLite.Add(txt); + //var test = _serviceSQLite.Texts.Local; + + _serviceSQLite.Add(_serviceSQLite.Texts,txt); + //_serviceSQLite.SaveChanges(); + foreach (var chapter in _serviceMongo.ListChapters(txt.Chapter)) + { + chapter.TopicText = txt; + _serviceSQLite.Add(_serviceSQLite.Chapters, chapter); + } if (i % textBatchAmount == 0) { + //_serviceSQLite.SaveChanges(); + //_serviceSQLite.ChangeTracker.DetectChanges(); _serviceSQLite.SaveChanges(); _serviceSQLite.DisposeAsync(); _serviceSQLite = new SefariaSQLiteConversionContext(new Microsoft.EntityFrameworkCore.DbContextOptions { }); diff --git a/Converter/Model/SQLite/Chapter.cs b/Converter/Model/SQLite/Chapter.cs index cf16d0b..3c0b481 100644 --- a/Converter/Model/SQLite/Chapter.cs +++ b/Converter/Model/SQLite/Chapter.cs @@ -11,6 +11,10 @@ public class Chapter { [Key] public int Id { get; set; } + [ForeignKey("Text")] + public int TopicTextId { get; set; } + public virtual Text TopicText { get; set; } + public bool HasChild { get; set; } 
[ForeignKey("Chapter")] public int? ParentChapterId { get; set; } diff --git a/Converter/Model/SQLite/Text.cs b/Converter/Model/SQLite/Text.cs index 9fc3cb8..c600a3a 100644 --- a/Converter/Model/SQLite/Text.cs +++ b/Converter/Model/SQLite/Text.cs @@ -8,6 +8,10 @@ namespace Converter.Model.SQLite { public class Text { + public Text() { + Id = ++IdCounter; + } + static public int IdCounter = 0; [Key] public int Id { get; set; } diff --git a/Converter/SefariaMongoDBService.cs b/Converter/SefariaMongoDBService.cs index a7e03ef..55ce495 100644 --- a/Converter/SefariaMongoDBService.cs +++ b/Converter/SefariaMongoDBService.cs @@ -164,7 +164,7 @@ public Text ParseText(BsonDocument value, SefariaSQLiteConversionContext targetC versionTitleLG.Labels.Add(new Label { LanguageId = (int)LanguageTypes.Hebrew, Text = element.Value.AsString }); break; case "chapter": - text.Chapter = GenerateChapterTree(element.Value); + text.Chapter = GenerateChapterTree(text,element.Value); break; default: break; @@ -176,7 +176,17 @@ public Text ParseText(BsonDocument value, SefariaSQLiteConversionContext targetC return text; } + public List ListChapters(Chapter c) { + List result = new List(); + result.Add(c); + if(c.Children != null) + foreach (var item in c.Children) + { + result.AddRange(ListChapters(item)); + } + return result; + } private int CountChapters(Chapter c) { int count = 0; if (c != null) { @@ -192,15 +202,15 @@ private int CountChapters(Chapter c) { return count; } - private Chapter GenerateChapterTree(BsonValue value, Chapter parent = null, int index = 1) + private Chapter GenerateChapterTree(Text txt, BsonValue value, Chapter parent = null, int index = 1) { - Chapter instance = new Chapter { Index = index }; + Chapter instance = new Chapter { Index = index, TopicText = txt, TopicTextId = txt.Id }; if (parent != null) { instance.ParentChapter = parent; } //To recreate fast Lookup Path - instance.Path = (parent!=null? 
parent.Index.ToString()+":":"")+instance.Index.ToString(); + instance.Path = (parent!=null && parent.ParentChapter != null? parent.Path+":":"")+instance.Index.ToString(); switch (value.BsonType) { case BsonType.Array: @@ -208,7 +218,7 @@ private Chapter GenerateChapterTree(BsonValue value, Chapter parent = null, int instance.Children = new List(); for (int i = 0; i < array.Count; i++) { - instance.Children.Add(GenerateChapterTree(array[i], instance, i+1)); + instance.Children.Add(GenerateChapterTree(txt, array[i], instance, i+1)); instance.HasChild = true; } break; @@ -218,7 +228,7 @@ private Chapter GenerateChapterTree(BsonValue value, Chapter parent = null, int for (int i = 0; i < document.Elements.Count();i++) { var element = document.GetElement(i); - var child = GenerateChapterTree(element.Value, instance, i+1); + var child = GenerateChapterTree(txt, element.Value, instance, i+1); child.Text = element.Name; instance.Children.Add(child); } @@ -302,14 +312,41 @@ public LinkItem ParseLink(BsonDocument value, SefariaSQLiteConversionContext tar return null; } + string parseLocation(string location) { + var gemaraIndicators = new string[] { "a", "b" }; + for (int i = 0; i < gemaraIndicators.Length; i++) + { + var indicator = gemaraIndicators[i]; + var foundIndex = location.IndexOf(indicator, 0, StringComparison.OrdinalIgnoreCase); + if (foundIndex >= 0) { + var parts = location.Split(":"); + for (int j = 0; j < parts.Length; j++) + { + if (parts[j].Contains(indicator)) { + parts[j] = parts[j].Replace(indicator, ""); + var value = int.Parse(parts[j]); + value = (value - 1) * 2 + (i + 1); + parts[j] = value.ToString(); + break; + } + } + + return string.Join(":", parts); + } + } + + return location; + } + var primaryTopicSeperator = PrimaryTopic.LastIndexOf(' '); string primaryTopicName = PrimaryTopic.Substring(0, primaryTopicSeperator); - string primaryTopicLocation = PrimaryTopic.Substring(primaryTopicSeperator + 1); + string primaryTopicLocation = 
parseLocation(PrimaryTopic.Substring(primaryTopicSeperator + 1)); + int primaryTopicId = targetContext.Topics.Where(t => t.Name == primaryTopicName).Select(t => t.Id).FirstOrDefault(); var secondaryTopicSeperator = SecondaryTopic.LastIndexOf(' '); string secondaryTopicName = SecondaryTopic.Substring(0, secondaryTopicSeperator); - string secondaryTopicLocation = SecondaryTopic.Substring(secondaryTopicSeperator + 1); + string secondaryTopicLocation = parseLocation(SecondaryTopic.Substring(secondaryTopicSeperator + 1)); int secondaryTopicId = targetContext.Topics.Where(t => t.Name == secondaryTopicName).Select(t => t.Id).FirstOrDefault(); link.PrimaryLocation = primaryTopicLocation; diff --git a/Converter/SefariaSQLiteConversionContext.cs b/Converter/SefariaSQLiteConversionContext.cs index f088d93..ad10d9c 100644 --- a/Converter/SefariaSQLiteConversionContext.cs +++ b/Converter/SefariaSQLiteConversionContext.cs @@ -17,6 +17,7 @@ public SefariaSQLiteConversionContext(DbContextOptions LabelGroups { get; set; } public DbSet