Skip to content

Commit

Permalink
Integrated Changes: DB Changes, Fixes, and Performance Enhancements
Browse files Browse the repository at this point in the history
SQLite - Database
- Text: Character Count - for determining what is the most complete source
- Chapter: Reference Id - for quickly looking up a location in a link to display the source and reference.
  • Loading branch information
Benjamin Stern committed Sep 4, 2020
1 parent e0b6965 commit c78fd9f
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 35 deletions.
79 changes: 52 additions & 27 deletions Converter/MainWindow.xaml.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Converter.Model.SQLite;
using Converter.Service;
using MongoDB.Bson;
using System;
using System.Collections.Generic;
using System.Diagnostics;
Expand Down Expand Up @@ -124,17 +125,29 @@ private void ConversionLogic() {
//Processing Texts
var totalTexts = Total = _serviceMongo.TextsCount();
Log($"Processing: Texts #{totalTexts}");
int textBatchAmount = 390;
List<BsonDocument> textParseList = new List<BsonDocument>();
for (int i = 0; i < totalTexts; i++)
{
var txt = _serviceMongo.GetTextAt(i, _serviceSQLite);
var test = _serviceSQLite.Texts.Local;
_serviceSQLite.AddAsync(txt);
if (textParseList.Count == 0)
{
textParseList = _serviceMongo.GetTexts(i, textBatchAmount);
}

var raw = textParseList[0];
textParseList.RemoveAt(0);

var txt = _serviceMongo.ParseText(raw, _serviceSQLite);
//var test = _serviceSQLite.Texts.Local;
_serviceSQLite.Add(txt);

if (i % 260 == 0)
if (i % textBatchAmount == 0)
{
_serviceSQLite.SaveChanges();
_serviceSQLite.DisposeAsync();
_serviceSQLite = new SefariaSQLiteConversionContext(new Microsoft.EntityFrameworkCore.DbContextOptions<SefariaSQLiteConversionContext> { });
//break;//For Testing Links

}
Complete = i;
//Log($"Processing: index {i} / total {totalTexts}");
Expand All @@ -145,34 +158,46 @@ private void ConversionLogic() {
_serviceSQLite.DisposeAsync();
_serviceSQLite = new SefariaSQLiteConversionContext(new Microsoft.EntityFrameworkCore.DbContextOptions<SefariaSQLiteConversionContext> { });


const int linkBatchAmount = 52000;
var totalLinks = Total = _serviceMongo.LinksCount();
var linksList = _serviceMongo.GetLinks();
List<BsonDocument> linksList = new List<BsonDocument>();
Log($"Processing: Links #{totalLinks}");
for (int i = 0; i < linksList.Count; i++)
for (int i = 0; i < totalLinks; i++)
{
var processing = linksList[i];
if (processing != null)
if (linksList.Count == 0)
{
linksList = _serviceMongo.GetLinks(i, linkBatchAmount);
}

if (linksList.Count > 0)
{
var link = _serviceMongo.ParseLink(processing, _serviceSQLite);
if (link != null) _serviceSQLite.AddAsync(link);
var processing = linksList[0];
linksList.RemoveAt(0);

//var processing = linksList[i];
if (processing != null)
{
var link = _serviceMongo.ParseLink(processing, _serviceSQLite);
if (link != null) _serviceSQLite.Add(link);
}
//bool hasNew = false;
//if (link.LinkGroup.Id == 0) {
// hasNew = true;
//}
//foreach (var item in link.LinkGroup.LinkedLanguages)
//{
// if (item.Id == 0) {
// hasNew = true;
// }
//}

if (i % linkBatchAmount == 0)
{
_serviceSQLite.SaveChanges();
_serviceSQLite.DisposeAsync();
_serviceSQLite = new SefariaSQLiteConversionContext(new Microsoft.EntityFrameworkCore.DbContextOptions<SefariaSQLiteConversionContext> { });
}
}
//bool hasNew = false;
//if (link.LinkGroup.Id == 0) {
// hasNew = true;
//}
//foreach (var item in link.LinkGroup.LinkedLanguages)
//{
// if (item.Id == 0) {
// hasNew = true;
// }
//}

//if (i%520==0) {
// _serviceSQLite.SaveChanges();
// _serviceSQLite.DisposeAsync();
// _serviceSQLite = new SefariaSQLiteConversionContext(new Microsoft.EntityFrameworkCore.DbContextOptions<SefariaSQLiteConversionContext> { });
//}
Complete = i;
}
_serviceSQLite.SaveChanges();
Expand Down
2 changes: 2 additions & 0 deletions Converter/Model/SQLite/Chapter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;
using System.IO;
using System.Text;

namespace Converter.Model.SQLite
Expand All @@ -16,6 +17,7 @@ public class Chapter
public Chapter ParentChapter { get; set; }
public ICollection<Chapter> Children { get; set; }
public int Index { get; set; }
public string Path { get; set; }
public string Text { get; set; }

//TODO: Consider Adding a ChapterClonedText to reduce duplication...
Expand Down
3 changes: 2 additions & 1 deletion Converter/Model/SQLite/Link.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ public class LinkItem
[Key]
public int Id { get; set; }
[ForeignKey("LinkGroup")]
public int LinkGroupId { get; set; }
public int? LinkGroupId { get; set; }
public LinkGroup LinkGroup { get; set; }
public string PrimaryLocation { get; set; }
public string SecondaryLocation { get; set; }
public string DebugInfo { get; set; }
}

public class LinkGroup {
Expand Down
2 changes: 2 additions & 0 deletions Converter/Model/SQLite/Text.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,7 @@ public class Text
public int? ChapterId { get; set; }
public Chapter Chapter { get; set; }

public int ChapterCount { get; set; }

}
}
55 changes: 49 additions & 6 deletions Converter/SefariaMongoDBService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,21 @@ public long TextsCount()
return _texts.CountDocuments(new BsonDocument());
}

public Text GetTextAt(int index, SefariaSQLiteConversionContext targetContext) {
/// <summary>
/// Fetches one page of raw text documents from the texts collection.
/// </summary>
/// <param name="startLocation">Zero-based number of documents to skip before the page starts.</param>
/// <param name="amount">Maximum number of documents to return in the page.</param>
/// <returns>
/// The documents in the requested page; an empty list when <paramref name="amount"/> is not
/// positive or <paramref name="startLocation"/> lies past the end of the collection.
/// </returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// Thrown when <paramref name="startLocation"/> is negative.
/// </exception>
public List<BsonDocument> GetTexts(int startLocation, int amount) {
    if (startLocation < 0) {
        throw new System.ArgumentOutOfRangeException(nameof(startLocation), startLocation, "Start location must not be negative.");
    }

    // Limit(0) means "no limit" to MongoDB, which would pull the whole collection.
    if (amount <= 0) {
        return new List<BsonDocument>();
    }

    // Without an explicit sort MongoDB does not guarantee result order, so consecutive
    // Skip/Limit pages could overlap or miss documents. Sorting on _id pins the order.
    // Skipping past the end already yields an empty list, so no separate count query is needed.
    return _texts.Find(_ => true)
                 .Sort(new BsonDocument("_id", 1))
                 .Skip(startLocation)
                 .Limit(amount)
                 .ToList();
}

public Text ParseText(BsonDocument value, SefariaSQLiteConversionContext targetContext) {
Text text = new Text();
LabelGroup versionTitleLG = new LabelGroup();
versionTitleLG.Labels = new List<Label>();
BsonDocument value = _texts.Find(_ => true).Skip(index).FirstOrDefault();
//BsonDocument value = _texts.Find(_ => true).Skip(index).FirstOrDefault();

foreach (var element in value.Elements)
{
switch (element.Name)
Expand Down Expand Up @@ -161,17 +171,37 @@ public Text GetTextAt(int index, SefariaSQLiteConversionContext targetContext) {

}
}


text.ChapterCount = CountChapters(text.Chapter);

return text;
}

/// <summary>
/// Counts a chapter node together with every node reachable through its
/// <c>Children</c> collections.
/// </summary>
/// <param name="c">Root of the subtree to count; may be null.</param>
/// <returns>0 when <paramref name="c"/> is null, otherwise 1 plus the count of all descendants.</returns>
private int CountChapters(Chapter c) {
    // A missing node contributes nothing.
    if (c == null) {
        return 0;
    }

    // Start at 1 for the node itself.
    int total = 1;

    var children = c.Children;
    if (children == null) {
        return total;
    }

    foreach (var child in children) {
        total += CountChapters(child);
    }

    return total;
}

private Chapter GenerateChapterTree(BsonValue value, Chapter parent = null, int index = 1)
{
Chapter instance = new Chapter { Index = index };
if (parent != null) {
instance.ParentChapter = parent;
}

//To recreate fast Lookup Path
instance.Path = (parent!=null? parent.Index.ToString()+":":"")+instance.Index.ToString();

switch (value.BsonType) {
case BsonType.Array:
var array = value.AsBsonArray;
Expand All @@ -192,7 +222,6 @@ private Chapter GenerateChapterTree(BsonValue value, Chapter parent = null, int
child.Text = element.Name;
instance.Children.Add(child);
}

break;
case BsonType.String:
instance.Text = value.AsString;
Expand All @@ -206,15 +235,26 @@ public long LinksCount() {
return _links.CountDocuments(new BsonDocument());
}

public List<BsonDocument> GetLinks() {
/// <summary>
/// Loads every document of the links collection into a single in-memory list.
/// </summary>
/// <returns>All link documents matched by an unfiltered find.</returns>
public List<BsonDocument> GetLinks() => _links.Find(_ => true).ToList();

/// <summary>
/// Fetches one page of raw link documents from the links collection.
/// </summary>
/// <param name="startLocation">Zero-based number of documents to skip before the page starts.</param>
/// <param name="amount">Maximum number of documents to return in the page.</param>
/// <returns>
/// The documents in the requested page; an empty list when <paramref name="amount"/> is not
/// positive or <paramref name="startLocation"/> lies past the end of the collection.
/// </returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// Thrown when <paramref name="startLocation"/> is negative.
/// </exception>
public List<BsonDocument> GetLinks(int startLocation, int amount) {
    if (startLocation < 0) {
        throw new System.ArgumentOutOfRangeException(nameof(startLocation), startLocation, "Start location must not be negative.");
    }

    // Limit(0) means "no limit" to MongoDB, which would pull the whole collection.
    if (amount <= 0) {
        return new List<BsonDocument>();
    }

    // Without an explicit sort MongoDB does not guarantee result order, so consecutive
    // Skip/Limit pages could overlap or miss documents. Sorting on _id pins the order.
    // Skipping past the end already yields an empty list, so no separate count query is needed.
    return _links.Find(_ => true)
                 .Sort(new BsonDocument("_id", 1))
                 .Skip(startLocation)
                 .Limit(amount)
                 .ToList();
}

public LinkItem ParseLink(BsonDocument value, SefariaSQLiteConversionContext targetContext) {
try
{
LinkItem link = new LinkItem();
link.LinkGroup = new LinkGroup();
//link.LinkGroup = new LinkGroup();

//BsonDocument value = _links.Find(_ => true).Skip(index).FirstOrDefault();
string PrimaryTopic = null;
Expand Down Expand Up @@ -348,6 +388,9 @@ public LinkItem ParseLink(BsonDocument value, SefariaSQLiteConversionContext tar
}
}
}
else{
link.DebugInfo = $"{PrimaryTopic}{((primaryTopicId==0)?" 'Not Found'":"")} _ {SecondaryTopic}{((secondaryTopicId == 0) ? " 'Not Found'" : "")}";
}

return link;
}
Expand Down
18 changes: 17 additions & 1 deletion Converter/SefariaSQLiteConversionContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,30 @@ public EntityEntry<T> Add<T>(DbSet<T> target, T item) where T : class
{
Type type = typeof(T);

if (!_typesChanged.Contains(type)) {
if (!_typesChanged.Contains(type))
{
_typesChanged.Add(type);
_trackedList.Add(type, new List<T>());
}
((List<T>)_trackedList.GetValueOrDefault(type))?.Add(item);
return target.Add(item);
}

//public EntityEntry<T> AddAsync<T>(DbSet<T> target, T item) where T : class
//{
// Type type = typeof(T);

// if (!_typesChanged.Contains(type))
// {
// _typesChanged.Add(type);
// _trackedList.Add(type, new List<T>());
// }
// ((List<T>)_trackedList.GetValueOrDefault(type))?.Add(item);
// return target.Add(item);
//}



public override int SaveChanges()
{
_typesChanged.ForEach(t =>
Expand Down

0 comments on commit c78fd9f

Please sign in to comment.