Skip to content

Commit

Permalink
Fix corpus e2e tests (#509)
Browse files Browse the repository at this point in the history
* Pretranslate even if there isn't a target corpus.
Put "TrainOnAll" and "PretranslateAll" back in
Don't do https redirection
Fixes for E2E testing
Add E2E test
Fix echo

* reviewer comments.

* I see now
  • Loading branch information
johnml1135 authored Oct 15, 2024
1 parent 261dde1 commit 30697ae
Show file tree
Hide file tree
Showing 12 changed files with 380 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
build:
name: Build
runs-on: ubuntu-latest
timeout-minutes: 45
timeout-minutes: 60

env:
SERVAL_CLIENT_ID: ${{ secrets.SERVAL_CLIENT_ID }}
Expand Down
3 changes: 0 additions & 3 deletions src/Echo/src/EchoTranslationEngine/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@

WebApplication app = builder.Build();

// Configure the HTTP request pipeline.
app.UseHttpsRedirection();

app.MapGrpcService<TranslationEngineServiceV1>();
app.MapGrpcService<HealthServiceV1>();

Expand Down
12 changes: 10 additions & 2 deletions src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,23 @@ await client.BuildStartedAsync(
var sourceFiles = corpus
.SourceCorpora.SelectMany(sc =>
sc.Files.Where(f =>
(sc.PretranslateTextIds is null || sc.PretranslateTextIds.Contains(f.TextId))
(
sc.PretranslateAll
|| sc.PretranslateTextIds is null
|| sc.PretranslateTextIds.Contains(f.TextId)
)
&& f.Format == FileFormat.Text
)
)
.ToDictionary(f => f.TextId, f => f.Location);
var targetFiles = corpus
.TargetCorpora.SelectMany(tc =>
tc.Files.Where(f =>
(tc.PretranslateTextIds is null || tc.PretranslateTextIds.Contains(f.TextId))
(
tc.PretranslateAll
|| tc.PretranslateTextIds is null
|| tc.PretranslateTextIds.Contains(f.TextId)
)
&& f.Format == FileFormat.Text
)
)
Expand Down
2 changes: 0 additions & 2 deletions src/Machine/src/Serval.Machine.EngineServer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@

var app = builder.Build();

app.UseHttpsRedirection();

app.MapServalTranslationEngineService();
app.MapHangfireDashboard();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,22 +238,27 @@ row.Ref is not ScriptureRef sr
}
}
}
void WriteRow(Utf8JsonWriter writer, string textId, IReadOnlyList<object> refs, string translation)
{
writer.WriteStartObject();
writer.WriteString("corpusId", corpus.Id);
writer.WriteString("textId", textId);
writer.WriteStartArray("refs");
foreach (object rowRef in refs)
writer.WriteStringValue(rowRef.ToString());
writer.WriteEndArray();
writer.WriteString("translation", translation);
writer.WriteEndObject();
pretranslateCount++;
}

ITextCorpus targetCorpus =
targetCorpora.Length > 0 ? targetCorpora[0].TextCorpus : new DictionaryTextCorpus();

foreach (Row row in AlignPretranslateCorpus(sourcePretranslateCorpora, targetCorpora[0].TextCorpus))
foreach (Row row in AlignPretranslateCorpus(sourcePretranslateCorpora, targetCorpus))
{
if (row.SourceSegment.Length > 0)
{
pretranslateWriter.WriteStartObject();
pretranslateWriter.WriteString("corpusId", corpus.Id);
pretranslateWriter.WriteString("textId", row.TextId);
pretranslateWriter.WriteStartArray("refs");
foreach (object rowRef in row.Refs)
pretranslateWriter.WriteStringValue(rowRef.ToString());
pretranslateWriter.WriteEndArray();
pretranslateWriter.WriteString("translation", row.SourceSegment);
pretranslateWriter.WriteEndObject();
pretranslateCount++;
}
WriteRow(pretranslateWriter, row.TextId, row.Refs, row.SourceSegment);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,16 +286,20 @@ private static Models.MonolingualCorpus Map(Translation.V1.MonolingualCorpus sou
kvp => kvp.Value.Chapters.ToHashSet()
);
var trainOnTextIds = source.TrainOnTextIds.ToHashSet();
FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds);
FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds, source.TrainOnAll);

var pretranslateChapters = source.PretranslateChapters.ToDictionary(
kvp => kvp.Key,
kvp => kvp.Value.Chapters.ToHashSet()
);
var pretranslateTextIds = source.PretranslateTextIds.ToHashSet();
FilterChoice pretranslateFilter = GetFilterChoice(pretranslateChapters, pretranslateTextIds);
FilterChoice pretranslateFilter = GetFilterChoice(
pretranslateChapters,
pretranslateTextIds,
source.PretranslateAll
);

return new Models.MonolingualCorpus
var corpus = new Models.MonolingualCorpus
{
Id = source.Id,
Language = source.Language,
Expand All @@ -305,6 +309,7 @@ private static Models.MonolingualCorpus Map(Translation.V1.MonolingualCorpus sou
PretranslateChapters = pretranslateFilter == FilterChoice.Chapters ? pretranslateChapters : null,
PretranslateTextIds = pretranslateFilter == FilterChoice.TextIds ? pretranslateTextIds : null
};
return corpus;
}

private static Models.CorpusFile Map(Translation.V1.CorpusFile source)
Expand All @@ -326,12 +331,13 @@ private enum FilterChoice

private static FilterChoice GetFilterChoice(
IReadOnlyDictionary<string, HashSet<int>> chapters,
HashSet<string> textIds
HashSet<string> textIds,
bool noFilter
)
{
// Only either textIds or Scripture Range will be used at a time
// TextIds may be an empty array, so prefer that if both are empty (which applies to both scripture and text)
if (chapters is null && textIds is null)
if (noFilter || (chapters is null && textIds is null))
return FilterChoice.None;
if (chapters is null || chapters.Count == 0)
return FilterChoice.TextIds;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ message ParallelCorpus {
message MonolingualCorpus {
string id = 1;
string language = 2;
bool train_on_all = 3;
bool pretranslate_all = 4;
map<string, ScriptureChapters> train_on_chapters = 5;
map<string, ScriptureChapters> pretranslate_chapters = 6;
repeated string train_on_text_ids = 7;
Expand Down
51 changes: 41 additions & 10 deletions src/Serval/src/Serval.Translation/Services/EngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,12 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre
V1.MonolingualCorpus targetCorpus =
new() { Language = source.TargetLanguage, Files = { source.TargetFiles.Select(Map) } };

if (trainingCorpus != null)
if (trainingCorpus is null || (trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null))
{
sourceCorpus.TrainOnAll = true;
targetCorpus.TrainOnAll = true;
}
else
{
if (trainingCorpus.TextIds is not null && trainingCorpus.ScriptureRange is not null)
{
Expand Down Expand Up @@ -636,7 +641,15 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre
targetCorpus.TrainOnChapters.Add(chapters);
}
}
if (pretranslateCorpus != null)
if (
pretranslateCorpus is null
|| (pretranslateCorpus.TextIds is null && pretranslateCorpus.ScriptureRange is null)
)
{
sourceCorpus.PretranslateAll = true;
targetCorpus.PretranslateAll = true;
}
else
{
if (pretranslateCorpus.TextIds is not null && pretranslateCorpus.ScriptureRange is not null)
{
Expand Down Expand Up @@ -767,14 +780,32 @@ pretranslateFilter is not null
Files = { source.Files.Select(Map) }
};

if (trainOnChapters is not null)
corpus.TrainOnChapters.Add(trainOnChapters);
if (trainingFilter?.TextIds is not null)
corpus.TrainOnTextIds.Add(trainingFilter.TextIds);
if (pretranslateChapters is not null)
corpus.PretranslateChapters.Add(pretranslateChapters);
if (pretranslateFilter?.TextIds is not null)
corpus.PretranslateTextIds.Add(pretranslateFilter.TextIds);
if (trainingFilter is null || (trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null))
{
corpus.TrainOnAll = true;
}
else
{
if (trainOnChapters is not null)
corpus.TrainOnChapters.Add(trainOnChapters);
if (trainingFilter?.TextIds is not null)
corpus.TrainOnTextIds.Add(trainingFilter.TextIds);
}

if (
pretranslateFilter is null
|| (pretranslateFilter.TextIds is null && pretranslateFilter.ScriptureRange is null)
)
{
corpus.PretranslateAll = true;
}
else
{
if (pretranslateChapters is not null)
corpus.PretranslateChapters.Add(pretranslateChapters);
if (pretranslateFilter?.TextIds is not null)
corpus.PretranslateTextIds.Add(pretranslateFilter.TextIds);
}

return corpus;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1660,6 +1660,105 @@ public async Task StartBuildAsync_ParallelCorpus()
Assert.That(build, Is.Not.Null);
}

[Test]
public async Task StartBuildAsync_Corpus_NoFilter()
{
TranslationEnginesClient client = _env.CreateTranslationEnginesClient();
TranslationCorpus addedCorpus = await client.AddCorpusAsync(NMT_ENGINE1_ID, TestCorpusConfig);
PretranslateCorpusConfig ptcc =
new() { CorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }] };
TrainingCorpusConfig tcc =
new()
{
CorpusId = addedCorpus.Id,
SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }],
TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID }]
};
;
TranslationBuildConfig tbc = new TranslationBuildConfig
{
Pretranslate = [ptcc],
TrainOn = [tcc],
Options = """
{"max_steps":10,
"use_key_terms":false,
"some_double":10.5,
"some_nested": {"more_nested": {"other_double":10.5}},
"some_string":"string"}
"""
};
TranslationBuild resultAfterStart;
Assert.ThrowsAsync<ServalApiException>(async () =>
{
resultAfterStart = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
});

TranslationBuild build = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc);
Assert.That(build, Is.Not.Null);
Assert.That(build.TrainOn, Is.Not.Null);
Assert.That(build.TrainOn.Count, Is.EqualTo(1));
Assert.That(build.TrainOn[0].TextIds, Is.Null);
Assert.That(build.TrainOn[0].ScriptureRange, Is.Null);
Assert.That(build.Pretranslate, Is.Not.Null);
Assert.That(build.Pretranslate.Count, Is.EqualTo(1));
Assert.That(build.Pretranslate[0].TextIds, Is.Null);
Assert.That(build.Pretranslate[0].ScriptureRange, Is.Null);

build = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
Assert.That(build, Is.Not.Null);
}

[Test]
public async Task StartBuildAsync_ParallelCorpus_NoFilter()
{
TranslationEnginesClient client = _env.CreateTranslationEnginesClient();
TranslationParallelCorpus addedCorpus = await client.AddParallelCorpusAsync(
NMT_ENGINE1_ID,
TestParallelCorpusConfig
);
PretranslateCorpusConfig ptcc =
new() { ParallelCorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }] };
TrainingCorpusConfig tcc =
new()
{
ParallelCorpusId = addedCorpus.Id,
SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }],
TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID }]
};
;
TranslationBuildConfig tbc = new TranslationBuildConfig
{
Pretranslate = [ptcc],
TrainOn = [tcc],
Options = """
{"max_steps":10,
"use_key_terms":false,
"some_double":10.5,
"some_nested": {"more_nested": {"other_double":10.5}},
"some_string":"string"}
"""
};
TranslationBuild resultAfterStart;
Assert.ThrowsAsync<ServalApiException>(async () =>
{
resultAfterStart = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
});

TranslationBuild build = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc);
Assert.That(build, Is.Not.Null);
Assert.That(build.TrainOn, Is.Not.Null);
Assert.That(build.TrainOn.Count, Is.EqualTo(1));
Assert.That(build.TrainOn[0].TextIds, Is.Null);
Assert.That(build.TrainOn[0].ScriptureRange, Is.Null);
Assert.That(build.Pretranslate, Is.Not.Null);
Assert.That(build.Pretranslate.Count, Is.EqualTo(1));
Assert.That(build.Pretranslate[0].TextIds, Is.Null);
Assert.That(build.Pretranslate[0].ScriptureRange, Is.Null);

build = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
Assert.That(build, Is.Not.Null);
}

[Test]
public async Task StartBuildAsync_ParallelCorpus_PretranslateParallelAndNormalCorpus()
{
Expand Down
10 changes: 5 additions & 5 deletions src/Serval/test/Serval.E2ETests/ServalApiTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ public async Task NmtQueueMultiple()
engineIds[i] = await _helperClient.CreateNewEngineAsync("Nmt", "es", "en", $"NMT1_{i}");
string engineId = engineIds[i];
string[] books = ["MAT.txt", "1JN.txt", "2JN.txt"];
await _helperClient.AddTextCorpusToEngineAsync(engineId, books, "es", "en", false);
await _helperClient.AddTextCorpusToEngineAsync(engineId, ["3JN.txt"], "es", "en", true);
await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, books, "es", "en", false);
await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, ["3JN.txt"], "es", "en", true);
await _helperClient.StartBuildAsync(engineId);
//Ensure that tasks are enqueued roughly in order
await Task.Delay(1_000);
Expand Down Expand Up @@ -247,7 +247,7 @@ public async Task CircuitousRouteGetWordGraphAsync()
Assert.That(ex.StatusCode, Is.EqualTo(409));

//Add corpus
string cId = await _helperClient.AddTextCorpusToEngineAsync(
string cId = await _helperClient.AddParallelTextCorpusToEngineAsync(
smtEngineId,
["2JN.txt", "3JN.txt"],
"es",
Expand All @@ -259,10 +259,10 @@ public async Task CircuitousRouteGetWordGraphAsync()
await _helperClient.BuildEngineAsync(smtEngineId);

//Remove added corpus (shouldn't affect translation)
await _helperClient.TranslationEnginesClient.DeleteCorpusAsync(smtEngineId, cId, deleteFiles: false);
await _helperClient.TranslationEnginesClient.DeleteParallelCorpusAsync(smtEngineId, cId);

// Add corpus
await _helperClient.AddTextCorpusToEngineAsync(
await _helperClient.AddParallelTextCorpusToEngineAsync(
smtEngineId,
["1JN.txt", "2JN.txt", "3JN.txt"],
"es",
Expand Down
Loading

0 comments on commit 30697ae

Please sign in to comment.