Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Iris: Add lecture transcription storage #10176

Open
wants to merge 40 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
3eeeb99
WIP
isabellagessl Jan 13, 2025
6335ee1
WIP
isabellagessl Jan 13, 2025
a357749
WIP
sebastianloose Jan 13, 2025
13226bc
Merge branch 'feature/iris/video-transcription-storage' of github.com…
sebastianloose Jan 13, 2025
3f9a557
WIP
sebastianloose Jan 13, 2025
151abde
WIP
isabellagessl Jan 13, 2025
750a179
WIP
sebastianloose Jan 13, 2025
cad2db6
Add transcription resource
sebastianloose Jan 14, 2025
d6ee26e
Create custom path for lecture transcription ingestion
sebastianloose Jan 16, 2025
4ed75e2
Remove host from package.json
sebastianloose Jan 20, 2025
ffb02a6
Merge branch 'develop' into feature/iris/video-transcription-storage
sebastianloose Jan 20, 2025
ca47710
Fix lecture transcription ingestion route
sebastianloose Jan 20, 2025
7e84063
cleanup code backend
isabellagessl Jan 20, 2025
1b24721
Add client tests
sebastianloose Jan 20, 2025
ba2dd37
Merge branch 'feature/iris/video-transcription-storage' of github.com…
sebastianloose Jan 20, 2025
a22949a
add server test
isabellagessl Jan 20, 2025
04b63e9
Add status messages
sebastianloose Jan 20, 2025
57427ed
Merge branch 'feature/iris/video-transcription-storage' of github.com…
sebastianloose Jan 20, 2025
b733370
fix server style
isabellagessl Jan 20, 2025
a31455f
Remove not used db query
sebastianloose Jan 20, 2025
256d5c9
Merge branch 'feature/iris/video-transcription-storage' of github.com…
sebastianloose Jan 20, 2025
6f3c804
Minor changes
sebastianloose Jan 20, 2025
26ba567
fix attachmentResourceIntegrationTEst test
isabellagessl Jan 20, 2025
1a820ed
Implement feedback
sebastianloose Jan 21, 2025
01320d3
Fix data type in db
sebastianloose Jan 21, 2025
627ced0
fix server style4
sebastianloose Jan 21, 2025
90344df
improve method names
sebastianloose Jan 21, 2025
8e9d776
Fix dependency injection
sebastianloose Jan 21, 2025
0be3c13
Minor fix
sebastianloose Jan 21, 2025
3c65bf9
lecture - course validation
sebastianloose Jan 21, 2025
d72c175
Merge branch 'develop' into feature/iris/video-transcription-storage
sebastianloose Jan 21, 2025
afde4d2
Merge branch 'feature/iris/video-transcription-storage' of github.com…
sebastianloose Jan 21, 2025
1d3c6b2
Minor fix
sebastianloose Jan 21, 2025
5191ef0
Fix server style errors
sebastianloose Jan 21, 2025
079d088
fix server style
sebastianloose Jan 21, 2025
e2c552c
remove not needed auth check
sebastianloose Jan 21, 2025
53dcba7
fix server style
sebastianloose Jan 21, 2025
19edc8d
fix codestyle
isabellagessl Jan 26, 2025
4f7f425
fix server tests
isabellagessl Jan 26, 2025
62135c7
Merge branch 'develop' into feature/iris/video-transcription-storage
sebastianloose Jan 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import de.tum.cit.aet.artemis.iris.service.pyris.dto.PyrisVariantDTO;
import de.tum.cit.aet.artemis.iris.service.pyris.dto.lectureingestionwebhook.PyrisWebhookLectureDeletionExecutionDTO;
import de.tum.cit.aet.artemis.iris.service.pyris.dto.lectureingestionwebhook.PyrisWebhookLectureIngestionExecutionDTO;
import de.tum.cit.aet.artemis.iris.service.pyris.dto.transcriptionIngestion.PyrisWebhookTranscriptionIngestionExecutionDTO;
import de.tum.cit.aet.artemis.iris.service.pyris.job.IngestionWebhookJob;
import de.tum.cit.aet.artemis.iris.web.open.PublicPyrisStatusUpdateResource;

Expand Down Expand Up @@ -108,6 +109,27 @@ public void executePipeline(String feature, String variant, Object executionDTO,
}
}

/**
 * Posts a transcription ingestion request to the Pyris transcription webhook of the given variant.
 *
 * @param variant      the variant of the feature to execute; it is appended to the webhook path
 * @param executionDTO the DTO that is converted to a JSON tree and sent as the request body
 * @throws PyrisConnectorException if the request fails without an HTTP status (client error or invalid argument);
 *                                     failures that carry an HTTP status are rethrown via {@code toIrisException}
 */
public void executeTranscriptionAdditionWebhook(String variant, PyrisWebhookTranscriptionIngestionExecutionDTO executionDTO) {
    final String webhookUrl = pyrisUrl + "/api/v1/webhooks/transcriptions/" + variant;
    try {
        // Serialization stays inside the try block: valueToTree may throw IllegalArgumentException.
        var requestBody = objectMapper.valueToTree(executionDTO);
        restTemplate.postForEntity(webhookUrl, requestBody, Void.class);
    }
    catch (HttpStatusCodeException httpError) {
        // Pyris answered with an error status: log it and translate it into the matching Iris exception.
        log.error("Failed to send transcription {} to Pyris: {}", executionDTO.transcription().lectureName(), httpError.getMessage());
        throw toIrisException(httpError);
    }
    catch (RestClientException | IllegalArgumentException requestError) {
        // No usable HTTP response (e.g. connection problem) or the request could not be built.
        log.error("Failed to send transcription {} to Pyris: {}", executionDTO.transcription().lectureName(), requestError.getMessage());
        throw new PyrisConnectorException("Could not fetch response from Pyris");
    }
}

/**
* Executes a webhook and send lectures to the webhook with the given variant
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import de.tum.cit.aet.artemis.iris.service.pyris.job.ExerciseChatJob;
import de.tum.cit.aet.artemis.iris.service.pyris.job.IngestionWebhookJob;
import de.tum.cit.aet.artemis.iris.service.pyris.job.PyrisJob;
import de.tum.cit.aet.artemis.iris.service.pyris.job.TranscriptionIngestionWebhookJob;

/**
* The PyrisJobService class is responsible for managing Pyris jobs in the Artemis system.
Expand Down Expand Up @@ -108,6 +109,22 @@ public String addIngestionWebhookJob(long courseId, long lectureId, long lecture
return token;
}

/**
 * Registers a new transcription ingestion webhook job in the job map.
 * The entry is stored with a timeout of 60 minutes.
 *
 * @param courseId  the ID of the course associated with the webhook job
 * @param lectureId the ID of the lecture associated with the webhook job
 * @return the generated token that identifies the created webhook job
 */
public String addTranscriptionIngestionWebhookJob(long courseId, long lectureId) {
    final String jobToken = generateJobIdToken();
    // The token doubles as the job id and as the key under which the job is stored.
    jobMap.put(jobToken, new TranscriptionIngestionWebhookJob(jobToken, courseId, lectureId), 60L, TimeUnit.MINUTES);
    return jobToken;
}

/**
* Remove a job from the job map.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,14 @@
import de.tum.cit.aet.artemis.iris.service.pyris.dto.lectureingestionwebhook.PyrisLectureUnitWebhookDTO;
import de.tum.cit.aet.artemis.iris.service.pyris.dto.lectureingestionwebhook.PyrisWebhookLectureDeletionExecutionDTO;
import de.tum.cit.aet.artemis.iris.service.pyris.dto.lectureingestionwebhook.PyrisWebhookLectureIngestionExecutionDTO;
import de.tum.cit.aet.artemis.iris.service.pyris.dto.transcriptionIngestion.PyrisTranscriptionIngestionWebhookDTO;
import de.tum.cit.aet.artemis.iris.service.pyris.dto.transcriptionIngestion.PyrisWebhookTranscriptionIngestionExecutionDTO;
import de.tum.cit.aet.artemis.iris.service.settings.IrisSettingsService;
import de.tum.cit.aet.artemis.lecture.domain.AttachmentType;
import de.tum.cit.aet.artemis.lecture.domain.AttachmentUnit;
import de.tum.cit.aet.artemis.lecture.domain.Lecture;
import de.tum.cit.aet.artemis.lecture.domain.LectureUnit;
import de.tum.cit.aet.artemis.lecture.domain.Transcription;
import de.tum.cit.aet.artemis.lecture.repository.LectureRepository;
import de.tum.cit.aet.artemis.lecture.repository.LectureUnitRepository;

Expand Down Expand Up @@ -69,6 +72,42 @@ public PyrisWebhookService(PyrisConnectorService pyrisConnectorService, PyrisJob
this.lectureRepository = lectureRepository;
}

/**
 * Sends the given transcription to Pyris so that it is added to the vector database.
 * Nothing is sent if lecture ingestion is not enabled for the course of the transcription's lecture.
 *
 * @param transcription the transcription to ingest; must not be null and must belong to a lecture
 * @return the token of the created job, or {@code null} if lecture ingestion is not enabled for the course
 * @throws IllegalArgumentException if the transcription is null or has no lecture
 */
public String addTranscriptionToPyrisDB(Transcription transcription) {
    if (transcription == null) {
        throw new IllegalArgumentException("Transcription cannot be null");
    }
    final Lecture lecture = transcription.getLecture();
    if (lecture == null) {
        throw new IllegalArgumentException("Transcription must be associated with a lecture");
    }
    final Course course = lecture.getCourse();
    if (!lectureIngestionEnabled(course)) {
        // Ingestion is switched off for this course: no job is created.
        return null;
    }
    var webhookPayload = new PyrisTranscriptionIngestionWebhookDTO(transcription, lecture.getId(), lecture.getTitle(), course.getId(), course.getTitle(),
            course.getDescription());
    return executeTranscriptionAdditionWebhook(webhookPayload);
}

/**
* Registers a transcription ingestion job and sends the transcription to Pyris so that it is added to the vector database
isabellagessl marked this conversation as resolved.
Show resolved Hide resolved
*
* @param toUpdateTranscription the transcription payload that is sent to Pyris
* @return jobToken if the job was created
*/
private String executeTranscriptionAdditionWebhook(PyrisTranscriptionIngestionWebhookDTO toUpdateTranscription) {
    // Register the job first: its token is part of the settings sent with the request.
    final String token = pyrisJobService.addTranscriptionIngestionWebhookJob(toUpdateTranscription.courseId(), toUpdateTranscription.lectureId());
    var executionSettings = new PyrisPipelineExecutionSettingsDTO(token, List.of(), artemisBaseUrl);
    // The request is sent without any initial stages.
    var webhookRequest = new PyrisWebhookTranscriptionIngestionExecutionDTO(toUpdateTranscription, executionSettings, List.of());
    pyrisConnectorService.executeTranscriptionAdditionWebhook("fullIngestion", webhookRequest);
    return token;
}

private boolean lectureIngestionEnabled(Course course) {
return irisSettingsService.getRawIrisSettingsFor(course).getIrisLectureIngestionSettings() != null
&& irisSettingsService.getRawIrisSettingsFor(course).getIrisLectureIngestionSettings().isEnabled();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package de.tum.cit.aet.artemis.iris.service.pyris.dto.transcriptionIngestion;

import com.fasterxml.jackson.annotation.JsonInclude;

import de.tum.cit.aet.artemis.lecture.domain.Transcription;

/**
* Webhook data transfer object that describes a lecture transcription for Pyris.
* It bundles the transcription with the identifier and name of its lecture and with the identifier,
* name and description of the course. Because of {@code JsonInclude.Include.NON_EMPTY}, properties
* that are null or empty are left out of the serialized JSON.
* <p>
* NOTE(review): this DTO embeds the {@link Transcription} entity itself, so the JSON shape follows the
* entity's own Jackson annotations - confirm this is the payload Pyris expects.
*
* @param transcription     the transcription to transfer
* @param lectureId         the id of the lecture the transcription belongs to
* @param lectureName       the name of that lecture
* @param courseId          the id of the course
* @param courseName        the name of the course
* @param courseDescription the description of the course
*/
@JsonInclude(JsonInclude.Include.NON_EMPTY)

public record PyrisTranscriptionIngestionWebhookDTO(Transcription transcription, long lectureId, String lectureName, long courseId, String courseName, String courseDescription) {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package de.tum.cit.aet.artemis.iris.service.pyris.dto.transcriptionIngestion;

import java.util.List;

import com.fasterxml.jackson.annotation.JsonInclude;

import de.tum.cit.aet.artemis.iris.service.pyris.dto.PyrisPipelineExecutionSettingsDTO;
import de.tum.cit.aet.artemis.iris.service.pyris.dto.status.PyrisStageDTO;

/**
* Request body for a transcription ingestion webhook call.
* Properties that are null or empty are left out of the serialized JSON
* ({@code JsonInclude.Include.NON_EMPTY}).
*
* @param transcription the transcription payload together with its lecture and course details
* @param settings      the pipeline execution settings sent along with the request
* @param initialStages the stages sent along with the request
*/
@JsonInclude(JsonInclude.Include.NON_EMPTY)
public record PyrisWebhookTranscriptionIngestionExecutionDTO(PyrisTranscriptionIngestionWebhookDTO transcription, PyrisPipelineExecutionSettingsDTO settings,
List<PyrisStageDTO> initialStages) {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package de.tum.cit.aet.artemis.iris.service.pyris.job;

import de.tum.cit.aet.artemis.core.domain.Course;
import de.tum.cit.aet.artemis.exercise.domain.Exercise;

/**
* An implementation of a PyrisJob for transcription ingestion in Pyris.
* This job is used to reference the details of the ingestion when Pyris sends a status update.
*
* @param jobId     the id (token) of the job
* @param courseId  the id of the course the ingested transcription belongs to
* @param lectureId the id of the lecture the ingested transcription belongs to
*/
public record TranscriptionIngestionWebhookJob(String jobId, long courseId, long lectureId) implements PyrisJob {

/**
* Always returns {@code false}, regardless of the given course.
*/
@Override
public boolean canAccess(Course course) {
return false;
}

/**
* Always returns {@code false}, regardless of the given exercise.
*/
@Override
public boolean canAccess(Exercise exercise) {
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ public class Lecture extends DomainObject {
@Cache(usage = CacheConcurrencyStrategy.NONSTRICT_READ_WRITE)
private List<LectureUnit> lectureUnits = new ArrayList<>();

// Transcriptions of this lecture; the owning side is the "lecture" field of Transcription.
// All persistence operations cascade to the transcriptions and orphanRemoval is enabled.
// The "lecture" property of each transcription is ignored by Jackson for this list.
@OneToMany(mappedBy = "lecture", cascade = CascadeType.ALL, orphanRemoval = true)
@JsonIgnoreProperties("lecture")
@Cache(usage = CacheConcurrencyStrategy.NONSTRICT_READ_WRITE)
private List<Transcription> transcriptions = new ArrayList<>();
isabellagessl marked this conversation as resolved.
Show resolved Hide resolved

@OneToMany(mappedBy = "lecture", cascade = CascadeType.REMOVE, orphanRemoval = true)
@Cache(usage = CacheConcurrencyStrategy.NONSTRICT_READ_WRITE)
@JsonIncludeProperties({ "id" })
Expand Down Expand Up @@ -141,6 +146,19 @@ public void addLectureUnit(LectureUnit lectureUnit) {
lectureUnit.setLecture(this);
}

/**
* Returns the list that holds the transcriptions of this lecture (the internal list, not a copy).
*
* @return the transcriptions of this lecture
*/
public List<Transcription> getTranscriptions() {
return transcriptions;
}

/**
* Replaces the list of transcriptions of this lecture with the given list.
* The lecture reference of the given transcriptions is not changed by this method.
* <p>
* NOTE(review): the association uses orphanRemoval = true; replacing the collection instance of a
* managed entity can be rejected by Hibernate - confirm callers only use this on new or detached instances.
*
* @param transcriptions the new list of transcriptions
*/
public void setTranscriptions(List<Transcription> transcriptions) {
this.transcriptions = transcriptions;
}

/**
 * Adds the given transcription to this lecture and sets this lecture as the transcription's lecture,
 * keeping both sides of the association in sync.
 *
 * @param transcription the transcription to add; must not be null
 * @throws NullPointerException if the transcription is null
 */
public void addTranscription(Transcription transcription) {
    // Fail before touching the list: previously a null argument was added to the list first and
    // only then caused a NullPointerException, leaving a null element behind.
    java.util.Objects.requireNonNull(transcription, "transcription must not be null");
    this.transcriptions.add(transcription);
    transcription.setLecture(this);
}

public Set<Post> getPosts() {
return posts;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package de.tum.cit.aet.artemis.lecture.domain;

import java.util.ArrayList;
import java.util.List;

import jakarta.persistence.CascadeType;
import jakarta.persistence.Entity;
import jakarta.persistence.FetchType;
import jakarta.persistence.JoinColumn;
import jakarta.persistence.ManyToOne;
import jakarta.persistence.OneToMany;
import jakarta.persistence.OrderBy;
import jakarta.persistence.Table;

import org.hibernate.annotations.Cache;
import org.hibernate.annotations.CacheConcurrencyStrategy;

import com.fasterxml.jackson.annotation.JsonIgnore;

import de.tum.cit.aet.artemis.core.domain.DomainObject;

@Entity
@Table(name = "transcription")
public class Transcription extends DomainObject {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optional: I think naming it LectureTranscription would make it more clear that it is a transcription of a lecture and not of something else.


@ManyToOne
@JoinColumn(name = "lecture_id")
@Cache(usage = CacheConcurrencyStrategy.NONSTRICT_READ_WRITE)
@JsonIgnore
private Lecture lecture;

private String language;

@OneToMany(cascade = CascadeType.ALL, fetch = FetchType.LAZY, orphanRemoval = true)
@OrderBy("startTime asc")
@JoinColumn(name = "transcription_id")
private List<TranscriptionSegment> segments = new ArrayList<>();

public Transcription() {
}

public Transcription(Lecture lecture, String language, List<TranscriptionSegment> segments) {
this.lecture = lecture;
this.language = language;
this.segments = segments;
}

public Lecture getLecture() {
return lecture;
}

public void setLecture(Lecture lecture) {
this.lecture = lecture;
}

public String getLanguage() {
return language;
}

public void setLanguage(String language) {
this.language = language;
}

public List<TranscriptionSegment> getSegments() {
return segments;
}

public void setSegments(List<TranscriptionSegment> segments) {
this.segments = segments;
}

@Override
public String toString() {
return "Transcription [language=" + language + ", segments=" + segments + "]";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package de.tum.cit.aet.artemis.lecture.domain;

import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.JoinColumn;
import jakarta.persistence.Lob;
import jakarta.persistence.ManyToOne;
import jakarta.persistence.Table;
import jakarta.validation.constraints.AssertTrue;
import jakarta.validation.constraints.NotNull;

import org.hibernate.annotations.Cache;
import org.hibernate.annotations.CacheConcurrencyStrategy;

import com.fasterxml.jackson.annotation.JsonIgnore;

import de.tum.cit.aet.artemis.core.domain.DomainObject;

@Entity
@Cache(usage = CacheConcurrencyStrategy.NONSTRICT_READ_WRITE)
@Table(name = "transcription_segments")
public class TranscriptionSegment extends DomainObject {

@NotNull
@Column(name = "start_time")
private Double startTime;

@NotNull
@Column(name = "end_time")
private Double endTime;

@AssertTrue(message = "End time must be greater than start time")
private boolean isTimeValid() {
return startTime == null || endTime == null || endTime > startTime;
}
isabellagessl marked this conversation as resolved.
Show resolved Hide resolved

@Lob
private String text;

@ManyToOne
@JoinColumn(name = "lecture_unit_id")
@Cache(usage = CacheConcurrencyStrategy.NONSTRICT_READ_WRITE)
@JsonIgnore
private LectureUnit lectureUnit;

@Column(name = "slide_number")
private int slideNumber;

public TranscriptionSegment() {
}

public TranscriptionSegment(Double startTime, Double endTime, String text, LectureUnit lectureUnit, int slideNumber) {
this.startTime = startTime;
this.endTime = endTime;
this.text = text;
this.lectureUnit = lectureUnit;
this.slideNumber = slideNumber;
}

public Double getStartTime() {
return startTime;
}

public void setStartTime(Double startTime) {
this.startTime = startTime;
}

public Double getEndTime() {
return endTime;
}

public void setEndTime(Double endTime) {
this.endTime = endTime;
}

public String getText() {
return text;
}

public void setText(String text) {
this.text = text;
}

public LectureUnit getLectureUnit() {
return lectureUnit;
}

public void setLectureUnit(LectureUnit lectureUnit) {
this.lectureUnit = lectureUnit;
}

public int getSlideNumber() {
return slideNumber;
}

public void setSlideNumber(int slideNumber) {
this.slideNumber = slideNumber;
}

@Override
public String toString() {
return "TranscriptionSegment [startTime = " + startTime + ", endTime = " + endTime + ", text = " + text + "]";
}
}
Loading
Loading