Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chapter Fetch Improvements #754

Merged
merged 7 commits into from
Nov 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package eu.kanade.tachiyomi.util.chapter

/**
 * Helpers for cleaning up chapter names reported by sources.
 */
object ChapterSanitizer {
    /**
     * Returns this chapter name without the redundant manga [title] prefix and without
     * leading/trailing whitespace or separator characters (see [CHAPTER_TRIM_CHARS]).
     *
     * The prefix match is exact (case-sensitive) and is applied after the name has been
     * trimmed of whitespace.
     *
     * If removing the prefix would leave nothing (the chapter is named exactly like the
     * manga, optionally followed by separators), the name is kept and only trimmed, so a
     * chapter never loses its whole name just because it matches the title.
     *
     * @param title the manga title to strip from the start of the chapter name
     * @return the cleaned chapter name; empty only if the name consists solely of
     * whitespace/separator characters
     */
    fun String.sanitize(title: String): String {
        val trimmed = trim()
        return trimmed
            .removePrefix(title)
            .trim(*CHAPTER_TRIM_CHARS)
            // Fall back to the original name rather than returning an empty chapter name.
            .ifEmpty { trimmed.trim(*CHAPTER_TRIM_CHARS) }
    }

    /** Characters stripped from both ends of a chapter name. */
    private val CHAPTER_TRIM_CHARS =
        charArrayOf(
            // Whitespace
            '\u0009',
            '\u000A',
            '\u000B',
            '\u000C',
            '\u000D',
            '\u0020',
            '\u0085',
            '\u00A0',
            '\u1680',
            '\u2000',
            '\u2001',
            '\u2002',
            '\u2003',
            '\u2004',
            '\u2005',
            '\u2006',
            '\u2007',
            '\u2008',
            '\u2009',
            '\u200A',
            '\u2028',
            '\u2029',
            '\u202F',
            '\u205F',
            '\u3000',
            // Separators
            '-',
            '_',
            ',',
            ':',
        )
}
270 changes: 162 additions & 108 deletions server/src/main/kotlin/suwayomi/tachidesk/manga/impl/Chapter.kt
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,21 @@ package suwayomi.tachidesk.manga.impl
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */

import com.google.common.cache.Cache
import com.google.common.cache.CacheBuilder
import eu.kanade.tachiyomi.source.model.SChapter
import eu.kanade.tachiyomi.source.model.SManga
import eu.kanade.tachiyomi.source.online.HttpSource
import eu.kanade.tachiyomi.util.chapter.ChapterRecognition
import eu.kanade.tachiyomi.util.chapter.ChapterSanitizer.sanitize
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.serialization.Serializable
import mu.KotlinLogging
import org.jetbrains.exposed.dao.id.EntityID
import org.jetbrains.exposed.sql.Op
import org.jetbrains.exposed.sql.ResultRow
import org.jetbrains.exposed.sql.SortOrder
import org.jetbrains.exposed.sql.SortOrder.ASC
import org.jetbrains.exposed.sql.SqlExpressionBuilder.inList
import org.jetbrains.exposed.sql.and
import org.jetbrains.exposed.sql.batchInsert
Expand All @@ -41,7 +45,11 @@ import suwayomi.tachidesk.manga.model.table.MangaTable
import suwayomi.tachidesk.manga.model.table.PageTable
import suwayomi.tachidesk.manga.model.table.toDataClass
import suwayomi.tachidesk.server.serverConfig
import java.lang.Long
import java.time.Instant
import java.util.TreeSet
import java.util.concurrent.TimeUnit
import kotlin.math.max

object Chapter {
private val logger = KotlinLogging.logger { }
Expand Down Expand Up @@ -109,133 +117,179 @@ object Chapter {
}
}

val map: Cache<Int, Mutex> =
CacheBuilder.newBuilder()
.expireAfterAccess(10, TimeUnit.MINUTES)
.build()

suspend fun fetchChapterList(mangaId: Int): List<SChapter> {
val manga = getManga(mangaId)
val source = getCatalogueSourceOrStub(manga.sourceId.toLong())
val mutex = map.get(mangaId) { Mutex() }
val chapterList =
mutex.withLock {
val manga = getManga(mangaId)
val source = getCatalogueSourceOrStub(manga.sourceId.toLong())

val sManga =
SManga.create().apply {
title = manga.title
url = manga.url
}

val sManga =
SManga.create().apply {
title = manga.title
url = manga.url
}
val numberOfCurrentChapters = getCountOfMangaChapters(mangaId)
val chapterList = source.getChapterList(sManga)

val numberOfCurrentChapters = getCountOfMangaChapters(mangaId)
val chapterList = source.getChapterList(sManga)
// Recognize number for new chapters.
chapterList.forEach { chapter ->
(source as? HttpSource)?.prepareNewChapter(chapter, sManga)
val chapterNumber = ChapterRecognition.parseChapterNumber(manga.title, chapter.name, chapter.chapter_number.toDouble())
chapter.chapter_number = chapterNumber.toFloat()
chapter.name = chapter.name.sanitize(manga.title)
chapter.scanlator = chapter.scanlator?.ifBlank { null }
}

// Recognize number for new chapters.
chapterList.forEach { chapter ->
(source as? HttpSource)?.prepareNewChapter(chapter, sManga)
val chapterNumber = ChapterRecognition.parseChapterNumber(manga.title, chapter.name, chapter.chapter_number.toDouble())
chapter.chapter_number = chapterNumber.toFloat()
}
val now = Instant.now().epochSecond
// Highest upload date seen so far among new chapters. Used as the fallback for
// new chapters without an upload date, so they are not dated later than newer chapters.
var maxSeenUploadDate = 0L

val now = Instant.now().epochSecond
val chaptersInDb =
transaction {
ChapterTable.select { ChapterTable.manga eq mangaId }
.map { ChapterTable.toDataClass(it) }
.toSet()
}
val chaptersInDb =
transaction {
ChapterTable.select { ChapterTable.manga eq mangaId }
.map { ChapterTable.toDataClass(it) }
.toList()
}

val chaptersToInsert = mutableListOf<ChapterDataClass>()
val chaptersToUpdate = mutableListOf<ChapterDataClass>()

chapterList.reversed().forEachIndexed { index, fetchedChapter ->
val chapterEntry = chaptersInDb.find { it.url == fetchedChapter.url }

val chapterData =
ChapterDataClass.fromSChapter(
fetchedChapter,
chapterEntry?.id ?: 0,
index + 1,
now,
mangaId,
runCatching {
(source as? HttpSource)?.getChapterUrl(fetchedChapter)
}.getOrNull(),
)

if (chapterEntry == null) {
chaptersToInsert.add(chapterData)
} else {
chaptersToUpdate.add(chapterData)
}
}
val chaptersToInsert = mutableListOf<ChapterDataClass>()
val chaptersToUpdate = mutableListOf<ChapterDataClass>()

chapterList.reversed().forEachIndexed { index, fetchedChapter ->
val chapterEntry = chaptersInDb.find { it.url == fetchedChapter.url }

val chapterData =
ChapterDataClass.fromSChapter(
fetchedChapter,
chapterEntry?.id ?: 0,
index + 1,
now,
mangaId,
runCatching {
(source as? HttpSource)?.getChapterUrl(fetchedChapter)
}.getOrNull(),
)

transaction {
if (chaptersToInsert.isNotEmpty()) {
ChapterTable.batchInsert(chaptersToInsert) {
this[ChapterTable.url] = it.url
this[ChapterTable.name] = it.name
this[ChapterTable.date_upload] = it.uploadDate
this[ChapterTable.chapter_number] = it.chapterNumber
this[ChapterTable.scanlator] = it.scanlator
this[ChapterTable.sourceOrder] = it.index
this[ChapterTable.fetchedAt] = it.fetchedAt
this[ChapterTable.manga] = it.mangaId
this[ChapterTable.realUrl] = it.realUrl
if (chapterEntry == null) {
val newChapterData =
if (chapterData.uploadDate == 0L) {
val altDateUpload = if (maxSeenUploadDate == 0L) now else maxSeenUploadDate
chapterData.copy(uploadDate = altDateUpload)
} else {
maxSeenUploadDate = max(maxSeenUploadDate, chapterData.uploadDate)
Syer10 marked this conversation as resolved.
Show resolved Hide resolved
chapterData
}
chaptersToInsert.add(newChapterData)
} else {
val newChapterData =
if (chapterData.uploadDate == 0L) {
chapterData.copy(uploadDate = chapterEntry.uploadDate)
} else {
chapterData
}
chaptersToUpdate.add(newChapterData)
}
}
}

if (chaptersToUpdate.isNotEmpty()) {
BatchUpdateStatement(ChapterTable).apply {
chaptersToUpdate.forEach {
addBatch(EntityID(it.id, ChapterTable))
this[ChapterTable.name] = it.name
this[ChapterTable.date_upload] = it.uploadDate
this[ChapterTable.chapter_number] = it.chapterNumber
this[ChapterTable.scanlator] = it.scanlator
this[ChapterTable.sourceOrder] = it.index
this[ChapterTable.realUrl] = it.realUrl
val deletedChapterNumbers = TreeSet<Float>()
val deletedReadChapterNumbers = TreeSet<Float>()
val deletedBookmarkedChapterNumbers = TreeSet<Float>()
val deletedDownloadedChapterNumbers = TreeSet<Float>()
val deletedChapterNumberDateFetchMap = mutableMapOf<Float, kotlin.Long>()

// clear any orphaned/duplicate chapters that are in the db but not in `chapterList`
val dbChapterCount = chaptersInDb.count()
if (dbChapterCount > chapterList.size) { // we got some clean up due
val chapterUrls = chapterList.map { it.url }.toSet()

val chaptersIdsToDelete =
chaptersInDb.mapNotNull { dbChapter ->
if (!chapterUrls.contains(dbChapter.url)) {
if (dbChapter.read) deletedReadChapterNumbers.add(dbChapter.chapterNumber)
Syer10 marked this conversation as resolved.
Show resolved Hide resolved
if (dbChapter.bookmarked) deletedBookmarkedChapterNumbers.add(dbChapter.chapterNumber)
if (dbChapter.downloaded) deletedDownloadedChapterNumbers.add(dbChapter.chapterNumber)
deletedChapterNumbers.add(dbChapter.chapterNumber)
deletedChapterNumberDateFetchMap[dbChapter.chapterNumber] = dbChapter.fetchedAt
dbChapter.id
} else {
null
}
}

transaction {
PageTable.deleteWhere { PageTable.chapter inList chaptersIdsToDelete }
ChapterTable.deleteWhere { ChapterTable.id inList chaptersIdsToDelete }
}
execute(this@transaction)
}
}

MangaTable.update({ MangaTable.id eq mangaId }) {
it[MangaTable.chaptersLastFetchedAt] = Instant.now().epochSecond
}
}
transaction {
if (chaptersToInsert.isNotEmpty()) {
ChapterTable.batchInsert(chaptersToInsert) { chapter ->
this[ChapterTable.url] = chapter.url
this[ChapterTable.name] = chapter.name
this[ChapterTable.date_upload] = chapter.uploadDate
this[ChapterTable.chapter_number] = chapter.chapterNumber
this[ChapterTable.scanlator] = chapter.scanlator
this[ChapterTable.sourceOrder] = chapter.index
this[ChapterTable.fetchedAt] = chapter.fetchedAt
this[ChapterTable.manga] = chapter.mangaId
this[ChapterTable.realUrl] = chapter.realUrl
this[ChapterTable.isRead] = false
this[ChapterTable.isBookmarked] = false
this[ChapterTable.isDownloaded] = false

// is recognized chapter number
if (chapter.chapterNumber >= 0f && chapter.chapterNumber in deletedChapterNumbers) {
this[ChapterTable.isRead] = chapter.chapterNumber in deletedReadChapterNumbers
this[ChapterTable.isBookmarked] = chapter.chapterNumber in deletedBookmarkedChapterNumbers
this[ChapterTable.isDownloaded] = chapter.chapterNumber in deletedDownloadedChapterNumbers
// Try to use the fetch date of the original entry to not pollute 'Updates' tab
deletedChapterNumberDateFetchMap[chapter.chapterNumber]?.let {
this[ChapterTable.fetchedAt] = it
}
}
}
}

val newChapters =
transaction {
ChapterTable.select { ChapterTable.manga eq mangaId }
.orderBy(ChapterTable.sourceOrder to SortOrder.DESC).toList()
}
if (chaptersToUpdate.isNotEmpty()) {
BatchUpdateStatement(ChapterTable).apply {
chaptersToUpdate.forEach {
addBatch(EntityID(it.id, ChapterTable))
this[ChapterTable.name] = it.name
this[ChapterTable.date_upload] = it.uploadDate
this[ChapterTable.chapter_number] = it.chapterNumber
this[ChapterTable.scanlator] = it.scanlator
this[ChapterTable.sourceOrder] = it.index
this[ChapterTable.realUrl] = it.realUrl
}
execute(this@transaction)
}
}

// clear any orphaned/duplicate chapters that are in the db but not in `chapterList`
val dbChapterCount = newChapters.count()
if (dbChapterCount > chapterList.size) { // we got some clean up due
val dbChapterList =
transaction {
ChapterTable.select { ChapterTable.manga eq mangaId }
.orderBy(ChapterTable.url to ASC).toList()
MangaTable.update({ MangaTable.id eq mangaId }) {
it[MangaTable.chaptersLastFetchedAt] = Instant.now().epochSecond
}
}

val chapterUrls = chapterList.map { it.url }.toSet()

val chaptersIdsToDelete =
dbChapterList.mapIndexedNotNull { index, dbChapter ->
val isOrphaned = !chapterUrls.contains(dbChapter[ChapterTable.url])
val isDuplicate =
index < dbChapterList.lastIndex && dbChapter[ChapterTable.url] == dbChapterList[index + 1][ChapterTable.url]
val deleteChapter = isOrphaned || isDuplicate
if (deleteChapter) {
dbChapter[ChapterTable.id].value
} else {
null
val newChapters =
transaction {
ChapterTable.select { ChapterTable.manga eq mangaId }
.orderBy(ChapterTable.sourceOrder to SortOrder.DESC).toList()
}

if (manga.inLibrary) {
downloadNewChapters(mangaId, numberOfCurrentChapters, newChapters)
}

transaction {
PageTable.deleteWhere { PageTable.chapter inList chaptersIdsToDelete }
ChapterTable.deleteWhere { ChapterTable.id inList chaptersIdsToDelete }
chapterList
}
}

if (manga.inLibrary) {
downloadNewChapters(mangaId, numberOfCurrentChapters, newChapters)
}

return chapterList
}
Expand Down
Loading