Skip to content

Commit

Permalink
Merge branch 'master' into feature/upgradeAndRefactor
Browse files Browse the repository at this point in the history
  • Loading branch information
pidoubleyou authored Jan 7, 2024
2 parents 007456c + 9c4d052 commit 46f4e65
Show file tree
Hide file tree
Showing 17 changed files with 221 additions and 59 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,13 @@ java -jar MServer.jar
| Crawler | liest Mediathek | beinhaltet Sender | bestückt Sender | entspricht Develop |
|---------|-----------|--------|---------|--|
| 3sat|3sat-Mediathek|3sat |3sat|x|
| ARD|ARD-Mediathek|Alpha, BR, Das Erste, HR, MDR, NDR, ONE, Radio Bremen, RBB, SR, SWR, WDR, tagesschau24|ARD, HR, MDR, NDR, Radio Bremen, RBB, SWR, WDR| x|
| ARD|ARD-Mediathek|Alpha, BR, Das Erste, HR, MDR, NDR, ONE, Radio Bremen, RBB, SR, SWR, WDR, tagesschau24|ARD, BR, HR, MDR, NDR, Radio Bremen, RBB, SWR, WDR| x|
| ARTE|ARTE-Mediathek|ARTE in DE, FR, EN, ES, PL, IT|ARTE.DE, ARTE.FR||
| BR|BR-Mediathek|BR|BR||
| DW|DW-Mediathek|DW|DW|x|
| FUNK | FUNK-Webseite | FUNK |FUNK |x|
| KIKA|KIKA-Mediathek|KIKA|KIKA|x|
| ORF|ORF-Mediathek|ORF1, ORF2, ORF3, ORFSport|ORF|x|
| PHOENIX|PHOENIX-Mediathek|PHOENIX|PHOENIX|x|
| SR|SR-Mediathek|SR|SR|x|
| SRF|SRF-Mediathek|SRF1, SRF2, SRFinfo|SRF|x|
| ZDF|ZDF-Mediathek|ZDF, ZDFneo, ZDFinfo|ZDF|x|
| ZDF|ZDF-Mediathek|ZDF, ZDFneo, ZDFinfo|ZDF|x|
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_17
group = 'de.mediathekview'
archivesBaseName = "MServer"
version = '3.1.221'
version = '3.1.226'

def jarName = 'MServer.jar'
def mainClass = 'mServer.Main'
Expand Down
2 changes: 2 additions & 0 deletions dist/mserver.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

<!-- banned film list (titles) -->
<system-bannedFilmList>file:bannedFilmList.txt</system-bannedFilmList>

<system-crawler-list>ARD,ZDF,ARTE,DW,KIKA,FUNK,3SAT,SR,SRF,SRFPOD,ORF,PHONIX</system-crawler-list>

<system-proxy-url></system-proxy-url>
<system-proxy-port></system-proxy-port>
Expand Down
9 changes: 8 additions & 1 deletion src/main/java/mServer/crawler/AddToFilmlist.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ private void performInitialCleanup() {
listeEinsortieren.removeIf(f -> !f.arr[DatenFilm.FILM_URL].toLowerCase().startsWith("http"));
listeEinsortieren.removeIf(f -> f.arr[DatenFilm.FILM_SENDER].equals(Const.ORF) && f.arr[DatenFilm.FILM_URL]
.matches(OrfVideoInfoDTO.FILTER_JUGENDSCHUTZ));
listeEinsortieren.removeIf(f -> f.arr[DatenFilm.FILM_SENDER].equals(Const.ARD) && isArdUrlToRemove(f.arr[DatenFilm.FILM_URL]));
listeEinsortieren.removeIf(f -> {
String groesse = f.arr[DatenFilm.FILM_GROESSE];
if (groesse.isEmpty()) {
Expand All @@ -132,7 +133,13 @@ private void performInitialCleanup() {
updateFunkMissingHost(listeEinsortieren);
removeSrfUrlParameter(listeEinsortieren);
}


/**
 * Checks whether the given film URL starts with one of the download hosts that must not be kept
 * for ARD entries.
 *
 * <p>NOTE(review): judging by the host names these are the ZDF, ARTE and KiKA download servers -
 * confirm.
 *
 * @param url the film URL to check
 * @return {@code true} if the URL starts with one of the listed host prefixes
 */
private boolean isArdUrlToRemove(final String url) {
  final String[] unwantedPrefixes = {
    "https://tvdlzdf-a.akamaihd.net",
    "https://arteptweb-a.akamaihd.net",
    "https://pmdonlinekika-a.akamaihd.net"
  };

  for (final String prefix : unwantedPrefixes) {
    if (url.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}

// check https://github.com/mediathekview/MServer/issues/904 for examples and more information
private void removeSrfUrlParameter(ListeFilme listeEinsortieren) {
final List<DatenFilm> list = listeEinsortieren.parallelStream()
Expand Down
61 changes: 48 additions & 13 deletions src/main/java/mServer/crawler/FilmeSuchen.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,20 @@
import mServer.crawler.sender.sr.SrCrawler;
import mServer.crawler.sender.srf.SrfCrawler;
import mServer.crawler.sender.zdf.ZdfCrawler;
import mServer.tool.MserverDaten;
import mServer.tool.MserverKonstanten;
import mServer.tool.StatsUpload;
import org.apache.commons.lang3.time.FastDateFormat;

import javax.swing.event.EventListenerList;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;


/**
* ###########################################################################################################
Expand All @@ -71,19 +77,48 @@ public FilmeSuchen() {
// für jeden Sender einen MediathekReader anlegen, mit der Prio ob
// sofort gestartet oder erst später
//Reader laden Spaltenweises Laden
mediathekListe.add(new ArdCrawler(this, 0));
mediathekListe.add(new ZdfCrawler(this, 0));
mediathekListe.add(new MediathekArte(this, 0));
mediathekListe.add(new DreiSatCrawler(this, 1));
mediathekListe.add(new DwCrawler(this, 0));
mediathekListe.add(new KikaApiCrawler(this, 0));
mediathekListe.add(new FunkCrawler(this, 0));
// Spalte 2
mediathekListe.add(new SrCrawler(this, 1));
mediathekListe.add(new SrfCrawler(this, 1));
mediathekListe.add(new MediathekSrfPod(this, 1));
mediathekListe.add(new OrfCrawler(this, 1));
mediathekListe.add(new PhoenixCrawler(this, 1));
List<String> crawlerList = Arrays.asList(MserverDaten.system[MserverKonstanten.SYSTEM_CRAWLER_LIST_NR].split(","));
if (MserverDaten.system[MserverKonstanten.SYSTEM_CRAWLER_LIST_NR].isEmpty()) {
crawlerList = new ArrayList<>(Arrays.asList("ARD","ZDF","ARTE","DW","KIKA","FUNK","3SAT","SR","SRF","SRFPOD","ORF","PHONIX"));
}

if (crawlerList.contains("ARD")) {
mediathekListe.add(new ArdCrawler(this, 0));
}
if (crawlerList.contains("ZDF")) {
mediathekListe.add(new ZdfCrawler(this, 0));
}
if (crawlerList.contains("ARTE")) {
mediathekListe.add(new MediathekArte(this, 0));
}
if (crawlerList.contains("DW")) {
mediathekListe.add(new DwCrawler(this, 0));
}
if (crawlerList.contains("KIKA")) {
mediathekListe.add(new KikaApiCrawler(this, 0));
}
if (crawlerList.contains("FUNK")) {
mediathekListe.add(new FunkCrawler(this, 0));
}
if (crawlerList.contains("3SAT")) {
mediathekListe.add(new DreiSatCrawler(this, 1));
}
if (crawlerList.contains("SR")) {
mediathekListe.add(new SrCrawler(this, 1));
}
if (crawlerList.contains("SRF")) {
mediathekListe.add(new SrfCrawler(this, 1));
}
if (crawlerList.contains("SRFPOD")) {
mediathekListe.add(new MediathekSrfPod(this, 1));
}
if (crawlerList.contains("ORF")) {
mediathekListe.add(new OrfCrawler(this, 1));
}
if (crawlerList.contains("PHONIX")) {
mediathekListe.add(new PhoenixCrawler(this, 1));
}

}

public static String[] getNamenSender() {
Expand Down
74 changes: 74 additions & 0 deletions src/main/java/mServer/crawler/sender/ard/ArdUrlOptimizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package mServer.crawler.sender.ard;

import mServer.crawler.sender.base.UrlUtils;

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

/**
 * Tries to derive a 1920 ("full HD") download URL from a known 1280 download URL.
 *
 * <p>For most URLs this is done by swapping a known 1280 URL fragment for its 1920 counterpart;
 * URLs containing {@code wdrmedien} use a numeric scheme instead (see
 * {@link #determineWdrFullHdUrl(String)}). A derived URL is only returned if
 * {@code UrlUtils.existsUrl} confirms it, otherwise the original URL is returned.
 */
public class ArdUrlOptimizer {

  // Pairs of URL fragments: the 1280 fragment and the 1920 fragment that replaces it.
  private static final String BR_URL_1280 = "_X.mp4";
  private static final String BR_URL_1920 = "_HD.mp4";
  private static final String HR_URL_1280 = "1280x720-50p-3200kbit.mp4";
  private static final String HR_URL_1920 = "1920x1080-50p-5000kbit.mp4";
  private static final String NDR_URL_1280 = ".hd.mp4";
  private static final String NDR_URL_1920 = ".1080.mp4";
  private static final String RBB_URL_1280 = "hd1080-avc720.mp4";
  private static final String RBB_URL_1920 = "hd1080-avc1080.mp4";
  private static final String SR_URL_1280 = "_P.mp4";
  private static final String SR_URL_1920 = "_H.mp4";
  private static final String SWR_URL_1280 = ".xl.mp4";
  private static final String SWR_URL_1920 = ".xxl.mp4";

  /** Maps each 1280 URL fragment to the replacement fragment(s) for the 1920 variant. */
  private static final Map<String, String[]> HD_OPTIMIZE = new HashMap<>();

  static {
    HD_OPTIMIZE.put(BR_URL_1280, new String[] {BR_URL_1920});
    HD_OPTIMIZE.put(HR_URL_1280, new String[] {HR_URL_1920});
    HD_OPTIMIZE.put(NDR_URL_1280, new String[] {NDR_URL_1920});
    HD_OPTIMIZE.put(RBB_URL_1280, new String[] {RBB_URL_1920});
    HD_OPTIMIZE.put(SR_URL_1280, new String[] {SR_URL_1920});
    HD_OPTIMIZE.put(SWR_URL_1280, new String[] {SWR_URL_1920});
  }

  /**
   * Returns the 1920 variant of the given URL if one can be derived and it exists.
   *
   * @param url the download URL to optimize
   * @return the derived 1920 URL if {@code UrlUtils.existsUrl} accepts it, otherwise {@code url}
   *     unchanged
   */
  public String optimizeHdUrl(final String url) {
    String fullHdUrl = "";
    if (url.contains("wdrmedien")) {
      fullHdUrl = determineWdrFullHdUrl(url);
    } else {
      for (Map.Entry<String, String[]> entry : HD_OPTIMIZE.entrySet()) {
        if (url.contains(entry.getKey())) {
          // every array currently holds one fragment; with several, the last one would win
          for (String optimizeFragment : entry.getValue()) {
            fullHdUrl = url.replace(entry.getKey(), optimizeFragment);
          }
        }
      }
    }

    // A candidate identical to the input gives the same result either way, so the existence
    // check is skipped for it.
    if (!fullHdUrl.isEmpty() && !fullHdUrl.equals(url) && UrlUtils.existsUrl(fullHdUrl)) {
      return fullHdUrl;
    }

    return url;
  }

  /**
   * wdr urls use the following pattern: the last part of the filename determines the quality.
   * This is the actual order: 1920, 480, 640, 960, 1280. To determine the 1920-url from the
   * 1280-url subtract 4.
   * example:
   * 1280: https://wdrmedien-a.akamaihd.net/.../2625725_54085881.mp4
   * 1920: https://wdrmedien-a.akamaihd.net/.../2625725_54085877.mp4
   *
   * @param url the 1280 download URL
   * @return the derived 1920 URL, or {@code url} unchanged if no file name is available or its
   *     last part is not an integer
   */
  private String determineWdrFullHdUrl(String url) {
    final Optional<String> fileName = UrlUtils.getFileName(url);
    if (!fileName.isPresent()) {
      return url;
    }

    final String name = fileName.get();
    final String qualityId = name.substring(name.indexOf("_") + 1).replace(".mp4", "");
    try {
      final int hdInt = Integer.parseInt(qualityId) - 4;
      return url.replace(qualityId, Integer.toString(hdInt));
    } catch (NumberFormatException ignored) {
      // The file name does not follow the numeric pattern: keep the original URL instead of
      // letting the exception escape from the optimizer.
      return url;
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
import java.util.stream.StreamSupport;
import mServer.crawler.sender.MediathekReader;
import mServer.crawler.sender.ard.ArdFilmUrlInfoDto;
import mServer.crawler.sender.ard.ArdUrlOptimizer;
import mServer.crawler.sender.base.JsonUtils;
import mServer.crawler.sender.base.UrlUtils;
import mServer.crawler.sender.base.Qualities;
import mServer.crawler.sender.swr.SwrUrlOptimizer;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

Expand All @@ -42,12 +42,12 @@ public class ArdMediaArrayToDownloadUrlsConverter {

private static final String FILE_TYPE_F4M = "f4m";

private final SwrUrlOptimizer ardOptimizer;
private final ArdUrlOptimizer ardOptimizer;
private final Map<Qualities, Set<ArdFilmUrlInfoDto>> urls;
private MediathekReader crawler;

/**
 * Creates a converter with a fresh {@link ArdUrlOptimizer} and an empty URL map keyed by
 * {@link Qualities}.
 */
public ArdMediaArrayToDownloadUrlsConverter() {
  this.ardOptimizer = new ArdUrlOptimizer();
  this.urls = new EnumMap<>(Qualities.class);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Optional;
Expand All @@ -16,12 +17,16 @@
public class ArdTopicsOverviewDeserializer implements JsonDeserializer<Set<CrawlerUrlDTO>> {

private static final String ELEMENT_COMPILATIONS = "compilations";
private static final String ELEMENT_PUBLICATION_SERVICE = "publicationService";
private static final String ELEMENT_TEASERS = "teasers";
private static final String ELEMENT_LINKS = "links";
private static final String ELEMENT_TARGET = "target";
private static final String ELEMENT_WIDGETS = "widgets";

private static final String ATTRIBUTE_ID = "id";
private static final String ATTRIBUTE_NAME = "name";

private static final String[] IGNORED_SENDER = new String[] {"zdf", "kika", "3sat", "arte"};

@Override
public Set<CrawlerUrlDTO> deserialize(JsonElement jsonElement, Type type,
Expand Down Expand Up @@ -71,10 +76,29 @@ private Set<CrawlerUrlDTO> parseLetter(final JsonObject letterObject) {
id = JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID);
}

id.ifPresent(s -> results.add(new CrawlerUrlDTO(
(ArdConstants.TOPIC_URL).formatted(s, ArdConstants.TOPIC_PAGE_SIZE))));
if (isRelevant(teaserObject)) {
id.ifPresent(s -> results.add(new CrawlerUrlDTO(
(ArdConstants.TOPIC_URL).formatted(s, ArdConstants.TOPIC_PAGE_SIZE))));
}
}

return results;
}


/**
 * Decides whether a teaser should be crawled, based on the name of its publication service.
 *
 * @param teaserObject the JSON object of a single teaser
 * @return {@code false} if the teaser has a publication service whose name matches (ignoring
 *     case) an entry of {@code IGNORED_SENDER}; {@code true} otherwise, including when the
 *     publication service or its name is missing
 */
private boolean isRelevant(final JsonObject teaserObject) {
  if (!teaserObject.has(ELEMENT_PUBLICATION_SERVICE)) {
    return true;
  }

  final JsonObject service = teaserObject.get(ELEMENT_PUBLICATION_SERVICE).getAsJsonObject();
  final Optional<String> serviceName = JsonUtils.getAttributeAsString(service, ATTRIBUTE_NAME);
  if (!serviceName.isPresent()) {
    return true;
  }

  final String name = serviceName.get();
  for (final String ignoredSender : IGNORED_SENDER) {
    if (ignoredSender.equalsIgnoreCase(name)) {
      return false;
    }
  }
  return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ public class ArdTopicPageTask extends ArdTaskBase<ArdFilmInfoDto, CrawlerUrlDTO>
// Sportschau
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2Rhc2Vyc3RlLmRlL3Nwb3J0c2NoYXU");
// temporarily load all pages => remove once the old entries exist
// Morden im Norden
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2Rhc2Vyc3RlLmRlL21vcmRlbi1pbS1ub3JkZW4");
// Babylon Berlin
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu");
// Watzmann ermittelt
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2Rhc2Vyc3RlLmRlL3dhdHptYW5uLWVybWl0dGVsdA");
// Odysso
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3N3ci5kZS8yMjI2MTE0");
// MDR in aller Freundschaft
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL21kci5kZS9zZW5kZXJlaWhlbi9zdGFmZmVsc2VyaWUtaW4tYWxsZXItZnJldW5kc2NoYWZ0");
// Super.markt
TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3JiYi1vbmxpbmUuZGUvc3VwZXJtYXJrdA");
}

public ArdTopicPageTask(MediathekReader aCrawler,
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/mServer/crawler/sender/arte/MediathekArte.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,13 @@ protected synchronized void meldungStart() {

senderLanguages.put(Const.ARTE_DE, "de");
senderLanguages.put(Const.ARTE_FR, "fr");
/*if (LocalDate.now().getDayOfYear() % 2 == 0) {
if (LocalDate.now().getDayOfYear() % 2 == 0) {
senderLanguages.put(ARTE_EN, "en");
senderLanguages.put(ARTE_ES, "es");
} else {
senderLanguages.put(ARTE_IT, "it");
senderLanguages.put(ARTE_PL, "pl");
}*/
}

// starte Sprachen Sender, da es sonst zu doppelten Sendern kommen kann
senderLanguages.keySet().forEach(sender -> mlibFilmeSuchen.melden(sender, getMax(), getProgress(), ""));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import jakarta.ws.rs.core.Response;
import mServer.crawler.CrawlerTool;
import mServer.crawler.sender.MediathekReader;
import mServer.crawler.sender.ard.ArdUrlOptimizer;
import mServer.crawler.sender.base.AbstractJsonRestTask;
import mServer.crawler.sender.base.AbstractRecursivConverterTask;
import mServer.crawler.sender.base.GeoLocations;
Expand All @@ -17,6 +18,7 @@
import mServer.crawler.sender.kika.KikaApiVideoInfoDto;
import mServer.crawler.sender.kika.Resolution;
import mServer.crawler.sender.kika.json.KikaApiVideoInfoPageDeserializer;
import mServer.crawler.sender.zdf.ZdfVideoUrlOptimizer;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

Expand All @@ -36,9 +38,13 @@ public class KikaApiFilmTask extends AbstractJsonRestTask<DatenFilm, KikaApiVide
private static final long serialVersionUID = 1L;
private static final Logger LOG = LogManager.getLogger(KikaApiFilmTask.class);
private static final RateLimiter LIMITER = RateLimiter.create(15);
private transient ArdUrlOptimizer ardUrlOptimizer;
private transient ZdfVideoUrlOptimizer zdfVideoUrlOptimizer;

/**
 * Creates the task and the URL optimizers applied to HD URLs.
 *
 * <p>NOTE(review): both optimizer fields are declared {@code transient} and are assigned only
 * here - confirm they are re-created if a task instance is ever deserialized.
 *
 * @param crawler the crawler this task belongs to
 * @param urlToCrawlDTOs the queue of film DTOs to process
 */
public KikaApiFilmTask(
    MediathekReader crawler, ConcurrentLinkedQueue<KikaApiFilmDto> urlToCrawlDTOs) {
  super(crawler, urlToCrawlDTOs, Optional.empty());
  this.ardUrlOptimizer = new ArdUrlOptimizer();
  this.zdfVideoUrlOptimizer = new ZdfVideoUrlOptimizer();
}

@Override
Expand Down Expand Up @@ -113,7 +119,10 @@ protected void postProcessing(KikaApiVideoInfoDto aResponseObj, KikaApiFilmDto a
CrawlerTool.addUrlKlein(aFilm, videoUrls.get(Resolution.SMALL));
}
if (videoUrls.containsKey(Resolution.HD)) {
CrawlerTool.addUrlHd(aFilm, videoUrls.get(Resolution.HD));
String url = videoUrls.get(Resolution.HD);
url = ardUrlOptimizer.optimizeHdUrl(url);
url = zdfVideoUrlOptimizer.getOptimizedUrlHd(url);
CrawlerTool.addUrlHd(aFilm, url);
}
//
getGeo(aDTO).ifPresent(geos -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ private static Optional<Duration> parseDuration(Document aDocument) {
}

ChronoUnit unitValue = unit.get();
if (unitValue == ChronoUnit.MINUTES) {
if (unitValue == ChronoUnit.SECONDS || unitValue == ChronoUnit.MINUTES) {
return Optional.of(
Duration.ofMinutes(Long.parseLong(parts[0]))
.plusSeconds(Long.parseLong(parts[1]))
Expand All @@ -212,6 +212,9 @@ private static Optional<ChronoUnit> determineChronoUnit(String aDuration) {
if (aDuration.contains("Std.")) {
return Optional.of(ChronoUnit.HOURS);
}
if (aDuration.contains("Sek.")) {
return Optional.of(ChronoUnit.SECONDS);
}

return Optional.empty();
}
Expand Down
Loading

0 comments on commit 46f4e65

Please sign in to comment.