Skip to content

Commit

Permalink
Merge pull request #958 from mediathekview/feature/ard_topics
Browse files Browse the repository at this point in the history
ard: use new experimental topics-urls
  • Loading branch information
pidoubleyou authored Feb 7, 2024
2 parents 462eb30 + 5921d9e commit e3ef3f9
Show file tree
Hide file tree
Showing 12 changed files with 1,417 additions and 134 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ public class ArdConstants {

public static final String ITEM_URL = API_URL + "/page-gateway/pages/ard/item/";

public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/shows/";
public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/editorial/experiment-a-z?embedded=false";
public static final String TOPICS_COMPILATION_URL = API_URL + "/page-gateway/widgets/radiobremen/editorials/%s?pageNumber=0&pageSize=%s";
public static final String TOPIC_URL = API_URL + "/page-gateway/widgets/ard/asset/%s?pageSize=%d";
public static final String DAY_PAGE_URL = API_URL + "/page-gateway/compilations/%s/pastbroadcasts?startDateTime=%sT00:00:00.000Z&endDateTime=%sT23:59:59.000Z&pageNumber=0&pageSize=%d";

public static final int DAY_PAGE_SIZE = 100;
public static final int TOPICS_COMPILATION_PAGE_SIZE = 200;
public static final int TOPIC_PAGE_SIZE = 50;

public static final String DEFAULT_CLIENT = "ard";
Expand Down
27 changes: 18 additions & 9 deletions src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
import de.mediathekview.mlib.messages.listener.MessageListener;
import de.mediathekview.mserver.base.config.MServerConfigManager;
import de.mediathekview.mserver.base.messages.ServerMessages;
import de.mediathekview.mserver.crawler.ard.tasks.ArdDayPageTask;
import de.mediathekview.mserver.crawler.ard.tasks.ArdFilmDetailTask;
import de.mediathekview.mserver.crawler.ard.tasks.ArdTopicPageTask;
import de.mediathekview.mserver.crawler.ard.tasks.ArdTopicsOverviewTask;
import de.mediathekview.mserver.crawler.ard.tasks.*;
import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
import de.mediathekview.mserver.progress.listeners.SenderProgressListener;
Expand Down Expand Up @@ -76,6 +73,7 @@ protected RecursiveTask<Set<Film>> createCrawlerTask() {
for (final ForkJoinTask<Set<CrawlerUrlDTO>> senderTopicTask : senderTopicTasks) {
senderTopicUrls.addAll(senderTopicTask.get());
}
LOG.debug("sender topic tasks: {}", senderTopicUrls.size());
final ArdTopicPageTask topicTask =
new ArdTopicPageTask(this, new ConcurrentLinkedQueue<>(senderTopicUrls));
final int showsCountBefore = shows.size();
Expand All @@ -99,16 +97,27 @@ protected RecursiveTask<Set<Film>> createCrawlerTask() {

/**
 * Creates one topic task per sender: first for {@link ArdConstants#DEFAULT_CLIENT}, then for
 * every entry of {@link ArdConstants#CLIENTS}.
 *
 * <p>A sender whose task creation fails with an {@link ExecutionException} is logged and skipped
 * so the remaining senders are still processed. If the current thread is interrupted while
 * waiting, the interrupt status is restored and the tasks collected so far are returned; further
 * blocking calls on this thread would fail immediately anyway.
 *
 * @return the submitted tasks of all senders that could be prepared successfully
 */
private Set<ForkJoinTask<Set<CrawlerUrlDTO>>> createSenderTopicTasks() {
  final Set<ForkJoinTask<Set<CrawlerUrlDTO>>> topicTasks = new HashSet<>();

  try {
    topicTasks.add(getTopicEntriesBySender(ArdConstants.DEFAULT_CLIENT));
  } catch (ExecutionException e) {
    LOG.error("exception sender topic {}", ArdConstants.DEFAULT_CLIENT, e);
  } catch (InterruptedException e) {
    LOG.error("exception sender topic {}", ArdConstants.DEFAULT_CLIENT, e);
    // Catching InterruptedException clears the flag; restore it so callers can react.
    Thread.currentThread().interrupt();
    return topicTasks;
  }

  for (final String client : ArdConstants.CLIENTS) {
    try {
      topicTasks.add(getTopicEntriesBySender(client));
    } catch (ExecutionException e) {
      LOG.error("exception sender topic {}", client, e);
    } catch (InterruptedException e) {
      LOG.error("exception sender topic {}", client, e);
      // Restore the interrupt status and stop instead of failing once per remaining sender.
      Thread.currentThread().interrupt();
      break;
    }
  }

  return topicTasks;
}

private ForkJoinTask<Set<CrawlerUrlDTO>> getTopicEntriesBySender(final String sender) {
return forkJoinPool.submit(
new ArdTopicsOverviewTask(this, sender, createTopicsOverviewUrl(sender)));
private ForkJoinTask<Set<CrawlerUrlDTO>> getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException {
Set<CrawlerUrlDTO> senderTopics = forkJoinPool.submit(
new ArdTopicsTask(this, sender, createTopicsOverviewUrl(sender))).get();

LOG.debug("topics task result {}", senderTopics.size());
return forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, new ConcurrentLinkedQueue<>(senderTopics)));
}

private Queue<CrawlerUrlDTO> createTopicsOverviewUrl(final String client) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package de.mediathekview.mserver.crawler.ard;

import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;

import java.util.HashSet;
import java.util.Set;

/**
 * Mutable holder that combines a set of crawler urls with the paging position they were read
 * from: the current page and the total number of pages.
 *
 * <p>A new instance starts with an empty url set and both page values at {@code 0}.
 */
public class PaginationUrlDto {
  /** Collected urls; duplicates are dropped by the set. */
  private final Set<CrawlerUrlDTO> urls = new HashSet<>();

  /** Page number these urls belong to. */
  private int actualPage;

  /** Total number of pages available. */
  private int maxPages;

  /**
   * @return the backing url set itself (not a copy)
   */
  public Set<CrawlerUrlDTO> getUrls() {
    return this.urls;
  }

  /**
   * Adds a single url to the collected urls.
   *
   * @param url the url to add
   */
  public void addUrl(CrawlerUrlDTO url) {
    this.urls.add(url);
  }

  /**
   * Adds every url of the given set to the collected urls.
   *
   * @param urls the urls to add
   */
  public void addAll(Set<CrawlerUrlDTO> urls) {
    this.urls.addAll(urls);
  }

  /**
   * @return the page number stored via {@link #setActualPage(int)}
   */
  public int getActualPage() {
    return this.actualPage;
  }

  /**
   * @param actualPage the page number to store
   */
  public void setActualPage(int actualPage) {
    this.actualPage = actualPage;
  }

  /**
   * @return the page count stored via {@link #setMaxPages(int)}
   */
  public int getMaxPages() {
    return this.maxPages;
  }

  /**
   * @param maxPages the page count to store
   */
  public void setMaxPages(int maxPages) {
    this.maxPages = maxPages;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package de.mediathekview.mserver.crawler.ard.json;

import com.google.gson.JsonArray;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import de.mediathekview.mserver.base.utils.JsonUtils;
import de.mediathekview.mserver.crawler.ard.ArdConstants;
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
import java.lang.reflect.Type;
import java.util.*;

/**
 * Gson deserializer that turns the {@code widgets} array of a topics response into urls built
 * from {@link ArdConstants#TOPICS_COMPILATION_URL}.
 *
 * <p>For each widget the id is read from {@code links.self.id}. Widgets that lack any part of
 * that path, or whose parts have an unexpected JSON type, are skipped instead of aborting the
 * whole deserialization.
 */
public class ArdTopicsDeserializer implements JsonDeserializer<Set<CrawlerUrlDTO>> {
  private static final String ELEMENT_WIDGETS = "widgets";
  private static final String ELEMENT_LINKS = "links";
  private static final String ELEMENT_SELF = "self";

  private static final String ATTRIBUTE_ID = "id";

  /**
   * Collects one compilation url per usable widget.
   *
   * @param jsonElement the response root
   * @param type unused
   * @param jsonDeserializationContext unused
   * @return the urls found; empty if there is no {@code widgets} array
   */
  @Override
  public Set<CrawlerUrlDTO> deserialize(
      JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) {
    final Set<CrawlerUrlDTO> result = new HashSet<>();

    if (!JsonUtils.hasElements(jsonElement, ELEMENT_WIDGETS)) {
      return result;
    }

    final JsonElement widgets = jsonElement.getAsJsonObject().get(ELEMENT_WIDGETS);
    if (widgets == null || !widgets.isJsonArray()) {
      return result;
    }

    final JsonArray widgetArray = widgets.getAsJsonArray();
    for (final JsonElement widget : widgetArray) {
      parseWidget(widget).ifPresent(result::add);
    }

    return result;
  }

  /**
   * Builds the compilation url for a single widget.
   *
   * @param compilation one entry of the {@code widgets} array
   * @return the url, or empty if {@code links.self.id} cannot be resolved
   */
  private Optional<CrawlerUrlDTO> parseWidget(final JsonElement compilation) {
    if (!compilation.isJsonObject() || !JsonUtils.hasElements(compilation, ELEMENT_LINKS)) {
      return Optional.empty();
    }

    final JsonElement links = compilation.getAsJsonObject().get(ELEMENT_LINKS);
    if (links == null || !links.isJsonObject()) {
      return Optional.empty();
    }

    // JsonObject.get returns null for a missing member, so "self" has to be checked explicitly.
    final JsonElement selfLink = links.getAsJsonObject().get(ELEMENT_SELF);
    if (selfLink == null || !selfLink.isJsonObject()) {
      return Optional.empty();
    }

    return JsonUtils.getAttributeAsString(selfLink.getAsJsonObject(), ATTRIBUTE_ID)
        .map(
            id ->
                new CrawlerUrlDTO(
                    String.format(
                        ArdConstants.TOPICS_COMPILATION_URL,
                        id,
                        ArdConstants.TOPICS_COMPILATION_PAGE_SIZE)));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package de.mediathekview.mserver.crawler.ard.json;

import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import de.mediathekview.mserver.base.utils.JsonUtils;
import de.mediathekview.mserver.crawler.ard.ArdConstants;
import de.mediathekview.mserver.crawler.ard.PaginationUrlDto;
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;

import java.lang.reflect.Type;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;

/**
 * Gson deserializer that reads the {@code teasers} array and the {@code pagination} object of a
 * response into a {@link PaginationUrlDto}.
 *
 * <p>Every teaser contributes one url built from {@link ArdConstants#TOPIC_URL}. The page count
 * is derived from {@code pagination.totalElements} and {@code pagination.pageSize}. Missing or
 * mistyped parts of the document are treated as absent rather than raising an exception.
 */
public class ArdTopicsLetterDeserializer implements JsonDeserializer<PaginationUrlDto> {

  private static final String ELEMENT_TEASERS = "teasers";
  private static final String ELEMENT_LINKS = "links";
  private static final String ELEMENT_TARGET = "target";
  private static final String ELEMENT_PAGE_NUMBER = "pageNumber";
  private static final String ELEMENT_TOTAL_ELEMENTS = "totalElements";
  private static final String ELEMENT_PAGE_SIZE = "pageSize";
  private static final String ELEMENT_PAGINATION = "pagination";

  private static final String ATTRIBUTE_ID = "id";

  /**
   * Extracts the teaser urls and the paging information.
   *
   * @param jsonElement the response root
   * @param type unused
   * @param context unused
   * @return the filled dto; an untouched dto (no urls, pages {@code 0}) if the root is not an
   *     object or has no non-empty {@code teasers} array
   */
  @Override
  public PaginationUrlDto deserialize(
      final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) {
    final PaginationUrlDto results = new PaginationUrlDto();

    if (jsonElement == null || !jsonElement.isJsonObject()) {
      return results;
    }
    final JsonObject root = jsonElement.getAsJsonObject();

    final JsonElement teasers = root.get(ELEMENT_TEASERS);
    if (teasers == null || !teasers.isJsonArray() || teasers.getAsJsonArray().isEmpty()) {
      return results;
    }

    for (final JsonElement teaser : teasers.getAsJsonArray()) {
      // Entries that are not objects cannot carry an id; skip them.
      if (teaser.isJsonObject()) {
        results.addAll(parseTeaser(teaser.getAsJsonObject()));
      }
    }

    final JsonElement paginationElement = root.get(ELEMENT_PAGINATION);
    results.setActualPage(getChildElementAsIntOrZero(paginationElement, ELEMENT_PAGE_NUMBER));
    final int totalElements = getChildElementAsIntOrZero(paginationElement, ELEMENT_TOTAL_ELEMENTS);
    final int pageSize = getChildElementAsIntOrZero(paginationElement, ELEMENT_PAGE_SIZE);
    // Ceiling division; a non-positive page size yields no pages instead of dividing by zero.
    final int maxPages = pageSize <= 0 ? 0 : (totalElements + pageSize - 1) / pageSize;
    results.setMaxPages(maxPages);

    return results;
  }

  /**
   * Reads an int member of a JSON object.
   *
   * @param parentElement the element expected to be a JSON object; may be {@code null}
   * @param childElementName the member to read
   * @return the member as int, or {@code 0} if the parent is not an object or the member is
   *     missing or JSON null
   */
  private int getChildElementAsIntOrZero(
      final JsonElement parentElement, final String childElementName) {
    if (parentElement == null || !parentElement.isJsonObject()) {
      return 0;
    }
    // JsonObject.get returns null (not JsonNull) when the member does not exist.
    final JsonElement child = parentElement.getAsJsonObject().get(childElementName);
    if (child == null || child.isJsonNull()) {
      return 0;
    }
    return child.getAsInt();
  }

  /**
   * Builds the topic url of one teaser. The id is taken from {@code links.target.id} when that
   * path passes {@code JsonUtils.checkTreePath}, otherwise from the teaser's own {@code id}.
   *
   * @param teaserObject one entry of the {@code teasers} array
   * @return a set with the single url, or an empty set if no id was found
   */
  private Set<CrawlerUrlDTO> parseTeaser(final JsonObject teaserObject) {
    final Set<CrawlerUrlDTO> results = new HashSet<>();

    final Optional<String> id;

    if (JsonUtils.checkTreePath(teaserObject, null, ELEMENT_LINKS, ELEMENT_TARGET)) {
      final JsonObject targetObject =
          teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject();
      id = JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_ID);
    } else {
      id = JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID);
    }

    id.ifPresent(
        nonNullId ->
            results.add(
                new CrawlerUrlDTO(
                    String.format(
                        ArdConstants.TOPIC_URL, nonNullId, ArdConstants.TOPIC_PAGE_SIZE))));

    return results;
  }
}

This file was deleted.

Loading

0 comments on commit e3ef3f9

Please sign in to comment.