Skip to content

Commit

Permalink
fix(FeedExpander): if parse fails, include offending url in exception…
Browse files Browse the repository at this point in the history
… message (#3938)

Also some refactors
  • Loading branch information
dvikan authored Jan 29, 2024
1 parent b2c8475 commit d01c462
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 41 deletions.
28 changes: 16 additions & 12 deletions actions/DetectAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ class DetectAction implements ActionInterface
{
public function execute(Request $request)
{
$targetURL = $request->get('url');
$url = $request->get('url');
$format = $request->get('format');

if (!$targetURL) {
throw new \Exception('You must specify a url!');
if (!$url) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a url']));
}
if (!$format) {
throw new \Exception('You must specify a format!');
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']));
}

$bridgeFactory = new BridgeFactory();
Expand All @@ -23,19 +23,23 @@ public function execute(Request $request)

$bridge = $bridgeFactory->create($bridgeClassName);

$bridgeParams = $bridge->detectParameters($targetURL);
$bridgeParams = $bridge->detectParameters($url);

if (is_null($bridgeParams)) {
if (!$bridgeParams) {
continue;
}

$bridgeParams['bridge'] = $bridgeClassName;
$bridgeParams['format'] = $format;

$url = '?action=display&' . http_build_query($bridgeParams);
return new Response('', 301, ['location' => $url]);
$query = [
'action' => 'display',
'bridge' => $bridgeClassName,
'format' => $format,
];
$query = array_merge($query, $bridgeParams);
return new Response('', 301, ['location' => '?' . http_build_query($query)]);
}

throw new \Exception('No bridge found for given URL: ' . $targetURL);
return new Response(render(__DIR__ . '/../templates/error.html.php', [
'message' => 'No bridge found for given URL: ' . $url,
]));
}
}
6 changes: 3 additions & 3 deletions actions/FindfeedAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ class FindfeedAction implements ActionInterface
{
public function execute(Request $request)
{
$targetURL = $request->get('url');
$url = $request->get('url');
$format = $request->get('format');

if (!$targetURL) {
if (!$url) {
return new Response('You must specify a url', 400);
}
if (!$format) {
Expand All @@ -29,7 +29,7 @@ public function execute(Request $request)

$bridge = $bridgeFactory->create($bridgeClassName);

$bridgeParams = $bridge->detectParameters($targetURL);
$bridgeParams = $bridge->detectParameters($url);

if ($bridgeParams === null) {
continue;
Expand Down
11 changes: 8 additions & 3 deletions bridges/BMDSystemhausBlogBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,17 @@ public function collectData()
//-----------------------------------------------------
public function detectParameters($url)
{
$parsed_url = parse_url($url);
if ($parsed_url['host'] != 'www.bmd.com') {
try {
$parsedUrl = Url::fromString($url);
} catch (UrlException $e) {
return null;
}

$path = explode('/', $parsed_url['path']);
if ($parsedUrl->getHost() != 'www.bmd.com') {
return null;
}

$path = explode('/', $parsedUrl->getPath());

if ($this->getURIbyCountry($path[1]) == '') {
return null;
Expand Down
40 changes: 20 additions & 20 deletions bridges/FirefoxAddonsBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,6 @@ class FirefoxAddonsBridge extends BridgeAbstract
const CACHE_TIMEOUT = 3600;

private $feedName = '';
private $releaseDateRegex = '/Released ([\w, ]+) - ([\w. ]+)/';
private $xpiFileRegex = '/([A-Za-z0-9_.-]+)\.xpi$/';
private $outgoingRegex = '/https:\/\/prod.outgoing\.prod\.webservices\.mozgcp\.net\/v1\/(?:[A-z0-9]+)\//';

private $urlRegex = '/addons\.mozilla\.org\/(?:[\w-]+\/)?firefox\/addon\/([\w-]+)/';

public function detectParameters($url)
{
$params = [];

if (preg_match($this->urlRegex, $url, $matches)) {
$params['id'] = $matches[1];
return $params;
}

return null;
}

public function collectData()
{
Expand All @@ -52,7 +35,8 @@ public function collectData()
$item['uri'] = $this->getURI();
$item['author'] = $author;

if (preg_match($this->releaseDateRegex, $li->find('div.AddonVersionCard-fileInfo', 0)->plaintext, $match)) {
$releaseDateRegex = '/Released ([\w, ]+) - ([\w. ]+)/';
if (preg_match($releaseDateRegex, $li->find('div.AddonVersionCard-fileInfo', 0)->plaintext, $match)) {
$item['timestamp'] = $match[1];
$size = $match[2];
}
Expand All @@ -68,7 +52,8 @@ public function collectData()

$releaseNotes = $this->removeLinkRedirects($li->find('div.AddonVersionCard-releaseNotes', 0));

if (preg_match($this->xpiFileRegex, $downloadlink, $match)) {
$xpiFileRegex = '/([A-Za-z0-9_.-]+)\.xpi$/';
if (preg_match($xpiFileRegex, $downloadlink, $match)) {
$xpiFilename = $match[0];
}

Expand Down Expand Up @@ -110,10 +95,25 @@ public function getName()
*/
private function removeLinkRedirects($html)
{
$outgoingRegex = '/https:\/\/prod.outgoing\.prod\.webservices\.mozgcp\.net\/v1\/(?:[A-z0-9]+)\//';
foreach ($html->find('a') as $a) {
$a->href = urldecode(preg_replace($this->outgoingRegex, '', $a->href));
$a->href = urldecode(preg_replace($outgoingRegex, '', $a->href));
}

return $html->innertext;
}

public function detectParameters($url)
{
$params = [];

// Example: https://addons.mozilla.org/en-US/firefox/addon/ublock-origin
$pattern = '/addons\.mozilla\.org\/(?:[\w-]+\/)?firefox\/addon\/([\w-]+)/';
if (preg_match($pattern, $url, $matches)) {
$params['id'] = $matches[1];
return $params;
}

return null;
}
}
12 changes: 9 additions & 3 deletions lib/FeedExpander.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,20 @@ public function collectExpandableDatas(string $url, $maxItems = -1)
throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
}
// prepare/massage the xml to make it more acceptable
$badStrings = [
$problematicStrings = [
' ',
'»',
'’',
];
$xmlString = str_replace($badStrings, '', $xmlString);
$xmlString = str_replace($problematicStrings, '', $xmlString);

$feedParser = new FeedParser();
$this->feed = $feedParser->parseFeed($xmlString);
try {
$this->feed = $feedParser->parseFeed($xmlString);
} catch (\Exception $e) {
throw new \Exception(sprintf('Failed to parse xml from %s: %s', $url, create_sane_exception_message($e)));
}

$items = array_slice($this->feed['items'], 0, $maxItems);
// todo: extract parse logic out from FeedParser
foreach ($items as $item) {
Expand Down

0 comments on commit d01c462

Please sign in to comment.