forked from itohsnap/ttrss_fullpost
-
Notifications
You must be signed in to change notification settings - Fork 4
/
init.php
235 lines (194 loc) · 7.32 KB
/
init.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
<?php
// Initial version of this plugin: https://github.com/atallo/ttrss_fullpost/
// Relies on PHP-Readability by fivefilters.org: http://code.fivefilters.org/php-readability/
// This version was changed by ManuelW to fetch the full text for ALL feeds, except those matching the comma-separated exclusion list
// Start Code
class Af_Fullpost extends Plugin implements IHandler
{
private $host;
/**
 * Describes this plugin.
 *
 * @return array Version number, description and author, in that order.
 */
function about() {
    $version = 0.01;
    $description = "Full post for ALL articles (requires CURL).";
    $author = "ManuelW";

    return [$version, $description, $author];
}
/**
 * Reports the plugin API level this plugin declares.
 *
 * @return int Always 2.
 */
function api_version() {
    $declared_api_level = 2;

    return $declared_api_level;
}
/**
 * Stores the plugin host and registers this plugin for the preferences-tab
 * and article-filter hooks.
 *
 * @param object $host Plugin host; must expose the HOOK_* constants and add_hook().
 */
function init($host) {
    $this->host = $host;

    // Registration order is kept: preferences tab first, article filter second.
    $wanted_hooks = array($host::HOOK_PREFS_TABS, $host::HOOK_ARTICLE_FILTER);
    foreach ($wanted_hooks as $hook_id) {
        $host->add_hook($hook_id, $this);
    }
}
/**
 * Article-filter hook: replaces the feed-supplied content of an article with
 * the full text fetched from the article's link.
 *
 * Articles whose link contains one of the configured exclusion fragments keep
 * their original content. An article that already carries this plugin's marker
 * in 'plugin_data' is returned without being fetched again.
 *
 * @param array $article Article record; this method reads 'link', 'content',
 *                       'owner_uid', 'plugin_data' and 'stored'.
 * @return array The (possibly modified) article.
 */
function hook_article_filter($article) {
    if (!function_exists("curl_init"))
        return $article;

    // The marker embeds the owner id, so the id must be known *before* the
    // "already processed" test, otherwise the test can never match.
    $owner_uid = $article['owner_uid'];
    $marker = "fullpost,$owner_uid:";
    $plugin_data = isset($article['plugin_data']) ? $article['plugin_data'] : "";

    // do not process an article more than once
    if (strpos($plugin_data, $marker) !== false) {
        if (isset($article['stored']['content'])) $article['content'] = $article['stored']['content'];
        return $article;
    }

    $json_conf = $this->host->get($this, 'json_conf');
    $showInfoEnabled = sql_bool_to_bool($this->host->get($this, "af_fullpost_showinfo", bool_to_sql_bool(TRUE)));

    // Exclusion fragments may be separated by commas and/or line breaks.
    $separators = array("\r\n", "\r", "\n");
    $fragments = explode(',', str_replace($separators, ",", (string) $json_conf));

    $check_content = "";
    foreach ($fragments as $urlpart) {
        $urlpart = trim($urlpart);
        // An empty fragment (empty config, trailing comma, blank line) must be
        // ignored: as a needle it would match every link on PHP 8.
        if ($urlpart === "") continue;

        if (stripos($article['link'], $urlpart) !== false) {
            $check_content = "Skipped";
            break;
        }
    }

    try {
        if ($check_content !== "Skipped") {
            $check_content = $this->get_full_post($article['link']);
        }

        if ($check_content === "Skipped") {
            // excluded by the user: keep the feed content as it is
            if ($showInfoEnabled === true) $article['content'] .= "<br>You skipped Readability";
        }
        elseif (is_string($check_content) && $check_content !== "Failed" && trim($check_content) !== "") {
            $article['content'] = $check_content;
            if ($showInfoEnabled === true) $article['content'] .= "<br>Processed by Readability";
        }
        else {
            // nothing usable came back: keep the feed content as it is
            if ($showInfoEnabled === true) $article['content'] .= "<br>Failed processing by Readability";
        }
    } catch (Exception $e) {
        // Keep going with the original content. Only the escaped message is
        // shown, so no stack trace or markup ends up inside the article HTML.
        $article['content'] .= "<br>ERROR processing by Readability<br>" . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
    }

    // clean links without a scheme: some sites write <img src="//www.site.com">
    // so that the image works over both http and https
    $count = 0;
    $article["content"] = str_replace("\"//", "\"http://", $article["content"], $count);
    if ($showInfoEnabled === true) {
        $article['content'] .= " + " . $count . " Replacements";
    }

    // mark article as processed
    $article['plugin_data'] = $marker . $plugin_data;
    return $article;
}
/**
 * Downloads a page and extracts its main article markup with Readability.
 *
 * @param string $request_url Address of the article page (http or https only).
 * @return string The extracted HTML, or the literal string "Failed" when the
 *                URL is not a web URL, the download fails, or no content could
 *                be extracted.
 */
private function get_full_post($request_url) {
    // Feed-supplied links are untrusted input: only plain web URLs are fetched,
    // so file://, ftp:// and similar schemes are never requested.
    if (!is_string($request_url) || !preg_match('#^https?://#i', $request_url)) {
        return "Failed";
    }

    try {
        $handle = curl_init();
        if ($handle === false) {
            return "Failed";
        }

        $options = array(
            CURLOPT_USERAGENT => "Tiny Tiny RSS",
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HEADER => false,
            CURLOPT_HTTPGET => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_URL => $request_url
        );
        // Redirects must not leave http/https either.
        if (defined('CURLOPT_PROTOCOLS') && defined('CURLOPT_REDIR_PROTOCOLS')) {
            $options[CURLOPT_PROTOCOLS] = CURLPROTO_HTTP | CURLPROTO_HTTPS;
            $options[CURLOPT_REDIR_PROTOCOLS] = CURLPROTO_HTTP | CURLPROTO_HTTPS;
        }
        curl_setopt_array($handle, $options);
        $source = curl_exec($handle);
        curl_close($handle);

        // curl reports failure through a false return value, not an exception.
        if (!is_string($source) || trim($source) === "") {
            return "Failed";
        }

        // fix encoding -> done by itohsnap: https://github.com/itohsnap/ttrss_fullpost/commit/815e163b724fbfb426eff43bde6c3aa744a22ae5
        // Accepts quoted and unquoted declarations in any letter case.
        $charset = 'UTF-8';
        if (preg_match('/charset\s*=\s*["\']?([\w\-]+)/i', $source, $match)) {
            $charset = $match[1];
        }
        $converted = false;
        try {
            $converted = mb_convert_encoding($source, 'UTF-8', $charset);
        } catch (Error $e) {
            // PHP 8 throws ValueError for an unknown encoding name; the page is
            // then used unconverted. (On PHP 5 this clause simply never matches.)
        }
        if (is_string($converted) && $converted !== "") {
            $source = $converted;
        }

        // Clean with tidy, if exists
        if (function_exists('tidy_parse_string')) {
            $tidy = tidy_parse_string($source, array(), 'UTF8');
            if ($tidy) {
                $tidy->cleanRepair();
                $source = $tidy->value;
            }
        }

        // get the Text
        require_once 'Readability.php';
        $readability = new Readability($source);
        $readability->debug = false;
        $readability->convertLinksToFootnotes = false;
        // NOTE(review): init() is expected to return false when no article
        // content was found (php-readability convention) - confirm against the
        // bundled Readability.php.
        if (!$readability->init()) {
            return "Failed";
        }
        $content = $readability->getContent()->innerHTML;

        // if we've got Tidy, let's clean it up for output
        if (function_exists('tidy_parse_string')) {
            $tidy = tidy_parse_string($content, array('indent'=>true, 'show-body-only' => true), 'UTF8');
            if ($tidy) {
                $tidy->cleanRepair();
                $content = $tidy->value;
            }
        }

        return is_string($content) ? $content : "Failed";
    }
    catch (Exception $e) {
        // Extraction is best effort: any failure is reported to the caller
        // through the same sentinel it already checks for.
        return "Failed";
    }
}
/**
 * Preferences-tab hook: emits the content pane that loads this plugin's
 * settings page from backend.php?op=af_fullpost.
 *
 * @param mixed $args Hook argument; not used here.
 */
function hook_prefs_tabs($args)
{
    $tab_title = __('Exclude FullPost');

    $pane_markup = '<div id="fullpostConfigTab" dojoType="dijit.layout.ContentPane"
href="backend.php?op=af_fullpost"
title="' . $tab_title . '"></div>';

    echo $pane_markup;
}
/**
 * Prints the settings form: a checkbox that toggles the "processed by
 * Readability" footer and a textarea holding the exclusion list. The form
 * posts to backend.php with op=pluginhandler, method=save, plugin=af_fullpost.
 */
function index()
{
    $pluginhost = PluginHost::getInstance();

    // The stored list is echoed inside a <textarea>, so it must be escaped;
    // otherwise a value containing "</textarea>" would break out of the field.
    $json_conf = htmlspecialchars((string) $pluginhost->get($this, 'json_conf'), ENT_QUOTES, 'UTF-8');

    // Same default and same conversion that hook_article_filter() applies, so
    // the checkbox shows the state the filter really uses. A raw truthiness
    // test would treat any non-empty stored string as "enabled".
    $showInfoEnabled = sql_bool_to_bool($pluginhost->get($this, "af_fullpost_showinfo", bool_to_sql_bool(TRUE)));
    $fullPostChecked = $showInfoEnabled ? "checked=\"1\"" : "";

    print "<p>Comma-separated list, or one address per line, of web addresses for which the full post should not be fetched.<br>Example: site1.com, site2.org, site3.de</p>";
    print "<form dojoType=\"dijit.form.Form\">";
    print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\">
evt.preventDefault();
if (this.validate()) {
new Ajax.Request('backend.php', {
parameters: dojo.objectToQuery(this.getValues()),
onComplete: function(transport) {
if (transport.responseText.indexOf('error')>=0) notify_error(transport.responseText);
else notify_info(transport.responseText);
}
});
//this.reset();
}
</script>";
    // hidden routing fields read by the backend
    print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"op\" value=\"pluginhandler\">";
    print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"method\" value=\"save\">";
    print "<input dojoType=\"dijit.form.TextBox\" style=\"display : none\" name=\"plugin\" value=\"af_fullpost\">";
    print "<table width='100%'><tr><td>";
    print "Show processed by Readability info on article bottom: <input dojoType=\"dijit.form.CheckBox\" type=\"checkbox\" name=\"af_fullpost_showinfo\" id=\"af_fullpost_showinfo\" $fullPostChecked>";
    print "</td></tr>";
    print "<tr><td>";
    print "<textarea dojoType=\"dijit.form.SimpleTextarea\" name=\"json_conf\" style=\"font-size: 12px; width: 99%; height: 500px;\">$json_conf</textarea>";
    print "</td></tr></table>";
    print "<p><button dojoType=\"dijit.form.Button\" type=\"submit\">".__("Save")."</button></p>";
    print "</form>";
}
/**
 * Stores the submitted exclusion list and the "show info" checkbox state,
 * then prints a confirmation message.
 *
 * Reads 'json_conf' and 'af_fullpost_showinfo' from $_POST. A field that is
 * missing from the request is treated as an empty string.
 */
function save()
{
    $json_conf = isset($_POST['json_conf']) ? $_POST['json_conf'] : "";
    // An unchecked checkbox is normally not submitted at all, so the key may be absent.
    $show_info = isset($_POST["af_fullpost_showinfo"]) ? $_POST["af_fullpost_showinfo"] : "";

    $this->host->set($this, 'json_conf', $json_conf);
    $this->host->set($this, "af_fullpost_showinfo", checkbox_to_sql_bool($show_info));
    echo __("Configuration saved.");
}
/**
 * Tells whether a handler method is exempt from the CSRF check.
 *
 * @param string $method Name of the handler method being invoked.
 * @return bool True only for "index" and "edit".
 */
function csrf_ignore($method)
{
    $csrf_ignored = array("index", "edit");

    // Strict comparison: with loose matching a non-string value such as true
    // (or 0 on PHP < 8) compares equal to "index" and would be exempted.
    return in_array($method, $csrf_ignored, true);
}
/**
 * Gate evaluated before a handler method runs.
 *
 * @param string $method Name of the handler method; not inspected here.
 * @return bool True when $_SESSION["uid"] holds a truthy value, false otherwise.
 */
function before($method)
{
    return $_SESSION["uid"] ? true : false;
}
/**
 * Callback evaluated after a handler method has run.
 *
 * @return bool Always true.
 */
function after()
{
    $result = true;

    return $result;
}
}
?>