-
Notifications
You must be signed in to change notification settings - Fork 385
/
Copy pathclass-amp-image-dimension-extractor.php
417 lines (357 loc) · 13.5 KB
/
class-amp-image-dimension-extractor.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
<?php
/**
* Class AMP_Image_Dimension_Extractor
*
* @package AMP
*/
/**
* Class with static methods to extract image dimensions.
*
* @internal
*/
class AMP_Image_Dimension_Extractor {
/**
* Marker stored in an image's dimensions transient to record that the last attempt to obtain its dimensions failed.
*
* @var string
*/
const STATUS_FAILED_LAST_ATTEMPT = 'failed';
/**
* Value that the `size` entry of a fetched image result is compared against to detect a failed extraction.
*
* Intentionally holds the same string as STATUS_FAILED_LAST_ATTEMPT.
*
* @var string
*/
const STATUS_IMAGE_EXTRACTION_FAILED = 'failed';
/**
* Internal flag whether callbacks have been registered.
*
* Set to true by register_callbacks() and checked by extract() so the filter callbacks are only added once.
*
* @var bool
*/
private static $callbacks_registered = false;
/**
 * Extracts dimensions from image URLs.
 *
 * Each URL is normalized, the normalized URLs are passed through the
 * `amp_extract_image_dimensions_batch` filter, and the results are mapped back onto
 * the URLs exactly as they were supplied.
 *
 * @since 0.2
 *
 * @param array|string $urls Array of URLs to extract dimensions from, or a single URL string.
 * @return array|false Extracted dimensions keyed by original URL (false for every URL whose dimensions could
 *                     not be determined), or else the single set of dimensions (or false) if one URL string
 *                     is passed.
 */
public static function extract( $urls ) {
	if ( ! self::$callbacks_registered ) {
		self::register_callbacks();
	}

	$return_dimensions = [];

	// Back-compat for users calling this method directly with a single URL.
	$is_single = is_string( $urls );
	if ( $is_single ) {
		$urls = [ $urls ];
	}

	// Normalize URLs and track a map of original-to-normalized, as it is needed to key the returned data by original URL.
	$url_map         = [];
	$normalized_urls = [];
	foreach ( $urls as $original_url ) {
		$normalized_url = self::normalize_url( $original_url );
		if ( false !== $normalized_url ) {
			$url_map[ $original_url ] = $normalized_url;
			$normalized_urls[]        = $normalized_url;
		} else {
			// This is not a URL we can extract dimensions from, so default to false.
			$return_dimensions[ $original_url ] = false;
		}
	}

	$extracted_dimensions = array_fill_keys( $normalized_urls, false );

	/**
	 * Filters the dimensions extracted from image URLs.
	 *
	 * @since 0.5.1
	 *
	 * @param array $extracted_dimensions Extracted dimensions, initially mapping images URLs to false.
	 */
	$extracted_dimensions = apply_filters( 'amp_extract_image_dimensions_batch', $extracted_dimensions );

	// A filter callback is free to return something other than the array it was given; never index into a non-array.
	if ( ! is_array( $extracted_dimensions ) ) {
		$extracted_dimensions = [];
	}

	// We need to return a map with the original (un-normalized) URL as we need that to match nodes that need dimensions.
	foreach ( $url_map as $original_url => $normalized_url ) {
		// Entries dropped by a filter callback are reported as "unknown" (false) instead of raising an undefined-index notice.
		$return_dimensions[ $original_url ] = isset( $extracted_dimensions[ $normalized_url ] )
			? $extracted_dimensions[ $normalized_url ]
			: false;
	}

	// Back-compat: just return the dimensions, not the full mapped array.
	if ( $is_single ) {
		return current( $return_dimensions );
	}

	return $return_dimensions;
}
/**
 * Normalizes the given URL.
 *
 * Protocol-relative URLs are given the `http` scheme. URLs without a host are treated as
 * relative to the site root and are prefixed with the scheme and host of home_url().
 * The result is passed through the `amp_normalized_dimension_extractor_image_url` filter.
 *
 * @param string $url URL to normalize.
 * @return string|false Normalized URL, or false if normalization failed (empty or non-string input,
 *                      a `data:` URL, or a URL that cannot be parsed).
 */
public static function normalize_url( $url ) {
	if ( empty( $url ) || ! is_string( $url ) ) {
		return false;
	}

	// URLs using the data: scheme are rejected.
	if ( 0 === strpos( $url, 'data:' ) ) {
		return false;
	}

	$normalized_url = $url;
	if ( 0 === strpos( $url, '//' ) ) {
		$normalized_url = set_url_scheme( $url, 'http' );
	} else {
		$parsed = wp_parse_url( $url );

		// A URL that cannot be parsed must not be mistaken for a relative URL and turned into the site root.
		if ( false === $parsed ) {
			return false;
		}

		if ( ! isset( $parsed['host'] ) ) {
			$path = '';
			if ( isset( $parsed['path'] ) && '' !== $parsed['path'] ) {
				$path = $parsed['path'];

				// Make sure the path is separated from the host, e.g. "images/a.png" becomes "/images/a.png".
				if ( '/' !== substr( $path, 0, 1 ) ) {
					$path = '/' . $path;
				}
			}
			if ( isset( $parsed['query'] ) ) {
				$path .= '?' . $parsed['query'];
			}

			// Strip any path component from the home URL so that only scheme and host (and port) remain.
			$home      = home_url();
			$home_path = wp_parse_url( $home, PHP_URL_PATH );
			if ( ! empty( $home_path ) ) {
				$home = substr( $home, 0, - strlen( $home_path ) );
			}

			$normalized_url = $home . $path;
		}
	}

	/**
	 * Apply filters on the normalized image URL for dimension extraction.
	 *
	 * @since 1.1
	 *
	 * @param string $normalized_url Normalized image URL.
	 * @param string $url            Original image URL.
	 */
	$normalized_url = apply_filters( 'amp_normalized_dimension_extractor_image_url', $normalized_url, $url );

	return $normalized_url;
}
/**
 * Hooks the built-in extraction strategies onto the batch filter.
 *
 * Marks the callbacks as registered, attaches the file name/filesystem strategy at priority 100
 * and the download strategy at priority 999, then fires an action to announce the registration.
 */
private static function register_callbacks() {
	self::$callbacks_registered = true;

	// Method name => filter priority, in the order the callbacks are added.
	$strategies = [
		'extract_by_filename_or_filesystem' => 100,
		'extract_by_downloading_images'     => 999,
	];

	foreach ( $strategies as $method_name => $priority ) {
		add_filter( 'amp_extract_image_dimensions_batch', [ __CLASS__, $method_name ], $priority, 1 );
	}

	/**
	 * Fires after the amp_extract_image_dimensions_batch filter has been added to extract by downloading images.
	 *
	 * @since 0.5.1
	 */
	do_action( 'amp_extract_image_dimensions_batch_callbacks_registered' );
}
/**
 * Fills in dimensions from the size suffix of the file name or, for uploaded files, from the file on disk.
 *
 * Entries that already hold a non-empty array are left untouched, as are URLs whose
 * extension is not a recognized image extension.
 *
 * @param array $dimensions Image URLs mapped to dimensions.
 * @return array Image URLs mapped to dimensions; entries for which nothing could be determined keep their
 *               incoming value. An empty array is returned when the input is empty or not an array.
 */
public static function extract_by_filename_or_filesystem( $dimensions ) {
	if ( ! is_array( $dimensions ) || empty( $dimensions ) ) {
		return [];
	}

	$has_external_cache = wp_using_ext_object_cache();

	$extension_groups = wp_get_ext_types();
	if ( empty( $extension_groups['image'] ) ) {
		return $dimensions;
	}
	$image_extensions = $extension_groups['image'];

	$uploads          = wp_get_upload_dir();
	$uploads_base_url = trailingslashit( $uploads['baseurl'] );
	$uploads_base_dir = trailingslashit( $uploads['basedir'] );

	// Captures the file extension and, when present, a "-{width}x{height}" suffix right before it.
	$filename_pattern = '/(?:-(?<width>[1-9][0-9]*)x(?<height>[1-9][0-9]*))?\.(?<ext>\w+)$/';

	foreach ( $dimensions as $url => $value ) {
		// Some other callback attached to the filter already provided dimensions for this image.
		if ( is_array( $value ) && ! empty( $value ) ) {
			continue;
		}

		$clean_url = strtok( $url, '?#' );

		// Nothing to go on when the URL does not end in a file extension.
		if ( ! preg_match( $filename_pattern, $clean_url, $matches ) ) {
			continue;
		}

		// Skip images that don't have recognized extensions.
		if ( ! in_array( strtolower( $matches['ext'] ), $image_extensions, true ) ) {
			continue;
		}

		// The file name itself carries the dimensions.
		if ( ! empty( $matches['width'] ) && ! empty( $matches['height'] ) ) {
			$dimensions[ $url ] = [
				'width'  => (int) $matches['width'],
				'height' => (int) $matches['height'],
			];
			continue;
		}

		// Only files below the uploads base URL can be looked up on disk.
		if ( 0 !== strpos( $clean_url, $uploads_base_url ) ) {
			continue;
		}

		// Bail if the URL contains relative paths.
		$relative_path = substr( $clean_url, strlen( $uploads_base_url ) );
		if ( 0 !== validate_file( $relative_path ) ) {
			continue;
		}

		$file_path = $uploads_base_dir . $relative_path;
		$size      = [];

		list( $transient_name ) = self::get_transient_names( $url );

		// When using an external object cache, try to first see if dimensions have already been obtained. This is
		// not done for a non-external object cache (i.e. when wp_options is used for transients) because then
		// we are not storing the dimensions in a transient, because it is more performant to read the dimensions
		// from the filesystem--both in terms of time and storage--than to store dimensions in wp_options.
		if ( $has_external_cache ) {
			$cached = get_transient( $transient_name );
			if ( ! empty( $cached ) && is_array( $cached ) ) {
				$size = $cached;
			}
		}

		// Fall back to reading the size from the file itself.
		if ( empty( $size ) && file_exists( $file_path ) ) {
			if ( function_exists( 'wp_getimagesize' ) ) {
				$size = wp_getimagesize( $file_path );
			} elseif ( function_exists( 'getimagesize' ) ) {
				$size = @getimagesize( $file_path ); // phpcs:ignore WordPress.PHP.NoSilencedErrors
			}

			if ( $has_external_cache && ! empty( $size ) && is_array( $size ) ) {
				set_transient( $transient_name, $size );
			}
		}

		if ( empty( $size ) || ! is_array( $size ) ) {
			continue;
		}

		$dimensions[ $url ] = [
			'width'  => (int) $size[0],
			'height' => (int) $size[1],
		];
	}

	return $dimensions;
}
/**
 * Builds the transient names used for an image URL.
 *
 * Both names are derived from the MD5 hash of the URL.
 *
 * @param string $url Image URL.
 * @return array {
 *     @type string $0 Transient name for storing dimensions.
 *     @type string $1 Transient name for image fetching lock.
 * }
 */
private static function get_transient_names( $url ) {
	$hash = md5( $url );

	$dimensions_name = 'amp_img_' . $hash;
	$lock_name       = 'amp_lock_' . $hash;

	return [ $dimensions_name, $lock_name ];
}
/**
 * Extract dimensions from downloaded images (or transient/cached dimensions from downloaded images).
 *
 * Entries that already hold dimensions, or that can be answered from a transient, are resolved without
 * any download. Only the remaining URLs are fetched; when there are none, no fetch is attempted.
 * An exception raised while fetching or processing is reported as an E_USER_WARNING and the
 * dimensions gathered so far are returned.
 *
 * @param array $dimensions Image urls mapped to dimensions.
 * @param bool  $mode       Deprecated.
 * @return array Dimensions mapped to image urls, or false if they could not be retrieved
 */
public static function extract_by_downloading_images( $dimensions, $mode = false ) {
	if ( $mode ) {
		_deprecated_argument( __METHOD__, 'AMP 1.1' );
	}

	$urls_to_fetch = [];
	self::determine_which_images_to_fetch( $dimensions, $urls_to_fetch );

	// Everything was already known or cached: skip setting up the image client for an empty batch.
	if ( empty( $urls_to_fetch ) ) {
		return $dimensions;
	}

	$transient_expiration = 30 * DAY_IN_SECONDS;
	$images               = [];

	try {
		self::fetch_images( $urls_to_fetch, $images );
		self::process_fetched_images( $urls_to_fetch, $images, $dimensions, $transient_expiration );
	} catch ( Exception $exception ) {
		trigger_error( esc_html( $exception->getMessage() ), E_USER_WARNING ); // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_trigger_error
	}

	return $dimensions;
}
/**
 * Works out which images still have to be downloaded.
 *
 * Dimensions found in a transient are written straight into `$dimensions`. Images whose last attempt
 * failed, or for which a lock transient currently exists, are set to false. Every remaining image is
 * queued in `$urls_to_fetch` and a lock transient lasting one minute is set for it, acting as a
 * semaphore so that another visitor doesn't trigger a remote fetch for the same image at the same time.
 *
 * @param array $dimensions    Image urls mapped to dimensions. Updated in place.
 * @param array $urls_to_fetch Urls of images to fetch because dimensions are not in transient/cache. Appended to in place.
 */
private static function determine_which_images_to_fetch( &$dimensions, &$urls_to_fetch ) {
	foreach ( $dimensions as $image_url => $current_value ) {
		// Skip entries without a URL and entries another callback already resolved.
		if ( empty( $image_url ) || is_array( $current_value ) ) {
			continue;
		}

		list( $dimensions_key, $lock_key ) = self::get_transient_names( $image_url );

		$cached = get_transient( $dimensions_key );

		// Dimensions are available from a transient: use them and move on.
		if ( is_array( $cached ) ) {
			$dimensions[ $image_url ] = [
				'width'  => $cached[0],
				'height' => $cached[1],
			];
			continue;
		}

		// Either the last attempt for this image failed, or somebody else is fetching it right now.
		if ( self::STATUS_FAILED_LAST_ATTEMPT === $cached || false !== get_transient( $lock_key ) ) {
			$dimensions[ $image_url ] = false;
			continue;
		}

		// Queue the image for fetching and take the lock.
		$urls_to_fetch[ $image_url ] = [
			'url'                 => $image_url,
			'transient_name'      => $dimensions_key,
			'transient_lock_name' => $lock_key,
		];
		set_transient( $lock_key, 1, MINUTE_IN_SECONDS );
	}
}
/**
 * Fetches the given images as one batch through the FasterImage client.
 *
 * The client is configured with a filterable user agent, a buffer size of 1024, and
 * SSL host and peer verification enabled.
 *
 * @throws Exception When cURL handle cannot be added.
 *
 * @param array $urls_to_fetch Image src urls to fetch, keyed by URL.
 * @param array $images        Array to populate with results of image/dimension inspection.
 */
private static function fetch_images( $urls_to_fetch, &$images ) {
	$client = new \FasterImage\FasterImage();

	/**
	 * Filters the user agent for obtaining the image dimensions.
	 *
	 * @param string $user_agent User agent.
	 */
	$user_agent = apply_filters( 'amp_extract_image_dimensions_get_user_agent', self::get_default_user_agent() );

	$client->setUserAgent( $user_agent );
	$client->setBufferSize( 1024 );
	$client->setSslVerifyHost( true );
	$client->setSslVerifyPeer( true );

	// Only the keys (the image URLs) are handed to the client.
	$images = $client->batch( array_keys( $urls_to_fetch ) );
}
/**
 * Determine success or failure of remote fetch, integrate fetched dimensions into url to dimension mapping,
 * cache fetched dimensions via transient and release/delete semaphore transient.
 *
 * A fetch only counts as successful when its result has a `size` array holding both a width (index 0)
 * and a height (index 1). A missing result, a missing `size`, the failure marker, or any other
 * unexpected `size` value is recorded as a failed attempt, so no bogus dimensions are returned or cached.
 *
 * @param array $urls_to_fetch        List of image urls that were fetched and transient names corresponding to each (for unlocking semaphore, setting "real" transient).
 * @param array $images               Results of remote fetch mapping fetched image url to dimensions.
 * @param array $dimensions           Map of image url to dimensions to be updated with results of remote fetch.
 * @param int   $transient_expiration Duration image dimensions should exist in transient/cache.
 */
private static function process_fetched_images( $urls_to_fetch, $images, &$dimensions, $transient_expiration ) {
	foreach ( $urls_to_fetch as $url_data ) {
		$url = $url_data['url'];

		// isset() also covers a missing result for this URL and a non-array $images.
		$size = isset( $images[ $url ]['size'] ) ? $images[ $url ]['size'] : self::STATUS_IMAGE_EXTRACTION_FAILED;

		if ( is_array( $size ) && isset( $size[0], $size[1] ) ) {
			$dimensions[ $url ] = [
				'width'  => $size[0],
				'height' => $size[1],
			];
			set_transient(
				$url_data['transient_name'],
				[
					$size[0],
					$size[1],
				],
				$transient_expiration
			);
		} else {
			$dimensions[ $url ] = false;
			set_transient( $url_data['transient_name'], self::STATUS_FAILED_LAST_ATTEMPT, $transient_expiration );
		}

		// Release the semaphore regardless of the outcome.
		delete_transient( $url_data['transient_lock_name'] );
	}
}
/**
 * Builds the default user agent string sent when fetching images.
 *
 * The string combines the plugin version constant and the site's home URL.
 *
 * @return string
 */
public static function get_default_user_agent() {
	$version  = AMP__VERSION;
	$site_url = home_url();

	return "amp-wp, v{$version}, {$site_url}";
}
}