From 9ca7b5b76e2ebe0ba69237c043c1f1a50705e1bb Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Mon, 21 Oct 2019 08:47:37 +0100 Subject: [PATCH 01/12] Scaffold out basic new endpoint --- lib/class-wp-rest-url-details-controller.php | 139 +++++++++++++++++++ lib/load.php | 5 + lib/rest-api.php | 11 ++ 3 files changed, 155 insertions(+) create mode 100644 lib/class-wp-rest-url-details-controller.php diff --git a/lib/class-wp-rest-url-details-controller.php b/lib/class-wp-rest-url-details-controller.php new file mode 100644 index 00000000000000..fc51d019376491 --- /dev/null +++ b/lib/class-wp-rest-url-details-controller.php @@ -0,0 +1,139 @@ +namespace = '__experimental'; + $this->rest_base = 'url-details'; + } + + /** + * Registers the necessary REST API routes. + * + * @access public + */ + public function register_routes() { + register_rest_route( + $this->namespace, + '/' . $this->rest_base, + array( + array( + 'methods' => WP_REST_Server::READABLE, + 'callback' => array( $this, 'get_remote_url' ), + // 'permission_callback' => array( $this, 'get_remote_url_permissions_check' ), + ), + // 'schema' => array( $this, 'get_public_item_schema' ), + ) + ); + + } + + /** + * Retrieves the comment's schema, conforming to JSON Schema. + * + * @since 6.1.0 + * + * @return array + */ + public function get_item_schema() { + $schema = array( + '$schema' => 'http://json-schema.org/draft-04/schema#', + 'title' => 'widget-area', + 'type' => 'object', + 'properties' => array( + 'id' => array( + 'description' => __( 'Unique identifier for the object.', 'gutenberg' ), + 'type' => 'string', + 'context' => array( 'view', 'edit', 'embed' ), + 'readonly' => true, + ), + 'content' => array( + 'description' => __( 'The content for the object.', 'gutenberg' ), + 'type' => 'object', + 'context' => array( 'view', 'edit', 'embed' ), + 'arg_options' => array( + 'sanitize_callback' => null, + 'validate_callback' => null, + ), + 'properties' => array( + 'raw' => array( + 'description' => __( 'Content for the object, as it exists in the database.', 'gutenberg' ), + 'type' => 'string', + 'context' => array( 'view', 'edit', 'embed' ), + ), + 'rendered' => array( + 'description' => __( 'HTML content for the object, transformed for display.', 'gutenberg' ), + 'type' => 'string', + 'context' => array( 'view', 'edit', 'embed' ), + 'readonly' => true, + ), + 'block_version' => array( + 'description' => __( 'Version of the content block format used by the object.', 'gutenberg' ), + 'type' => 'integer', + 'context' => array( 'view', 'edit', 'embed' ), + 'readonly' => true, + ), + ), + ), + ), + ); + + return $schema; + } + + /** + * Checks whether a given request has permission to read widget areas. + * + * @since 5.7.0 + * + * @param WP_REST_Request $request Full details about the request. + * @return WP_Error|bool True if the request has read access, WP_Error object otherwise. + * + * This function is overloading a function defined in WP_REST_Controller so it should have the same parameters. + * phpcs:disable VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + */ + public function get_items_permissions_check( $request ) { + if ( ! current_user_can( 'edit_theme_options' ) ) { + return new WP_Error( + 'rest_user_cannot_view', + __( 'Sorry, you are not allowed to read sidebars.', 'gutenberg' ) + ); + } + + return true; + } + /* phpcs:enable */ + + /** + * Retrieves all widget areas. + * + * @since 5.7.0 + * + * @param WP_REST_Request $request Full details about the request. + * @return WP_Error|WP_REST_Response Response object on success, or WP_Error object on failure. + */ + public function get_remote_url( $request ) { + + $data = [ 'hello-world' ]; + + return rest_ensure_response( $data ); + } +} diff --git a/lib/load.php b/lib/load.php index 7076be29f63127..e3ee9b3a894d69 100644 --- a/lib/load.php +++ b/lib/load.php @@ -43,6 +43,11 @@ function gutenberg_is_experiment_enabled( $name ) { * End: Include for phase 2 */ + if ( ! class_exists( 'WP_REST_URL_Details_Controller' ) ) { + require dirname( __FILE__ ) . '/class-wp-rest-url-details-controller.php'; + } + + require dirname( __FILE__ ) . '/rest-api.php'; } diff --git a/lib/rest-api.php b/lib/rest-api.php index e0b6e7be592e7b..f1b97b4bf86969 100644 --- a/lib/rest-api.php +++ b/lib/rest-api.php @@ -54,6 +54,17 @@ function gutenberg_filter_oembed_result( $response, $handler, $request ) { +/** + * Registers the REST API routes for URL Details. + * + * @since 5.0.0 + */ +function gutenberg_register_url_details_routes() { + $url_details_controller = new WP_REST_URL_Details_Controller(); + $url_details_controller->register_routes(); +} +add_action( 'rest_api_init', 'gutenberg_register_url_details_routes' ); + /** * Start: Include for phase 2 */ From 4e017452fc8afe5d45d48ad7ec77ea476a9e978b Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Mon, 21 Oct 2019 09:43:34 +0100 Subject: [PATCH 02/12] Implement basic retrival of title tag from remote url --- lib/class-wp-rest-url-details-controller.php | 120 ++++++++----------- 1 file changed, 50 insertions(+), 70 deletions(-) diff --git a/lib/class-wp-rest-url-details-controller.php b/lib/class-wp-rest-url-details-controller.php index fc51d019376491..45e8fe7348700c 100644 --- a/lib/class-wp-rest-url-details-controller.php +++ b/lib/class-wp-rest-url-details-controller.php @@ -33,74 +33,20 @@ public function __construct() { public function register_routes() { register_rest_route( $this->namespace, - '/' . $this->rest_base, + '/' . $this->rest_base . '/title', array( array( 'methods' => WP_REST_Server::READABLE, - 'callback' => array( $this, 'get_remote_url' ), + 'callback' => array( $this, 'get_title' ), // 'permission_callback' => array( $this, 'get_remote_url_permissions_check' ), ), - // 'schema' => array( $this, 'get_public_item_schema' ), ) ); } /** - * Retrieves the comment's schema, conforming to JSON Schema. - * - * @since 6.1.0 - * - * @return array - */ - public function get_item_schema() { - $schema = array( - '$schema' => 'http://json-schema.org/draft-04/schema#', - 'title' => 'widget-area', - 'type' => 'object', - 'properties' => array( - 'id' => array( - 'description' => __( 'Unique identifier for the object.', 'gutenberg' ), - 'type' => 'string', - 'context' => array( 'view', 'edit', 'embed' ), - 'readonly' => true, - ), - 'content' => array( - 'description' => __( 'The content for the object.', 'gutenberg' ), - 'type' => 'object', - 'context' => array( 'view', 'edit', 'embed' ), - 'arg_options' => array( - 'sanitize_callback' => null, - 'validate_callback' => null, - ), - 'properties' => array( - 'raw' => array( - 'description' => __( 'Content for the object, as it exists in the database.', 'gutenberg' ), - 'type' => 'string', - 'context' => array( 'view', 'edit', 'embed' ), - ), - 'rendered' => array( - 'description' => __( 'HTML content for the object, transformed for display.', 'gutenberg' ), - 'type' => 'string', - 'context' => array( 'view', 'edit', 'embed' ), - 'readonly' => true, - ), - 'block_version' => array( - 'description' => __( 'Version of the content block format used by the object.', 'gutenberg' ), - 'type' => 'integer', - 'context' => array( 'view', 'edit', 'embed' ), - 'readonly' => true, - ), - ), - ), - ), - ); - - return $schema; - } - - /** - * Checks whether a given request has permission to read widget areas. + * Checks whether a given request has permission to read remote urls. * * @since 5.7.0 * @@ -110,11 +56,11 @@ public function get_item_schema() { * This function is overloading a function defined in WP_REST_Controller so it should have the same parameters. * phpcs:disable VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable */ - public function get_items_permissions_check( $request ) { + public function get_remote_url_permissions_check( $request ) { if ( ! current_user_can( 'edit_theme_options' ) ) { return new WP_Error( 'rest_user_cannot_view', - __( 'Sorry, you are not allowed to read sidebars.', 'gutenberg' ) + __( 'Sorry, you are not allowed to access remote urls.', 'gutenberg' ) ); } @@ -122,18 +68,52 @@ public function get_items_permissions_check( $request ) { } /* phpcs:enable */ - /** - * Retrieves all widget areas. - * - * @since 5.7.0 - * - * @param WP_REST_Request $request Full details about the request. - * @return WP_Error|WP_REST_Response Response object on success, or WP_Error object on failure. - */ - public function get_remote_url( $request ) { + public function get_title( $request ) { + + // TODO: Sanitize and validate + $url = $request->get_param( 'url' ); + + $html = $this->get_remote_url_html( $url ); + + if ( is_wp_error( $html ) ) { + return new WP_Error( 'no_title', 'Unable to retrieve title tag . ' . $html->get_error_message(), array( 'status' => 404 ) ); + } + + $title_list = $html->getElementsByTagName( 'title' ); + + $title = $title_list->item( 0 ); + + if ( empty( $title ) ) { + return new WP_Error( 'no_title', 'No title tag at remote url', array( 'status' => 404 ) ); + } + + $title_text = $title->nodeValue; + + return rest_ensure_response( $title_text ); + } + + + private function get_remote_url_html( $url ) { + + $response = wp_remote_get( $url ); + + if ( is_wp_error( $response ) || ! is_array( $response ) ) { + return new WP_Error( 'no_response', 'Unable to contact remote url . ' . $response->get_error_message(), array( 'status' => 404 ) ); + } + + $body = wp_remote_retrieve_body( $response ); + + $dom = new DOMDocument( '1.0', 'UTF - 8' ); + + // set error level + $internalErrors = libxml_use_internal_errors( true ); + + // load HTML + $dom->loadHTML( $body ); - $data = [ 'hello-world' ]; + // Restore error level + libxml_use_internal_errors( $internalErrors ); - return rest_ensure_response( $data ); + return $dom; } } From b5270cb85c3a5c770b2fa37eece3d80a52e15679 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Mon, 21 Oct 2019 10:04:31 +0100 Subject: [PATCH 03/12] Adds validation, sanitization and permissions checks. --- lib/class-wp-rest-url-details-controller.php | 78 +++++++++++++------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/lib/class-wp-rest-url-details-controller.php b/lib/class-wp-rest-url-details-controller.php index 45e8fe7348700c..97a9626acfc87a 100644 --- a/lib/class-wp-rest-url-details-controller.php +++ b/lib/class-wp-rest-url-details-controller.php @@ -38,11 +38,56 @@ public function register_routes() { array( 'methods' => WP_REST_Server::READABLE, 'callback' => array( $this, 'get_title' ), - // 'permission_callback' => array( $this, 'get_remote_url_permissions_check' ), + 'args' => array( + 'url' => array( + 'validate_callback' => function( $param ) { + return $this->validate_url( $param ); + }, + 'sanitize_callback' => function( $param ) { + return $this->sanitize_url( $param ); + }, + ), + ), + 'permission_callback' => array( $this, 'get_remote_url_permissions_check' ), ), ) ); + } + + + + + + public function get_title( $request ) { + + // TODO: Sanitize and validate + $url = $request->get_param( 'url' ); + + $html_response = $this->get_remote_url_html( $url ); + + if ( is_wp_error( $html_response ) ) { + return new WP_Error( 'no_title', 'Unable to retrieve title tag. ' . $html_response->get_error_message(), array( 'status' => 404 ) ); + } + + $title_list = $html_response->getElementsByTagName( 'title' ); + + $title = $title_list->item( 0 ); + + if ( empty( $title ) ) { + return new WP_Error( 'no_title', 'No title tag at remote url.', array( 'status' => 404 ) ); + } + + $title_text = $title->nodeValue; + + return rest_ensure_response( $title_text ); + } + public function validate_url( $url ) { + return wp_http_validate_url( $url ); + } + + public function sanitize_url( $url ) { + return esc_url_raw( $url ); } /** @@ -51,13 +96,16 @@ public function register_routes() { * @since 5.7.0 * * @param WP_REST_Request $request Full details about the request. - * @return WP_Error|bool True if the request has read access, WP_Error object otherwise. + * @return WP_Error|bool True if the request has access, WP_Error object otherwise. * * This function is overloading a function defined in WP_REST_Controller so it should have the same parameters. * phpcs:disable VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable */ public function get_remote_url_permissions_check( $request ) { - if ( ! current_user_can( 'edit_theme_options' ) ) { + + $required_cap = 'edit_posts'; + + if ( ! current_user_can( $required_cap ) ) { return new WP_Error( 'rest_user_cannot_view', __( 'Sorry, you are not allowed to access remote urls.', 'gutenberg' ) @@ -68,30 +116,6 @@ public function get_remote_url_permissions_check( $request ) { } /* phpcs:enable */ - public function get_title( $request ) { - - // TODO: Sanitize and validate - $url = $request->get_param( 'url' ); - - $html = $this->get_remote_url_html( $url ); - - if ( is_wp_error( $html ) ) { - return new WP_Error( 'no_title', 'Unable to retrieve title tag . ' . $html->get_error_message(), array( 'status' => 404 ) ); - } - - $title_list = $html->getElementsByTagName( 'title' ); - - $title = $title_list->item( 0 ); - - if ( empty( $title ) ) { - return new WP_Error( 'no_title', 'No title tag at remote url', array( 'status' => 404 ) ); - } - - $title_text = $title->nodeValue; - - return rest_ensure_response( $title_text ); - } - private function get_remote_url_html( $url ) { From 0849a336b6923e7670e963ff09536b865af64538 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Mon, 21 Oct 2019 10:11:46 +0100 Subject: [PATCH 04/12] i18n fixes and docblocks --- lib/class-wp-rest-url-details-controller.php | 42 +++++++++++++++----- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/lib/class-wp-rest-url-details-controller.php b/lib/class-wp-rest-url-details-controller.php index 97a9626acfc87a..a01306da231c98 100644 --- a/lib/class-wp-rest-url-details-controller.php +++ b/lib/class-wp-rest-url-details-controller.php @@ -7,7 +7,8 @@ */ /** - * Controller which provides REST endpoint for the widget areas. + * Controller which provides REST endpoint for retrieving information + * from a remote site's HTML response. * * @since 5.?.0 * @@ -57,16 +58,22 @@ public function register_routes() { - + /** + * Retrieves the contents of the tag from the HTML + * response. + * + * @access public + * @param WP_REST_REQUEST $request Full details about the request. + * @return String|WP_Error the title text or an error. + */ public function get_title( $request ) { - // TODO: Sanitize and validate $url = $request->get_param( 'url' ); $html_response = $this->get_remote_url_html( $url ); if ( is_wp_error( $html_response ) ) { - return new WP_Error( 'no_title', 'Unable to retrieve title tag. ' . $html_response->get_error_message(), array( 'status' => 404 ) ); + return new WP_Error( 'no_title', __( 'Unable to retrieve title tag.', 'gutenberg' ) . $html_response->get_error_message(), array( 'status' => 404 ) ); } $title_list = $html_response->getElementsByTagName( 'title' ); @@ -74,7 +81,7 @@ public function get_title( $request ) { $title = $title_list->item( 0 ); if ( empty( $title ) ) { - return new WP_Error( 'no_title', 'No title tag at remote url.', array( 'status' => 404 ) ); + return new WP_Error( 'no_title', __( 'No title tag at remote url.', 'gutenberg' ), array( 'status' => 404 ) ); } $title_text = $title->nodeValue; @@ -82,10 +89,22 @@ public function get_title( $request ) { return rest_ensure_response( $title_text ); } + /** + * Validates a given URL + * + * @param String $url the url to validate + * @return Boolean whether or not the URL is considered valid. + */ public function validate_url( $url ) { return wp_http_validate_url( $url ); } + /** + * Sanitizes a given URL. + * + * @param String $url the URL to sanitize. + * @return String the sanitized version of the URL. + */ public function sanitize_url( $url ) { return esc_url_raw( $url ); } @@ -93,8 +112,6 @@ public function sanitize_url( $url ) { /** * Checks whether a given request has permission to read remote urls. * - * @since 5.7.0 - * * @param WP_REST_Request $request Full details about the request. * @return WP_Error|bool True if the request has access, WP_Error object otherwise. * @@ -117,17 +134,24 @@ public function get_remote_url_permissions_check( $request ) { /* phpcs:enable */ + /** + * Retrives a DOMDocument representation of the + * HTML from a remote URL + * + * @param String $url the website url whose HTML we want to access. + * @return DOMDocument the loaded HTML response. + */ private function get_remote_url_html( $url ) { $response = wp_remote_get( $url ); if ( is_wp_error( $response ) || ! is_array( $response ) ) { - return new WP_Error( 'no_response', 'Unable to contact remote url . ' . $response->get_error_message(), array( 'status' => 404 ) ); + return new WP_Error( 'no_response', __( 'Unable to contact remote url.', 'gutenberg' ) . $response->get_error_message(), array( 'status' => 404 ) ); } $body = wp_remote_retrieve_body( $response ); - $dom = new DOMDocument( '1.0', 'UTF - 8' ); + $dom = new DOMDocument( '1.0', 'UTF-8' ); // set error level $internalErrors = libxml_use_internal_errors( true ); From 9a027d6c8b8f98362b149c581b2556f659b015a0 Mon Sep 17 00:00:00 2001 From: Dave Smith <getdavemail@gmail.com> Date: Mon, 21 Oct 2019 10:57:33 +0100 Subject: [PATCH 05/12] Adds caching of remote request --- lib/class-wp-rest-url-details-controller.php | 21 +++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/class-wp-rest-url-details-controller.php b/lib/class-wp-rest-url-details-controller.php index a01306da231c98..e33665073d7152 100644 --- a/lib/class-wp-rest-url-details-controller.php +++ b/lib/class-wp-rest-url-details-controller.php @@ -143,10 +143,25 @@ public function get_remote_url_permissions_check( $request ) { */ private function get_remote_url_html( $url ) { - $response = wp_remote_get( $url ); + $response = null; - if ( is_wp_error( $response ) || ! is_array( $response ) ) { - return new WP_Error( 'no_response', __( 'Unable to contact remote url.', 'gutenberg' ) . $response->get_error_message(), array( 'status' => 404 ) ); + // Transient per URL + $cache_key = 'g_url_details_response_' . md5( $url ); + + // Attempt to retrieve cached response + $cached_response = get_transient( $cache_key ); + + if ( ! empty( $cached_response ) ) { + $response = $cached_response; + } else { + $response = wp_remote_get( $url ); + + if ( is_wp_error( $response ) || ! is_array( $response ) ) { + return new WP_Error( 'no_response', __( 'Unable to contact remote url.', 'gutenberg' ) . $response->get_error_message(), array( 'status' => 404 ) ); + } + + // Only cache valid responses. + set_transient( $cache_key, $response, HOUR_IN_SECONDS ); } $body = wp_remote_retrieve_body( $response ); From dddcd7fc4b7ce211832a9050878651d98c35905b Mon Sep 17 00:00:00 2001 From: Konstantin Obenland <obenland@gmx.de> Date: Thu, 31 Oct 2019 15:07:40 -0700 Subject: [PATCH 06/12] Update with feedback --- lib/class-wp-rest-url-details-controller.php | 125 ++++++------------- 1 file changed, 37 insertions(+), 88 deletions(-) diff --git a/lib/class-wp-rest-url-details-controller.php b/lib/class-wp-rest-url-details-controller.php index e33665073d7152..e2ff62b635b8c6 100644 --- a/lib/class-wp-rest-url-details-controller.php +++ b/lib/class-wp-rest-url-details-controller.php @@ -41,12 +41,8 @@ public function register_routes() { 'callback' => array( $this, 'get_title' ), 'args' => array( 'url' => array( - 'validate_callback' => function( $param ) { - return $this->validate_url( $param ); - }, - 'sanitize_callback' => function( $param ) { - return $this->sanitize_url( $param ); - }, + 'validate_callback' => 'wp_http_validate_url', + 'sanitize_callback' => 'esc_url_raw', ), ), 'permission_callback' => array( $this, 'get_remote_url_permissions_check' ), @@ -55,9 +51,6 @@ public function register_routes() { ); } - - - /** * Retrieves the contents of the <title> tag from the HTML * response. @@ -67,62 +60,23 @@ public function register_routes() { * @return String|WP_Error the title text or an error. */ public function get_title( $request ) { + $url = $request->get_param( 'url' ); + $title = $this->get_remote_url_title( $url ); - $url = $request->get_param( 'url' ); - - $html_response = $this->get_remote_url_html( $url ); - - if ( is_wp_error( $html_response ) ) { - return new WP_Error( 'no_title', __( 'Unable to retrieve title tag.', 'gutenberg' ) . $html_response->get_error_message(), array( 'status' => 404 ) ); - } - - $title_list = $html_response->getElementsByTagName( 'title' ); - - $title = $title_list->item( 0 ); - - if ( empty( $title ) ) { - return new WP_Error( 'no_title', __( 'No title tag at remote url.', 'gutenberg' ), array( 'status' => 404 ) ); - } - - $title_text = $title->nodeValue; - - return rest_ensure_response( $title_text ); - } - - /** - * Validates a given URL - * - * @param String $url the url to validate - * @return Boolean whether or not the URL is considered valid. - */ - public function validate_url( $url ) { - return wp_http_validate_url( $url ); - } - - /** - * Sanitizes a given URL. - * - * @param String $url the URL to sanitize. - * @return String the sanitized version of the URL. - */ - public function sanitize_url( $url ) { - return esc_url_raw( $url ); + return rest_ensure_response( $title ); } /** * Checks whether a given request has permission to read remote urls. * + * phpcs:disable VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + * * @param WP_REST_Request $request Full details about the request. * @return WP_Error|bool True if the request has access, WP_Error object otherwise. - * - * This function is overloading a function defined in WP_REST_Controller so it should have the same parameters. - * phpcs:disable VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable */ public function get_remote_url_permissions_check( $request ) { - - $required_cap = 'edit_posts'; - - if ( ! current_user_can( $required_cap ) ) { + /* phpcs:enable */ + if ( ! current_user_can( 'edit_posts' ) ) { return new WP_Error( 'rest_user_cannot_view', __( 'Sorry, you are not allowed to access remote urls.', 'gutenberg' ) @@ -131,52 +85,47 @@ public function get_remote_url_permissions_check( $request ) { return true; } - /* phpcs:enable */ - /** - * Retrives a DOMDocument representation of the - * HTML from a remote URL + * Retrieves the document title from a remote URL * * @param String $url the website url whose HTML we want to access. - * @return DOMDocument the loaded HTML response. + * @return string|WP_Error The URL's document title on success, WP_Error on failure. */ - private function get_remote_url_html( $url ) { - - $response = null; + private function get_remote_url_title( $url ) { + // Transient per URL. + $cache_key = 'g_url_details_response_' . hash( 'crc32b', $url ); - // Transient per URL - $cache_key = 'g_url_details_response_' . md5( $url ); + // Attempt to retrieve cached response. + $title = null;//get_transient( $cache_key ); - // Attempt to retrieve cached response - $cached_response = get_transient( $cache_key ); + if ( empty( $title ) ) { + $request = wp_safe_remote_get( $url, array( + 'timeout' => 10, + // 'redirection' => 0, + 'limit_response_size' => 153600, // 150 KB + ) ); + $remote_source = wp_remote_retrieve_body( $request ); + + if ( ! $remote_source ) { + return new WP_Error( 'no_response', __( 'The source URL does not exist.', 'gutenberg' ), array( 'status' => 404 ) ); + } - if ( ! empty( $cached_response ) ) { - $response = $cached_response; - } else { - $response = wp_remote_get( $url ); + // Work around bug in strip_tags(): + $remote_source = str_replace( '<!DOC', '<DOC', $remote_source ); + $remote_source = preg_replace( '/[\r\n\t ]+/', ' ', $remote_source ); // Normalize spaces. + $remote_source = preg_replace( '/<\/*(h1|h2|h3|h4|h5|h6|p|th|td|li|dt|dd|pre|caption|input|textarea|button|body)[^>]*>/', "\n\n", $remote_source ); - if ( is_wp_error( $response ) || ! is_array( $response ) ) { - return new WP_Error( 'no_response', __( 'Unable to contact remote url.', 'gutenberg' ) . $response->get_error_message(), array( 'status' => 404 ) ); + preg_match( '|<title>([^<]*?)|is', $remote_source, $match_title ); + $title = isset( $match_title[1] ) ? $match_title[1] : ''; + if ( empty( $title ) ) { + return new WP_Error( 'no_title', __( 'No document title at remote url.', 'gutenberg' ), array( 'status' => 404 ) ); } // Only cache valid responses. - set_transient( $cache_key, $response, HOUR_IN_SECONDS ); + set_transient( $cache_key, $title, HOUR_IN_SECONDS ); } - $body = wp_remote_retrieve_body( $response ); - - $dom = new DOMDocument( '1.0', 'UTF-8' ); - - // set error level - $internalErrors = libxml_use_internal_errors( true ); - - // load HTML - $dom->loadHTML( $body ); - - // Restore error level - libxml_use_internal_errors( $internalErrors ); - - return $dom; + return $title; } } From a2b901a9b743a5ff88ddaf70cc36a0496edb21e6 Mon Sep 17 00:00:00 2001 From: Konstantin Obenland Date: Fri, 1 Nov 2019 08:16:15 -0700 Subject: [PATCH 07/12] Tie up loose ends --- lib/class-wp-rest-url-details-controller.php | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/class-wp-rest-url-details-controller.php b/lib/class-wp-rest-url-details-controller.php index e2ff62b635b8c6..70202c5e7ec34d 100644 --- a/lib/class-wp-rest-url-details-controller.php +++ b/lib/class-wp-rest-url-details-controller.php @@ -56,8 +56,8 @@ public function register_routes() { * response. * * @access public - * @param WP_REST_REQUEST $request Full details about the request. - * @return String|WP_Error the title text or an error. + * @param WP_REST_REQUEST $request Full details about the request. + * @return String|WP_Error The title text or an error. */ public function get_title( $request ) { $url = $request->get_param( 'url' ); @@ -87,9 +87,9 @@ public function get_remote_url_permissions_check( $request ) { } /** - * Retrieves the document title from a remote URL + * Retrieves the document title from a remote URL. * - * @param String $url the website url whose HTML we want to access. + * @param string $url The website url whose HTML we want to access. * @return string|WP_Error The URL's document title on success, WP_Error on failure. */ private function get_remote_url_title( $url ) { @@ -97,12 +97,11 @@ private function get_remote_url_title( $url ) { $cache_key = 'g_url_details_response_' . hash( 'crc32b', $url ); // Attempt to retrieve cached response. - $title = null;//get_transient( $cache_key ); + $title = get_transient( $cache_key ); if ( empty( $title ) ) { $request = wp_safe_remote_get( $url, array( 'timeout' => 10, - // 'redirection' => 0, 'limit_response_size' => 153600, // 150 KB ) ); $remote_source = wp_remote_retrieve_body( $request ); From a5d9c256b23b733d5208e5e617d4c8b06dcce2df Mon Sep 17 00:00:00 2001 From: Konstantin Obenland Date: Fri, 1 Nov 2019 15:09:05 -0700 Subject: [PATCH 08/12] Remove unneed replacements --- lib/class-wp-rest-url-details-controller.php | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/lib/class-wp-rest-url-details-controller.php b/lib/class-wp-rest-url-details-controller.php index 70202c5e7ec34d..158e9e6c5adf22 100644 --- a/lib/class-wp-rest-url-details-controller.php +++ b/lib/class-wp-rest-url-details-controller.php @@ -69,13 +69,9 @@ public function get_title( $request ) { /** * Checks whether a given request has permission to read remote urls. * - * phpcs:disable VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable - * - * @param WP_REST_Request $request Full details about the request. * @return WP_Error|bool True if the request has access, WP_Error object otherwise. */ - public function get_remote_url_permissions_check( $request ) { - /* phpcs:enable */ + public function get_remote_url_permissions_check() { if ( ! current_user_can( 'edit_posts' ) ) { return new WP_Error( 'rest_user_cannot_view', @@ -102,7 +98,7 @@ private function get_remote_url_title( $url ) { if ( empty( $title ) ) { $request = wp_safe_remote_get( $url, array( 'timeout' => 10, - 'limit_response_size' => 153600, // 150 KB + 'limit_response_size' => 153600, // 150 KB. ) ); $remote_source = wp_remote_retrieve_body( $request ); @@ -110,13 +106,9 @@ private function get_remote_url_title( $url ) { return new WP_Error( 'no_response', __( 'The source URL does not exist.', 'gutenberg' ), array( 'status' => 404 ) ); } - // Work around bug in strip_tags(): - $remote_source = str_replace( ']*>/', "\n\n", $remote_source ); - preg_match( '|([^<]*?)|is', $remote_source, $match_title ); - $title = isset( $match_title[1] ) ? $match_title[1] : ''; + $title = isset( $match_title[1] ) ? trim( $match_title[1] ) : ''; + if ( empty( $title ) ) { return new WP_Error( 'no_title', __( 'No document title at remote url.', 'gutenberg' ), array( 'status' => 404 ) ); } From 1899841cedddf7e295e7f010ef2767af7f3aec5f Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Thu, 2 Jan 2020 15:40:55 +0000 Subject: [PATCH 09/12] Adds title tag info to URL search results POC to show that the REST API endpoint will return and display title tag info from the remote site. --- .../src/components/link-control/index.js | 47 ++++++++++++++++--- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/packages/block-editor/src/components/link-control/index.js b/packages/block-editor/src/components/link-control/index.js index 18ae9da7c5401b..0d4356761e895a 100644 --- a/packages/block-editor/src/components/link-control/index.js +++ b/packages/block-editor/src/components/link-control/index.js @@ -7,6 +7,10 @@ import { isFunction, noop, startsWith } from 'lodash'; /** * WordPress dependencies */ +/* eslint-disable import/no-extraneous-dependencies */ +import apiFetch from '@wordpress/api-fetch'; +/* eslint-enable import/no-extraneous-dependencies */ + import { Button, ExternalLink, @@ -27,6 +31,7 @@ import { isURL, prependHTTP, getProtocol, + addQueryArgs, } from '@wordpress/url'; import { withInstanceId, compose } from '@wordpress/compose'; @@ -40,7 +45,6 @@ import LinkControlSearchItem from './search-item'; import LinkControlSearchInput from './search-input'; const MODE_EDIT = 'edit'; -// const MODE_SHOW = 'show'; function LinkControl( { className, @@ -112,6 +116,20 @@ function LinkControl( { setInputValue( '' ); }; + const fetchRemoteURLTitle = ( value ) => { + const endpoint = '/__experimental/url-details/title'; + + const args = { + url: prependHTTP( value ), + }; + + const queryArgs = addQueryArgs( endpoint, args ); + + return apiFetch( { + path: queryArgs, + } ); + }; + const handleDirectEntry = ( value ) => { let type = 'URL'; @@ -129,13 +147,28 @@ function LinkControl( { type = 'internal'; } + const defaultResponse = { + id: '-1', + title: value, + url: type === 'URL' ? prependHTTP( value ) : value, + type, + }; + + // If it's a URL then request the `` tag + // Todo: + // * avoid invalid requests for incomplete URLS + // * avoid concurrent requests - cancel existing AJAX requests if already pending + if ( type === 'URL' && isURL( prependHTTP( value ) ) ) { + return fetchRemoteURLTitle( value ).then( ( title ) => { + return [ { + ...defaultResponse, + title: title || value, + } ]; + } ).catch( () => [ defaultResponse ] ); + } + return Promise.resolve( - [ { - id: '-1', - title: value, - url: type === 'URL' ? prependHTTP( value ) : value, - type, - } ] + [ defaultResponse ] ); }; From 9b49fa371991135787c9db9dc3634a17ef17d3de Mon Sep 17 00:00:00 2001 From: Dave Smith <getdavemail@gmail.com> Date: Fri, 3 Jan 2020 09:42:32 +0000 Subject: [PATCH 10/12] Updates to move URL fetch API calls into Block Editor Provider settings See https://github.com/WordPress/gutenberg/pull/19387#issuecomment-570517847 --- .../src/components/link-control/index.js | 36 ++++++------------- .../editor/src/components/provider/index.js | 15 +++++++- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/packages/block-editor/src/components/link-control/index.js b/packages/block-editor/src/components/link-control/index.js index 0d4356761e895a..570c9bf65ba209 100644 --- a/packages/block-editor/src/components/link-control/index.js +++ b/packages/block-editor/src/components/link-control/index.js @@ -7,10 +7,6 @@ import { isFunction, noop, startsWith } from 'lodash'; /** * WordPress dependencies */ -/* eslint-disable import/no-extraneous-dependencies */ -import apiFetch from '@wordpress/api-fetch'; -/* eslint-enable import/no-extraneous-dependencies */ - import { Button, ExternalLink, @@ -31,7 +27,6 @@ import { isURL, prependHTTP, getProtocol, - addQueryArgs, } from '@wordpress/url'; import { withInstanceId, compose } from '@wordpress/compose'; @@ -51,6 +46,7 @@ function LinkControl( { currentLink, currentSettings, fetchSearchSuggestions, + fetchRemoteURLTitle, instanceId, onClose = noop, onChangeMode = noop, @@ -116,21 +112,7 @@ function LinkControl( { setInputValue( '' ); }; - const fetchRemoteURLTitle = ( value ) => { - const endpoint = '/__experimental/url-details/title'; - - const args = { - url: prependHTTP( value ), - }; - - const queryArgs = addQueryArgs( endpoint, args ); - - return apiFetch( { - path: queryArgs, - } ); - }; - - const handleDirectEntry = ( value ) => { + const handleDirectEntry = async ( value ) => { let type = 'URL'; const protocol = getProtocol( value ) || ''; @@ -159,17 +141,18 @@ function LinkControl( { // * avoid invalid requests for incomplete URLS // * avoid concurrent requests - cancel existing AJAX requests if already pending if ( type === 'URL' && isURL( prependHTTP( value ) ) ) { - return fetchRemoteURLTitle( value ).then( ( title ) => { + try { + const urlTitle = await fetchRemoteURLTitle( value ); return [ { ...defaultResponse, - title: title || value, + title: urlTitle || value, } ]; - } ).catch( () => [ defaultResponse ] ); + } catch ( error ) { + return [ defaultResponse ]; + } } - return Promise.resolve( - [ defaultResponse ] - ); + return [ defaultResponse ]; }; const handleEntitySearch = async ( value ) => { @@ -297,6 +280,7 @@ export default compose( const { getSettings } = select( 'core/block-editor' ); return { fetchSearchSuggestions: getSettings().__experimentalFetchLinkSuggestions, + fetchRemoteURLTitle: getSettings().__experimentalFetchRemoteURLTitle, }; } ) )( LinkControl ); diff --git a/packages/editor/src/components/provider/index.js b/packages/editor/src/components/provider/index.js index 1d3714e5c09885..df4e395b3196ac 100644 --- a/packages/editor/src/components/provider/index.js +++ b/packages/editor/src/components/provider/index.js @@ -14,7 +14,7 @@ import { __ } from '@wordpress/i18n'; import { EntityProvider } from '@wordpress/core-data'; import { BlockEditorProvider, transformStyles } from '@wordpress/block-editor'; import apiFetch from '@wordpress/api-fetch'; -import { addQueryArgs } from '@wordpress/url'; +import { addQueryArgs, prependHTTP } from '@wordpress/url'; import { decodeEntities } from '@wordpress/html-entities'; /** @@ -42,6 +42,18 @@ const fetchLinkSuggestions = async ( search ) => { } ) ); }; +const fetchRemoteURLTitle = async ( url ) => { + const endpoint = '/__experimental/url-details/title'; + + const args = { + url: prependHTTP( url ), + }; + + return apiFetch( { + path: addQueryArgs( endpoint, args ), + } ); +}; + class EditorProvider extends Component { constructor( props ) { super( ...arguments ); @@ -118,6 +130,7 @@ class EditorProvider extends Component { __experimentalFetchReusableBlocks, __experimentalFetchLinkSuggestions: fetchLinkSuggestions, __experimentalCanUserUseUnfilteredHTML: canUserUseUnfilteredHTML, + __experimentalFetchRemoteURLTitle: fetchRemoteURLTitle, }; } From 7399becbd18a1abb56171a2c64d74784b6a0659d Mon Sep 17 00:00:00 2001 From: Dave Smith <getdavemail@gmail.com> Date: Fri, 3 Jan 2020 10:04:24 +0000 Subject: [PATCH 11/12] Fix to only fetch URL details for values that could actually be URLs --- .../src/components/link-control/index.js | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/block-editor/src/components/link-control/index.js b/packages/block-editor/src/components/link-control/index.js index 570c9bf65ba209..fcabde566ccc41 100644 --- a/packages/block-editor/src/components/link-control/index.js +++ b/packages/block-editor/src/components/link-control/index.js @@ -112,7 +112,7 @@ function LinkControl( { setInputValue( '' ); }; - const handleDirectEntry = async ( value ) => { + const handleDirectEntry = async ( value, { fetchUrlInfo = true } = {} ) => { let type = 'URL'; const protocol = getProtocol( value ) || ''; @@ -140,7 +140,7 @@ function LinkControl( { // Todo: // * avoid invalid requests for incomplete URLS // * avoid concurrent requests - cancel existing AJAX requests if already pending - if ( type === 'URL' && isURL( prependHTTP( value ) ) ) { + if ( fetchUrlInfo && type === 'URL' && isURL( prependHTTP( value ) ) && value.length > 3 ) { try { const urlTitle = await fetchRemoteURLTitle( value ); return [ { @@ -156,13 +156,15 @@ function LinkControl( { }; const handleEntitySearch = async ( value ) => { + const couldBeURL = ! value.includes( ' ' ); + const results = await Promise.all( [ fetchSearchSuggestions( value ), - handleDirectEntry( value ), + handleDirectEntry( value, { + fetchUrlInfo: couldBeURL, + } ), ] ); - const couldBeURL = ! value.includes( ' ' ); - // If it's potentially a URL search then concat on a URL search suggestion // just for good measure. That way once the actual results run out we always // have a URL option to fallback on. From ecf7720ab2de1854436e51bf216cc4fbbe72df2b Mon Sep 17 00:00:00 2001 From: Dave Smith <getdavemail@gmail.com> Date: Fri, 3 Jan 2020 11:41:23 +0000 Subject: [PATCH 12/12] Include remote site url in search results display --- .../block-editor/src/components/link-control/search-item.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/block-editor/src/components/link-control/search-item.js b/packages/block-editor/src/components/link-control/search-item.js index 7b086860d30560..d28992fd1a645e 100644 --- a/packages/block-editor/src/components/link-control/search-item.js +++ b/packages/block-editor/src/components/link-control/search-item.js @@ -36,7 +36,7 @@ export const LinkControlSearchItem = ( { itemProps, suggestion, isSelected = fal <span aria-hidden={ ! isURL } className="block-editor-link-control__search-item-info"> { ! isURL && ( safeDecodeURI( suggestion.url ) || '' ) } { isURL && ( - __( 'Press ENTER to add this link' ) + `${ safeDecodeURI( suggestion.url ) } - ${ __( 'press ENTER to add this link' ) }` ) } </span> </span>