Skip to content

Commit

Permalink
Validate and normalize museum URLs when submitting artwork
Browse files Browse the repository at this point in the history
  • Loading branch information
acabal committed Jan 12, 2024
1 parent 9c27d80 commit 16df5b2
Show file tree
Hide file tree
Showing 9 changed files with 554 additions and 41 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ Before submitting design contributions, please discuss them with the Standard Eb
- Allow submitter or admins to edit unapproved artwork submissions. Approved/in use submissions should not be editable by anyone.
- Write responsive CSS to make artwork list at `/artworks` mobile-friendly.
- Include in-use ebook slug as a search parameter when searching for artwork by keyword.
## PHP code style
Expand Down
53 changes: 33 additions & 20 deletions lib/Artwork.php
Original file line number Diff line number Diff line change
Expand Up @@ -323,15 +323,11 @@ protected function Validate(array &$uploadedFile = []): void{
$error->Add(new Exceptions\StringTooLongException('Link to an approved museum page'));
}

if($this->MuseumUrl == '' || filter_var($this->MuseumUrl, FILTER_VALIDATE_URL) === false){
$error->Add(new Exceptions\InvalidMuseumUrlException());
}

// Don't allow unapproved museums
try{
Museum::GetByUrl($this->MuseumUrl);
$this->Museum = Museum::GetByUrl($this->MuseumUrl);
$this->MuseumUrl = Museum::NormalizeUrl($this->MuseumUrl);
}
catch(Exceptions\MuseumNotFoundException $ex){
catch(Exceptions\MuseumNotFoundException | Exceptions\InvalidUrlException $ex){
$error->Add($ex);
}
}
Expand Down Expand Up @@ -466,53 +462,68 @@ private function NormalizePageScanUrl(string $url): string{
}

if(stripos($parsedUrl['host'], 'hathitrust.org') !== false){
// https://babel.hathitrust.org/cgi/pt?id=hvd.32044034383265&seq=13
$exampleUrl = 'https://babel.hathitrust.org/cgi/pt?id=hvd.32044034383265&seq=13';

if($parsedUrl['host'] != 'babel.hathitrust.org'){
throw new Exceptions\InvalidHathiTrustUrlException();
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

if($parsedUrl['path'] != '/cgi/pt'){
throw new Exceptions\InvalidHathiTrustUrlException();
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

parse_str($parsedUrl['query'] ?? '', $vars);

if(!isset($vars['id']) || !isset($vars['seq']) || is_array($vars['id']) || is_array($vars['seq'])){
throw new Exceptions\InvalidHathiTrustUrlException();
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

$outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path'] . '?id=' . $vars['id'] . '&seq=' . $vars['seq'];

return $outputUrl;
}

if(stripos($parsedUrl['host'], 'archive.org') !== false){
// https://archive.org/details/royalacademypict1902roya/page/n9/mode/1up
$exampleUrl = 'https://archive.org/details/royalacademypict1902roya/page/n9/mode/1up';

if($parsedUrl['host'] != 'archive.org'){
throw new Exceptions\InvalidInternetArchiveUrlException();
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

if(!preg_match('|^/details/[^/]+?/page/[^/]+/mode/1up$|ius', $parsedUrl['path'])){
throw new Exceptions\InvalidInternetArchiveUrlException();
// If we're missing the view mode, append it
if(preg_match('|^/details/[^/]+?/page/[^/]+$|ius', $parsedUrl['path'])){
$parsedUrl['path'] = $parsedUrl['path'] . '/mode/1up';
}

// archive.org URLs may have both a book ID and collection ID, like
// https://archive.org/details/TheStrandMagazineAnIllustratedMonthly/TheStrandMagazine1914bVol.XlviiiJul-dec/page/n254/mode/1up
// The `/page/<number>` portion of the URL may also be missing if we're on page 1 (like the cover)
if(!preg_match('|^/details/[^/]+?(/[^/]+?)?(/page/[^/]+)?/mode/1up$|ius', $parsedUrl['path'])){
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

$outputUrl = 'https://' . $parsedUrl['host'] . $parsedUrl['path'];

return $outputUrl;
}

if(stripos($parsedUrl['host'], 'google.com') !== false){
// Old style: https://books.google.com/books?id=mZpAAAAAYAAJ&pg=PA70-IA2
// New style: https://www.google.com/books/edition/_/mZpAAAAAYAAJ?gbpv=1&pg=PA70-IA2

$exampleUrl = 'https://www.google.com/books/edition/_/mZpAAAAAYAAJ?gbpv=1&pg=PA70-IA2';

if($parsedUrl['host'] == 'books.google.com'){
// Old style, convert to new style

if($parsedUrl['path'] != '/books'){
throw new Exceptions\InvalidGoogleBooksUrlException();
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

parse_str($parsedUrl['query'] ?? '', $vars);

if(!isset($vars['id']) || !isset($vars['pg']) || is_array($vars['id']) || is_array($vars['pg'])){
throw new Exceptions\InvalidGoogleBooksUrlException();
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

$outputUrl = 'https://www.google.com/books/edition/_/' . $vars['id'] . '?gbpv=1&pg=' . $vars['pg'];
Expand All @@ -521,7 +532,7 @@ private function NormalizePageScanUrl(string $url): string{
// New style

if(!preg_match('|^/books/edition/[^/]+/[^/]+$|ius', $parsedUrl['path'])){
throw new Exceptions\InvalidGoogleBooksUrlException();
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

preg_match('|^/books/edition/[^/]+/([^/]+)$|ius', $parsedUrl['path'], $matches);
Expand All @@ -530,14 +541,16 @@ private function NormalizePageScanUrl(string $url): string{
parse_str($parsedUrl['query'] ?? '', $vars);

if(!isset($vars['gbpv']) || $vars['gbpv'] !== '1' || !isset($vars['pg']) || is_array($vars['pg'])){
throw new Exceptions\InvalidGoogleBooksUrlException();
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

$outputUrl = 'https://' . $parsedUrl['host'] . '/books/edition/_/' . $id . '?gbpv=' . $vars['gbpv'] . '&pg=' . $vars['pg'];
}
else{
throw new Exceptions\InvalidGoogleBooksUrlException();
throw new Exceptions\InvalidPageScanUrlException($url, $exampleUrl);
}

return $outputUrl;
}

return $outputUrl;
Expand Down
6 changes: 0 additions & 6 deletions lib/Exceptions/InvalidGoogleBooksUrlException.php

This file was deleted.

6 changes: 0 additions & 6 deletions lib/Exceptions/InvalidHathiTrustUrlException.php

This file was deleted.

6 changes: 0 additions & 6 deletions lib/Exceptions/InvalidInternetArchiveUrlException.php

This file was deleted.

4 changes: 3 additions & 1 deletion lib/Exceptions/InvalidMuseumUrlException.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@
namespace Exceptions;

/**
 * Exception for a museum URL that is not in the expected form.
 */
class InvalidMuseumUrlException extends InvalidUrlException{
// NOTE(review): this listing is a rendered diff with removed and added lines interleaved. The property default below looks like the removed side of the hunk; as shown, the constructor always overwrites it. Confirm which lines remain in the file.
protected $message = 'Invalid link to an approved museum page.';
/**
 * Build the message from the rejected URL and a sample of an acceptable one.
 *
 * @param string $url The museum URL that was rejected.
 * @param string $exampleUrl A URL shown in the message as the expected form.
 */
public function __construct(string $url, string $exampleUrl){
// Both URLs are wrapped in angle brackets inside the message text.
$this->message = 'Invalid museum URL: <' . $url . '>. Expected a URL like: <'. $exampleUrl . '>.';
}
}
8 changes: 8 additions & 0 deletions lib/Exceptions/InvalidPageScanUrlException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?
namespace Exceptions;

/**
 * Thrown for a page scan URL that does not have the expected shape.
 */
class InvalidPageScanUrlException extends InvalidUrlException{
	/**
	 * Compose the message from the rejected URL and an example of an acceptable one, each wrapped in angle brackets.
	 *
	 * @param string $url The page scan URL that was rejected.
	 * @param string $exampleUrl A sample URL in the expected form.
	 */
	public function __construct(string $url, string $exampleUrl){
		// The fragments are joined with no separator, so the result is one continuous sentence pair.
		$messageParts = [
			'Invalid page scan URL: <',
			$url,
			'>. Expected a URL like: <',
			$exampleUrl,
			'>.'
		];

		$this->message = implode('', $messageParts);
	}
}
2 changes: 1 addition & 1 deletion lib/Exceptions/InvalidUrlException.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class InvalidUrlException extends AppException{

/**
 * Build the exception, mentioning the offending URL in the message when one is supplied.
 *
 * @param ?string $url The invalid URL to include in the message; when `null`, the parent constructor is not called here.
 */
public function __construct(?string $url = null){
if($url !== null){
// NOTE(review): this listing is a rendered diff. The two calls below appear to be the removed and added versions of the same statement; presumably only the angle-bracket form remains in the file. Confirm.
parent::__construct('Invalid URL: ' . $url . '.');
parent::__construct('Invalid URL: <' . $url . '>.');
}
}
}
Loading

0 comments on commit 16df5b2

Please sign in to comment.