diff --git a/src/Imdb/Person.php b/src/Imdb/Person.php index cae42016..53383812 100644 --- a/src/Imdb/Person.php +++ b/src/Imdb/Person.php @@ -598,74 +598,143 @@ public function height() public function spouse() { if (empty($this->spouses)) { - $this->getPage("Bio"); - $doc = new \DOMDocument(); - @$doc->loadHTML($this->page["Bio"]); - $xp = new \DOMXPath($doc); - $posters = array(); - $found = false; - if ($tab = $doc->getElementById('tableSpouses')) { - foreach ($tab->getElementsByTagName('tr') as $sp) { - $first = $sp->getElementsByTagName('td')->item(0); // name and IMDBID - $nam = trim($first->nodeValue); - if ($href = $first->getElementsByTagName('a')->item(0)) { - $mid = preg_replace('!.*/name/nm(\d+).*!', '$1', $href->getAttribute('href')); - } else { - $mid = ''; - } - if (!empty($nam)) { - $found = true; - } - $first = $sp->getElementsByTagName('td')->item(1); // additional details - $html = $first->ownerDocument->saveXML($first); - preg_match_all('!(\(.+?\))!ms', $html, $matches); - $comment = ''; - $children = ''; - for ($i = 0; $i < count($matches[0]); ++$i) { - if ($i == 0) { // usually the "lifespan" of the relation - if (preg_match('!(\(query("//table[contains(@id, 'tableFamily')]/tr[1]/td[1]"); + if ($spouse->count()) { + if (trim($spouse->item(0)->nodeValue) == "Spouse") { + if ($tab = $xp->query("//table[contains(@id, 'tableFamily')]/tr[1]/td[2]")) { + $html = $tab->item(0)->ownerDocument->saveXML($tab->item(0)); + $htmlParts = explode("
", $html); + foreach ($htmlParts as $parts) { + // imdbid + $mid = ''; + if (preg_match('/
/', $parts, $url)) { + $mid = $url[1]; + } + // spouse name + $name = ''; + if (preg_match('![^(]*\([^(\d]*!', $parts, $nameRaw)) { + $nameClean = preg_replace('/[^A-Za-z0-9().\-\"\"\W ]/', '', strip_tags($nameRaw[0])); + if (strpos($nameClean, ')') && !strpos($nameClean, '?')) { + $name = trim($nameClean); + echo 'name'; + } else { + $nameClean = explode('(', $nameClean); + if (!strpos($nameClean[0], '?')) { + $name = trim($nameClean[0]); + } + } + } + //Dates, comment and children + preg_match_all('!(\(.+?\))!ms', strip_tags($parts), $matches); + // remove leftover spouse name parts (imdbid 0001228 extra name between brackets) + if (!preg_match('~[0-9]+~', $matches[0][0]) && !strpos($matches[0][0], '?')) { + unset($matches[0][0]); + sort($matches[0]); + } + $datesRaw = preg_replace('/[^A-Za-z0-9-]/', ' ', $matches[0][0]); + //from date + $fromDay = ''; + $fromMonth = ''; + $fromYear = ''; + $fromDateRaw = explode('-', $datesRaw); + if (array_key_exists(0, $fromDateRaw) && preg_match('~[0-9]+~', $fromDateRaw[0])) { + $fromDate = array_values(array_filter(explode(' ', trim($fromDateRaw[0])))); + $count = count($fromDate); + if ($count == 1) { + if (preg_match('~[0-9]+~', $fromDate[0])) { + $fromYear = $fromDate[0]; + } + } elseif ($count == 2) { + $fromMonth = trim($fromDate[0]); + if (preg_match('~[0-9]+~', $fromDate[1])) { + $fromYear = $fromDate[1]; + } + } elseif ($count == 3) { + if (preg_match('~[0-9]+~', $fromDate[0])) { + $fromDay = $fromDate[0]; + } + $fromMonth = trim($fromDate[1]); + if (preg_match('~[0-9]+~', $fromDate[2])) { + $fromYear = $fromDate[2]; + } + } $from = array( - "day" => $match['day'], - "month" => $match['month'], - "mon" => $match['monthname'], - "year" => $match['year'] - ); + "day" => $fromDay, + "month" => $fromMonth, + "mon" => $this->monthNo($fromMonth), + "year" => $fromYear + ); } else { - $from = array("day" => '', "month" => '', "mon" => '', "year" => ''); + $from = array("day" => '', "month" => '', "mon" => '', "year" => ''); } - if (preg_match('!(.+?)\s+-\s+( $match['day'], - "month" => $match['month'], - "mon" => $match['monthname'], - "year" => $match['year'] - ); + "day" => $toDay, + "month" => $toMonth, + "mon" => $this->monthNo($toMonth), + "year" => $toYear + ); } else { - $to = array("day" => '', "month" => '', "mon" => '', "year" => ''); + $to = array("day" => '', "month" => '', "mon" => '', "year" => ''); } - } - if ($i > 0 || empty($from)) { - $comment .= $matches[0][$i] . " "; + // Comment and Children + $elements = count($matches[0]) - 1; //count remaining elements after dates + $comment = ''; + $children = 0; + if ($elements == 1) { + if (preg_match('!(\d+) child!', $matches[0][1], $match)) { + $children = $match[1]; + } else { + $comment = trim($matches[0][1], " ()"); + } + } elseif ($elements == 2) { + //sometimes those 2 values are reversed, don't know why, so have to check. + if (preg_match('!(\d+) child!', $matches[0][1], $match)) { + $children = $match[1]; + $comment = trim($matches[0][2], " ()"); + } elseif (preg_match('!(\d+) child!', $matches[0][2], $match)) { + $children = $match[1]; + $comment = trim($matches[0][1], " ()"); + } + } + $this->spouses[] = array( + 'imdb' => $mid, + 'name' => $name, + 'from' => $from, + 'to' => $to, + 'comment' => $comment, + 'children' => (int)$children + ); } } - if (preg_match('!(\d+) child!', $html, $match)) { - $children = $match[1]; - } - $this->spouses[] = array( - 'imdb' => $mid, - 'name' => $nam, - 'from' => $from, - 'to' => $to, - 'comment' => $comment, - 'children' => $children - ); } } - if (!$found) { - return $this->spouses; - } // no spouses } return $this->spouses; } diff --git a/tests/PersonTest.php b/tests/PersonTest.php index 31810957..b783116b 100644 --- a/tests/PersonTest.php +++ b/tests/PersonTest.php @@ -128,7 +128,9 @@ public function test_movies_soundtrack() $result = $person->movies_soundtrack(); $this->assertIsArray($result); $this->assertGreaterThanOrEqual(6, $result); - $poppyHill = current(array_filter($result, function ($item) { return $item['mid'] == '1798188'; })); + $poppyHill = current(array_filter($result, function ($item) { + return $item['mid'] == '1798188'; + })); $this->assertEquals('1798188', $poppyHill['mid']); $this->assertEquals('From Up on Poppy Hill', $poppyHill['name']); $this->assertEquals('2011', $poppyHill['year']); @@ -176,10 +178,8 @@ public function test_movies_self() $this->assertLessThan(35, count($result)); $matches = 0; - foreach($result as $movie) - { - if($movie['mid'] == 1095875) - { + foreach ($result as $movie) { + if ($movie['mid'] == 1095875) { $this->assertEquals('Jônetsu tairiku', $movie['name']); $this->assertEquals('2014', $movie['year']); $this->assertEquals('', $movie['chid']); @@ -224,7 +224,7 @@ public function test_movies_archive() $this->assertEquals(array(), $oscars['addons']); $troldspejlet = array_find_item($result, 'mid', '0318251'); - $this->assertEquals('0318251',$troldspejlet['mid']); + $this->assertEquals('0318251', $troldspejlet['mid']); $this->assertEquals('Troldspejlet', $troldspejlet['name']); $this->assertEquals('2009', $troldspejlet['year']); $this->assertEquals('', $troldspejlet['chid']); @@ -284,13 +284,111 @@ public function test_height() $this->assertEquals('1.64 m', $result['metric']); } - // @TODO Write proper tests for this method - // @TODO this method doesn't work -// public function test_spouse() -// { -// $person = $this->getimdb_person(); -// $this->assertNotEmpty($person->spouse()); -// } + public function test_spouse_still_married() + { + $person = $this->getimdb_person(); + $result = $person->spouse(); + $this->assertNotEmpty($result); + $this->assertCount(1, $result); + $first = $result[0]; + $this->assertEquals(array( + 'imdb' => '1088112', + 'name' => 'Akemi Ôta', + 'from' => + array( + 'day' => '', + 'month' => 'October', + 'mon' => '10', + 'year' => '1965', + ), + 'to' => + array( + 'day' => '', + 'month' => '', + 'mon' => '', + 'year' => '', + ), + 'comment' => '', + 'children' => 2, + ), $first); + } + + public function test_spouse_multiple_spouse() + { + $person = $this->getimdb_person('0000245'); + $result = $person->spouse(); + $this->assertNotEmpty($result); + $this->assertCount(3, $result); + $this->assertEquals(array( + 'imdb' => '6699367', + 'name' => 'Susan Schneider', + 'from' => + array( + 'day' => 22, + 'month' => 'October', + 'mon' => 10, + 'year' => 2011, + ), + 'to' => + array( + 'day' => 11, + 'month' => 'August', + 'mon' => 8, + 'year' => 2014, + ), + 'comment' => 'his death', + 'children' => 0, + ), $result[0]); + + $this->assertEquals(array( + 'imdb' => '0931265', + 'name' => 'Marsha Garces Williams', + 'from' => + array( + 'day' => 30, + 'month' => 'April', + 'mon' => 4, + 'year' => 1989, + ), + 'to' => + array( + 'day' => '', + 'month' => '', + 'mon' => '', + 'year' => 2010, + ), + 'comment' => 'divorced', + 'children' => 2, + ), $result[1]); + + $this->assertEquals(array( + 'imdb' => '0892239', + 'name' => 'Valerie Velardi', + 'from' => + array( + 'day' => 4, + 'month' => 'June', + 'mon' => 6, + 'year' => 1978, + ), + 'to' => + array( + 'day' => 6, + 'month' => 'December', + 'mon' => 12, + 'year' => 1988, + ), + 'comment' => 'divorced', + 'children' => 1, + ), $result[2]); + } + + public function test_spouse_no_spouse() + { + $person = $this->getimdb_person('0005132'); + $result = $person->spouse(); + $this->assertCount(0, $result); + } //@TODO Write proper tests for this method public function test_bio() @@ -384,19 +482,19 @@ public function test_magcovers() 'inturl' => '', 'name' => 'Comixene (DE)', 'date' => - array( - 'day' => '', - 'month' => 'September', - 'mon' => '09', - 'year' => '2005', - 'full' => 'September 2005', - ), + array( + 'day' => '', + 'month' => 'September', + 'mon' => '09', + 'year' => '2005', + 'full' => 'September 2005', + ), 'details' => 'Iss. 89', 'auturl' => '', 'author' => '', - ), $first); + ), $first); } - + public function test_real_id() { $person = $this->getimdb_person();