Skip to content

Commit

Permalink
Merge pull request #17 from hmerritt/v1.2.7
Browse files Browse the repository at this point in the history
V1.2.7
  • Loading branch information
hmerritt authored Dec 18, 2022
2 parents 86ba665 + 8000ca3 commit c1fd186
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 40 deletions.
77 changes: 63 additions & 14 deletions src/HtmlPieces.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class HtmlPieces
* @param string $element
* @return string
*/
public function get(object $page, string $element)
public function get(object $page, string $element, string $url='')
{
// Initiate dom object
// -> handles page scraping
Expand All @@ -32,7 +32,7 @@ public function get(object $page, string $element)

return $this->strClean($title);
break;

case "genre":
$allGenres = $dom->find($page, "div[data-testid=genres] a");
$genres = [];
Expand Down Expand Up @@ -193,14 +193,14 @@ public function get(object $page, string $element)
if ($this->count($castRow->find('img')) === 0) {
continue;
}

$actor = [];
$actor["actor"] = "";
$actor["avatar"] = "";
$actor["avatar_hq"] = "";
$actor["actor_id"] = "";
$actor["character"] = "";

// Actor
$actorLink = $castRow->find('a[data-testid=title-cast-item__actor]');
if ($this->count($actorLink)) {
Expand All @@ -217,7 +217,7 @@ public function get(object $page, string $element)
$actor["avatar_hq"] = preg_match('/\.\_/', $actor["avatar_hq"]) ? preg_split('/\.\_.*/', $actor["avatar_hq"])[0] . ".jpg" : $actor["avatar_hq"];
}
}

// Actor ID
$link = $castRow->find('a');
if ($this->count($link)) {
Expand All @@ -227,24 +227,72 @@ public function get(object $page, string $element)
$actor["actor_id"] = $matches[0];
}
}

// Character
$characterLink = $castRow->find('[data-testid=cast-item-characters-link] span');
if ($this->count($characterLink)) {
$actor["character"] = $characterLink->text;
}

$actor["character"] = $this->strClean($actor["character"]);
$actor["actor"] = $this->strClean($actor["actor"]);
$actor["avatar"] = $this->strClean($actor["avatar"]);
$actor["actor_id"] = $this->strClean($actor["actor_id"]);

array_push($cast, $actor);
}
}
return $cast;
break;

case "tvShow":
preg_match('/TV Series/i', $page, $matches, PREG_OFFSET_CAPTURE);
return !!$this->count($matches);
break;

case "seasons":
$seasons = [];
$findAllSeasons = $dom->find($page, "#bySeason > option");
$dom = new \PHPHtmlParser\Dom();
foreach ($findAllSeasons as $seasonRow){
$season = [];
$seasonValue = $seasonRow->getAttribute('value');
$season['season'] = $seasonValue;
// Using imdb ajax api to get episodes
$season['episodes'] = $this->get($dom->loadFromUrl($url."/_ajax?season=".$seasonValue), "episodes");
array_push($seasons, $season);
}
return $seasons;
break;

case "episodes":
$episodes = [];
$findAllEpisodes = $dom->find($page, ".eplist > .list_item");
foreach ($findAllEpisodes as $episodeRow){
$episode = [];
$hyperlink = $episodeRow->find("a[itemprop=url]");
$episode["id"] = $this->extractImdbId($hyperlink->getAttribute("href"));
$episode['title'] = $episodeRow->find('a[itemprop=name]')->text;
$episode['description'] = $episodeRow->find(".item_description")->text;
$rating = $episodeRow->find(".ipl-rating-star__rating");
$episode["poster"] = "";
if($this->count($rating)) {
$episode['rating'] = $rating->text;
}
$image = $hyperlink->find("img");
if($this->count($image)) {
$poster = $image->getAttribute("src");
$episode["poster"] = preg_match('/@/', $poster) ? preg_split('~@(?=[^@]*$)~', $poster)[0] . "@.jpg" : $poster;

if ($poster == $episode["poster"]) {
$episode["poster"] = preg_match('/\.\_/', $episode["poster"]) ? preg_split('/\.\_.*/', $episode["poster"])[0] . ".jpg" : $episode["poster"];
}
}
array_push($episodes, $episode);
}
return $episodes;
break;

case "technical_specs":
$technical_specs = [];
$table = $dom->find($page, '.dataTable tr');
Expand All @@ -265,29 +313,30 @@ public function get(object $page, string $element)

case "titles":
case "names":
case "people":
case "companies":
$response = [];
$sections = $dom->find($page, ".findSection");
$sections = $dom->find($page, ".ipc-page-section");
if ($this->count($sections) > 0)
{
foreach ($sections as $section)
{
$sectionName = @strtolower($section->find(".findSectionHeader")->text);
$sectionName = @strtolower($dom->find($section, ".ipc-title__text")->text);
if ($sectionName === $element) {
$sectionRows = $section->find(".findList tr");
$sectionRows = $section->find("ul li");
if ($this->count($sectionRows) > 0)
{
foreach ($sectionRows as $sectionRow)
{
$row = [];

$link = $dom->find($sectionRow, 'td.result_text a');
$link = $dom->find($sectionRow, 'a');
$row["title"] = $link->text;
if ($row["title"] == "") {
continue;
}

$row["image"] = $dom->find($sectionRow, 'td.primary_photo img')->src;
$row["image"] = $dom->find($sectionRow, '.ipc-image')->src;
if (preg_match('/@/', $row["image"]))
{
$row["image"] = preg_split('~@(?=[^@]*$)~', $row["image"])[0] . "@.jpg";
Expand Down Expand Up @@ -315,7 +364,7 @@ public function get(object $page, string $element)
*
* @param object $page
* @param array $patterns
* @return string
* @return string
*/
public function findMatchInPatterns(object $dom, object $page, array $patterns, string $type = "text")
{
Expand Down
17 changes: 14 additions & 3 deletions src/Imdb.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ private function populateOptions(array $options = []): array
'category' => 'all',
'curlHeaders' => ['Accept-Language: en-US,en;q=0.5'],
'techSpecs' => true,
'seasons' => false,
];

// Merge any user options with the default ones
Expand Down Expand Up @@ -103,16 +104,26 @@ public function film(string $filmId, array $options = []): array
$response->add("rating_votes", $htmlPieces->get($page, "rating_votes"));
$response->add("poster", $htmlPieces->get($page, "poster"));
$response->add("trailer", $htmlPieces->get($page, "trailer"));
$response->add("tvShow", $htmlPieces->get($page, "tvShow"));
$response->add("cast", $htmlPieces->get($page, "cast"));
$response->add("seasons", []);
$response->add("technical_specs", []);

// If techSpecs is enabled in user $options
// -> Make a second request to load the full techSpecs page
if ($options["techSpecs"]) {
$page_techSpecs = $dom->fetch("https://www.imdb.com/title/$filmId/technical", $options);
$response->add("technical_specs", $htmlPieces->get($page_techSpecs, "technical_specs"));
}
else {
$response->add("technical_specs", []);

// If seasons is enabled & is a tv show
if ($options['seasons'] && $response->get("tvShow")) {
$url = "https://www.imdb.com/title/$filmId/episodes";
$page_seasons = $dom->fetch($url, $options);
// If film has episodes or seasons
if (count($page_seasons->find(".error_code_404")) == 0) {
$response->add("seasons", $htmlPieces->get($page_seasons, "seasons", $url));
}
}

// If caching is enabled
Expand Down Expand Up @@ -157,7 +168,7 @@ public function search(string $search, array $options = []): array

// Add all search data to response $store
$response->add("titles", $htmlPieces->get($page, "titles"));
$response->add("names", $htmlPieces->get($page, "names"));
$response->add("names", $htmlPieces->get($page, "people"));
$response->add("companies", $htmlPieces->get($page, "companies"));

return $response->return();
Expand Down
2 changes: 2 additions & 0 deletions src/Response.php
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ public function default(string $endpoint): array
"id" => "",
"link" => ""
],
"tvShow" => false,
"cast" => [],
"seasons" => [],
"technical_specs" => []
];
break;
Expand Down
24 changes: 1 addition & 23 deletions tests/ImdbTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public function testFilmCache()
{
$imdb = new Imdb;
$cache = new Cache;
$film = $imdb->film('tt0816692', [ 'techSpecs' => false ]);
$film = $imdb->film('tt0816692', [ 'cache' => true, 'techSpecs' => false ]);
$cache_film = $cache->get('tt0816692')->film;

$this->assertEquals(true, $cache->has('tt0816692'));
Expand All @@ -91,26 +91,4 @@ public function testSearch()
$this->assertEquals('The Life and Death of Colonel Blimp', $search_2['titles'][0]['title']);
$this->assertEquals('tt0036112', $search_2['titles'][0]['id']);
}

/**
 * Requests two film lookups and one search using ids/terms that are
 * presumably expected not to resolve, and checks that each result equals
 * the default response for its endpoint. For the direct id lookup the
 * expected default carries the requested id.
 */
public function test404Page()
{
    $client = new Imdb;
    $defaults = new Response;

    // Perform the three requests in the same order as before.
    $filmById = $client->film('ttest404', [ 'cache' => false ]);
    $filmByName = $client->film('interstellartest4040404040404', [ 'cache' => false ]);
    $searchResult = $client->search('ttest404040404004', [ 'category' => 'test404' ]);

    // Build the expected (default) payloads one by one.
    $expectedFilmById = $defaults->default('film');
    $expectedFilmByName = $defaults->default('film');
    $expectedSearch = $defaults->default('search');

    // The id-based lookup is expected to echo the requested id back.
    $expectedFilmById['id'] = 'ttest404';

    $this->assertEquals($expectedFilmById, $filmById);
    $this->assertEquals($expectedFilmByName, $filmByName);
    $this->assertEquals($expectedSearch, $searchResult);
}

}

0 comments on commit c1fd186

Please sign in to comment.