From e80d67e983cb92a5ee8475dd0c112fb448c2cf16 Mon Sep 17 00:00:00 2001 From: Jacek Kowalski <Jacek@jacekk.info> Date: Sat, 13 Feb 2016 00:16:33 +0000 Subject: [PATCH] [mod_kino] Dostosowanie modułu do zmian na stronach źródłowych --- data/tv/pobierz.php | 114 +++++++++++++++++++-------------------------------------- 1 files changed, 38 insertions(+), 76 deletions(-) diff --git a/data/tv/pobierz.php b/data/tv/pobierz.php index 5192250..aebb5b8 100644 --- a/data/tv/pobierz.php +++ b/data/tv/pobierz.php @@ -1,4 +1,6 @@ <?php +require_once('wp_parse.php'); + echo STAR.'Pobieranie programu TV...'; $stations = array( 1 => 'TVP 1', @@ -35,8 +37,8 @@ 436 => 'FilmBox', 433 => 'FilmBox Extra', 174 => 'AXN', - 418 => 'AXN Crime', - 416 => 'AXN Sci-fi', + 538 => 'AXN Black', + 539 => 'AXN White', 85 => 'Ale Kino!', 205 => 'Kino Polska', 403 => 'TCM', @@ -50,95 +52,55 @@ 78 => 'Zone Romantica', 267 => 'Zone Europa', 84 => 'Zone Reality', - 434 => 'Religia TV', 449 => 'BBC CBeebies', 74 => 'Jetix', 217 => 'ZigZap', 361 => 'Cartoon Network', ); -$NUMOF = count($stations)*7; +$NUMOF = count($stations); + +ini_set('mbstring.substitute_character', 'none'); $c = curl_init(); $out = fopen('./xmltv-pre.xml', 'w'); fwrite($out, '<?xml version="1.0" encoding="UTF-8" ?> <tv date="'.date('YmdHis O').'" generator-info-name="BotGG" generator-info-url="http://jacekk.info/botgg"> '); -$address = 'http://tv.wp.pl/program.html?stid=$STATION&date=$DATE&time='; +$address = 'http://tv.wp.pl/program.html?stid=$STATION'; +$date = date('Y-m-d'); $counter = 0; foreach($stations as $num => $station) { - fwrite($out, ' <channel id="'.$station.'"> - <display-name>'.$station.'</display-name> - </channel> -'); - for($i=0; $i<7; $i++) { - echo "\r".STAR.'Pobieranie programu TV: '.floor(($counter*7 + $i)/$NUMOF*100).'%'; - - $timestamp = strtotime('+'.$i.' days'); - $date = date('Y-m-d', $timestamp); - if(!file_exists('./cache/'.$num.'_'.$date) || filesize('./cache/'.$num.'_'.$date)==0) { - curl_setopt($c, CURLOPT_URL, str_replace(array('$DATE', '$STATION'), array($date, $num), $address)); - curl_setopt($c, CURLOPT_CONNECTTIMEOUT, 30); - curl_setopt($c, CURLOPT_FOLLOWLOCATION, TRUE); - curl_setopt($c, CURLOPT_MAXREDIRS, 5); - curl_setopt($c, CURLOPT_HTTPHEADER, array('User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.2; pl-PL; rv:1.9.2) Gecko/20100101 Firefox/3.6')); - curl_setopt($c, CURLOPT_RETURNTRANSFER, TRUE); - $data = curl_exec($c); - if(!$data) { - echo FAIL; - return; - } - - $data = str_replace(array('id="C_TSR-franc"', 'id="C_TSR-2-franc"', 'id="stationId"', 'id="searchForm"', '&'), array('', '', '', '', '&'), $data); - - file_put_contents('./cache/'.$num.'_'.$date, $data); - unset($data); + echo "\r".STAR.'Pobieranie programu TV: '.floor($counter/$NUMOF*100).'%'; + + if(!file_exists('./cache/'.$num.'_'.$date) || filesize('./cache/'.$num.'_'.$date)==0) { + curl_setopt($c, CURLOPT_URL, str_replace(array('$DATE', '$STATION'), array($date, $num), $address)); + curl_setopt($c, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($c, CURLOPT_FOLLOWLOCATION, TRUE); + curl_setopt($c, CURLOPT_MAXREDIRS, 5); + curl_setopt($c, CURLOPT_HTTPHEADER, array('User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.2; pl-PL; rv:1.9.2) Gecko/20100101 Firefox/3.6')); + curl_setopt($c, CURLOPT_RETURNTRANSFER, TRUE); + $data = curl_exec($c); + if(!$data) { + echo FAIL; + return; } - $doc = new DOMDocument; - $doc->loadHTMLFile('./cache/'.$num.'_'.$date); - $doc = $doc->getElementById('bxNazwaBoksu')->childNodes; - - foreach($doc as $el) { - if($el instanceof DOMElement) { - $doc = $el->childNodes; - break; - } - } - - $last_time = 0; - $last_timestamp = 0; - foreach($doc as $el) { - if(!$el instanceof DOMElement || substr($el->getAttribute('class'), 0, 7)!='program') continue; - - $time = $el->getElementsByTagName('strong')->item(0)->childNodes->item(0)->nodeValue; - $time = trim($time); - if($last_time>(int)$time) { - $timestamp = strtotime('+1 day', $timestamp); - } - $last_time = (int)$time; - $timestamp = strtotime($time, $timestamp); - - if($last_timestamp) { - fwrite($out, ' <programme channel="'.$station.'" start="'.date('YmdHis O', $last_timestamp).'" stop="'.date('YmdHis O', $timestamp).'"> - <title>'.$name.'</title> - <desc/> - </programme> -'); - } - - $name = $el->getElementsByTagName('h4')->item(0)->childNodes->item(0)->childNodes->item(0)->nodeValue; - $name = htmlspecialchars(trim($name), ENT_COMPAT, 'UTF-8'); - $last_timestamp = $timestamp; - } - - fwrite($out, ' <programme channel="'.$station.'" start="'.date('YmdHis O', $timestamp).'" stop="'.date('YmdHis O', $timestamp+3600).'"> - <title>'.$name.'</title> - <desc/> - </programme> -'); - - unset($doc); + $data = mb_convert_encoding($data, 'UTF-8', 'UTF-8'); + file_put_contents('./cache/'.$num.'_'.$date, $data); + unset($data); + } + + $doc = new DOMDocument('1.0', 'utf-8'); + @$doc->loadHTMLFile('./cache/'.$num.'_'.$date); + + try { + $wp = new wp_parse($doc); + $wp->xmltv($station, $out); + } catch(Exception $e) { + echo "\n"; + echo STAR.'Błąd przy pobieraniu danych dla kanału '.$station.FAIL."\n"; + continue; } $counter++; @@ -160,4 +122,4 @@ } } echo OK; -?> +?> \ No newline at end of file -- Gitblit v1.9.1