From 0078935fc140f0124af4ae5104cdd4f1613dc521 Mon Sep 17 00:00:00 2001
From: Jacek Kowalski <Jacek@jacekk.info>
Date: Sun, 08 Jul 2012 19:01:42 +0000
Subject: [PATCH] Poprawka parsowania programu telewizyjnego - filtrowanie znaków UTF-8 ze względu na problemy z kodowaniem na stronach WP

---
 data/tv/pobierz.php |  100 +++++++++++++------------------------------------
 1 file changed, 27 insertions(+), 73 deletions(-)

diff --git a/data/tv/pobierz.php b/data/tv/pobierz.php
index 5192250..12fc829 100644
--- a/data/tv/pobierz.php
+++ b/data/tv/pobierz.php
@@ -1,4 +1,6 @@
 <?php
+require_once('wp_parse.php');
+
 echo STAR.'Pobieranie programu TV...';
 $stations = array(
 	1 => 'TVP 1',
@@ -56,90 +58,42 @@
 	217 => 'ZigZap',
 	361 => 'Cartoon Network',
 );
-$NUMOF = count($stations)*7;
+$NUMOF = count($stations);
 
 $c = curl_init();
 $out = fopen('./xmltv-pre.xml', 'w');
 fwrite($out, '<?xml version="1.0" encoding="UTF-8" ?>
 <tv date="'.date('YmdHis O').'" generator-info-name="BotGG" generator-info-url="http://jacekk.info/botgg">
 ');
-$address = 'http://tv.wp.pl/program.html?stid=$STATION&date=$DATE&time=';
+$address = 'http://tv.wp.pl/program.html?stid=$STATION';
+$date = date('Y-m-d');
 
 $counter = 0;
 foreach($stations as $num => $station) {
-	fwrite($out, '	<channel id="'.$station.'">
-		<display-name>'.$station.'</display-name>
-	</channel>
-');
-	for($i=0; $i<7; $i++) {
-		echo "\r".STAR.'Pobieranie programu TV: '.floor(($counter*7 + $i)/$NUMOF*100).'%';
-		
-		$timestamp = strtotime('+'.$i.' days');
-		$date = date('Y-m-d', $timestamp);
-		if(!file_exists('./cache/'.$num.'_'.$date) || filesize('./cache/'.$num.'_'.$date)==0) {
-			curl_setopt($c, CURLOPT_URL, str_replace(array('$DATE', '$STATION'), array($date, $num), $address));
-			curl_setopt($c, CURLOPT_CONNECTTIMEOUT, 30);
-			curl_setopt($c, CURLOPT_FOLLOWLOCATION, TRUE);
-			curl_setopt($c, CURLOPT_MAXREDIRS, 5);
-			curl_setopt($c, CURLOPT_HTTPHEADER, array('User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.2; pl-PL; rv:1.9.2) Gecko/20100101 Firefox/3.6'));
-			curl_setopt($c, CURLOPT_RETURNTRANSFER, TRUE);
-			$data = curl_exec($c);
-			if(!$data) {
-				echo FAIL;
-				return;
-			}
-			
-			$data = str_replace(array('id="C_TSR-franc"', 'id="C_TSR-2-franc"', 'id="stationId"', 'id="searchForm"', '&'), array('', '', '', '', '&amp;'), $data);
-			
-			file_put_contents('./cache/'.$num.'_'.$date, $data);
-			unset($data);
+	echo "\r".STAR.'Pobieranie programu TV: '.floor($counter/$NUMOF*100).'%';
+	
+	if(!file_exists('./cache/'.$num.'_'.$date) || filesize('./cache/'.$num.'_'.$date)==0) {
+		curl_setopt($c, CURLOPT_URL, str_replace(array('$DATE', '$STATION'), array($date, $num), $address));
+		curl_setopt($c, CURLOPT_CONNECTTIMEOUT, 30);
+		curl_setopt($c, CURLOPT_FOLLOWLOCATION, TRUE);
+		curl_setopt($c, CURLOPT_MAXREDIRS, 5);
+		curl_setopt($c, CURLOPT_HTTPHEADER, array('User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.2; pl-PL; rv:1.9.2) Gecko/20100101 Firefox/3.6'));
+		curl_setopt($c, CURLOPT_RETURNTRANSFER, TRUE);
+		$data = curl_exec($c);
+		if(!$data) {
+			echo FAIL;
+			return;
 		}
 		
-		$doc = new DOMDocument;
-		$doc->loadHTMLFile('./cache/'.$num.'_'.$date);
-		$doc = $doc->getElementById('bxNazwaBoksu')->childNodes;
-		
-		foreach($doc as $el) {
-			if($el instanceof DOMElement) {
-				$doc = $el->childNodes;
-				break;
-			}
-		}
-		
-		$last_time = 0;
-		$last_timestamp = 0;
-		foreach($doc as $el) {
-			if(!$el instanceof DOMElement || substr($el->getAttribute('class'), 0, 7)!='program') continue;
-			
-			$time = $el->getElementsByTagName('strong')->item(0)->childNodes->item(0)->nodeValue;
-			$time = trim($time);
-			if($last_time>(int)$time) {
-				$timestamp = strtotime('+1 day', $timestamp);
-			}
-			$last_time = (int)$time;
-			$timestamp = strtotime($time, $timestamp);
-			
-			if($last_timestamp) {
-				fwrite($out, '	<programme channel="'.$station.'" start="'.date('YmdHis O', $last_timestamp).'" stop="'.date('YmdHis O', $timestamp).'">
-		<title>'.$name.'</title>
-		<desc/>
-	</programme>
-');
-			}
-			
-			$name = $el->getElementsByTagName('h4')->item(0)->childNodes->item(0)->childNodes->item(0)->nodeValue;
-			$name = htmlspecialchars(trim($name), ENT_COMPAT, 'UTF-8');
-			$last_timestamp = $timestamp;
-		}
-		
-		fwrite($out, '	<programme channel="'.$station.'" start="'.date('YmdHis O', $timestamp).'" stop="'.date('YmdHis O', $timestamp+3600).'">
-		<title>'.$name.'</title>
-		<desc/>
-	</programme>
-');
-		
-		unset($doc);
+		file_put_contents('./cache/'.$num.'_'.$date, $data);
+		unset($data);
 	}
+	
+	$doc = new DOMDocument;
+	@$doc->loadHTMLFile('./cache/'.$num.'_'.$date);
+	
+	$wp = new wp_parse($doc);
+	$wp->xmltv($station, $out);
 	
 	$counter++;
 }
@@ -160,4 +114,4 @@
 	}
 }
 echo OK;
-?>
+?>
\ No newline at end of file

--
Gitblit v1.9.1