From 33182e945b9b5ee3528f34a483ba84771e5de1fb Mon Sep 17 00:00:00 2001
From: Jacek Kowalski <Jacek@jacekk.info>
Date: Tue, 19 Feb 2019 00:21:59 +0000
Subject: [PATCH] Use correct/incorrect count to decide whether to ignore/replace mapping

---
 parse.php |  217 +++++++++++++++++++++++-------------------------------
 1 file changed, 93 insertions(+), 124 deletions(-)

diff --git a/parse.php b/parse.php
index db165c7..c6406b3 100644
--- a/parse.php
+++ b/parse.php
@@ -1,137 +1,106 @@
 <?php
-require('vendor/autoload.php');
-require('common.php');
+require_once(__DIR__.'/lib/database.php');
+require_once(__DIR__.'/lib/fetch.php');
+require_once(__DIR__.'/lib/mapper.php');
 
-use transit_realtime\FeedMessage;
+$logger = new Monolog\Logger('Parse changes');
 
-class IdMapper {
-	private $jsonTrips = [];
-	private $gtfsTrips = [];
-	
-	private $specialNames = [
-		'Zjazd do zajezdni',
-		'Przejazd techniczny',
-		'Wyjazd na trasę',
-	];
-	
-	public static function convertTripId($tripId) {
-		$tripId = explode('_', $tripId);
-		if($tripId[0] != 'block') return;
-		if($tripId[2] != 'trip') return;
-		return 4096 * (int)$tripId[1] + (int)$tripId[3];
-	}
-	
-	public function loadJson($file) {
-		$json = json_decode(file_get_contents($file));
-		foreach($json->vehicles as $vehicle) {
-			if(isset($vehicle->isDeleted) && $vehicle->isDeleted) continue;
-			if(!isset($vehicle->tripId) || !$vehicle->tripId) continue;
-			if(!isset($vehicle->name) || !$vehicle->name) continue;
-			if(!isset($vehicle->latitude) || !$vehicle->latitude) continue;
-			if(!isset($vehicle->longitude) || !$vehicle->longitude) continue;
-			foreach($this->specialNames as $name) {
-				if(substr($vehicle->name, -strlen($name)) == $name) {
-					continue;
+$sources = [
+	'buses' => [
+		'gtfs' => 'ftp://ztp.krakow.pl/VehiclePositions_A.pb',
+		'gtfs_file' => 'VehiclePositions_A.pb',
+		'ttss' => 'http://91.223.13.70/internetservice/geoserviceDispatcher/services/vehicleinfo/vehicles',
+		'ttss_file' => 'vehicles_A.json',
+		'database' => 'mapping_A.sqlite3',
+		'result' => 'mapping_A.json',
+	],
+];
+
+foreach($sources as $name => $source) {
+	$logger = new Monolog\Logger('fetch_'.$name);
+	try {
+		foreach(['gtfs_file', 'ttss_file', 'database', 'result'] as $field) {
+			$source[$field] = __DIR__.'/data/'.$source[$field];
+		}
+		$source['result_temp'] = $source['result'].'.tmp';
+		
+		$logger->info('Fetching '.$name.' position data from FTP...');
+		$updated = ftp_fetch_if_newer($source['gtfs'], $source['gtfs_file']);
+		if(!$updated) {
+			$logger->info('Nothing to do, remote file not newer than local one');
+			continue;
+		}
+		
+		$logger->info('Fetching '.$name.' positions from TTSS...');
+		fetch($source['ttss'],$source['ttss_file']);
+		
+		$logger->info('Loading data...');
+		$mapper = new Mapper();
+		$mapper->loadTTSS($source['ttss_file']);
+		$mapper->loadGTFS($source['gtfs_file']);
+		
+		$db = new Database($source['database']);
+		
+		$logger->info('Finding correct offset...');
+		$offset = $mapper->findOffset();
+		if(!$offset) {
+			throw new Exception('Offset not found');
+		}
+		
+		$logger->info('Got offset '.$offset.', creating mapping...');
+		$mapping = $mapper->getMapping($offset);
+		
+		$logger->info('Checking the data for correctness...');
+		$weight = count($mapping);
+		
+		$correct = 0;
+		$incorrect = 0;
+		$old = 0;
+		$maxWeight = 0;
+		foreach($mapping as $id => $vehicle) {
+			$dbVehicle = $db->getById($id);
+			if($dbVehicle) {
+				$maxWeight = max($maxWeight, $dbVehicle['weight']);
+				if((int)substr($vehicle['num'], 2) == (int)$dbVehicle['num']) {
+					$correct += 1;
+				} else {
+					$incorrect += 1;
 				}
-			}
-			$this->jsonTrips[(int)$vehicle->tripId] = [
-				'id' => $vehicle->id,
-				'latitude' => (float)$vehicle->latitude / 3600000.0,
-				'longitude' => (float)$vehicle->longitude / 3600000.0,
-			];
-		}
-		ksort($this->jsonTrips);
-	}
-	
-	public function loadGtfs($file) {
-		$data = file_get_contents($file);
-		$feed = new FeedMessage();
-		$feed->parse($data);
-		foreach ($feed->getEntityList() as $entity) {
-			$vehiclePosition = $entity->getVehicle();
-			$position = $vehiclePosition->getPosition();
-			$vehicle = $vehiclePosition->getVehicle();
-			$trip = $vehiclePosition->getTrip();
-			$tripId = $trip->getTripId();
-			$this->gtfsTrips[self::convertTripId($tripId)] = [
-				'id' => $entity->getId(),
-				'num' => $vehicle->getLicensePlate(),
-				'tripId' => $tripId,
-				'latitude' => $position->getLatitude(),
-				'longitude' => $position->getLongitude(),
-			];
-		}
-		ksort($this->gtfsTrips);
-	}
-	
-	public function findOffset() {
-		if(count($this->jsonTrips) == 0 || count($this->gtfsTrips) == 0) {
-			return NULL;
-		}
-		
-		$jsonTripIds = array_keys($this->jsonTrips);
-		$gtfsTripIds = array_keys($this->gtfsTrips);
-		
-		$possibleOffsets = [];
-		for($i = 0; $i < count($this->jsonTrips); $i++) {
-			for($j = 0; $j < count($this->gtfsTrips); $j++) {
-				$possibleOffsets[$jsonTripIds[$i] - $gtfsTripIds[$j]] = TRUE;
-			}
-		}
-		$possibleOffsets = array_keys($possibleOffsets);
-		
-		$bestOffset = 0;
-		$maxMatched = 0;
-		$options = 0;
-		
-		foreach($possibleOffsets as $offset) {
-			$matched = 0;
-			
-			foreach($gtfsTripIds as $tripId) {
-				$tripId += $offset;
-				if(isset($this->jsonTrips[$tripId])) {
-					$matched++;
-				}
+				continue;
 			}
 			
-			if($matched > $maxMatched) {
-				$bestOffset = $offset;
-				$maxMatched = $matched;
-				$options = 1;
-			} elseif($matched == $maxMatched) {
-				$options++;
+			$dbVehicle = $db->getByNum($vehicle['num']);
+			if($dbVehicle && $dbVehicle['id'] != $id) {
+				$old += 1;
 			}
+		}
+		$logger->info('Weight: '.$weight.', correct: '.$correct.', incorrect: '.$incorrect.', old: '.$old);
+		
+		$previousMapping = NULL;
+		if($incorrect > $correct && $maxWeight > $weight) {
+			throw new Exception('Ignoring result due to better data already present');
+		} elseif($old > 0 && $incorrect == 0) {
+			$logger->warn('Replacing DB data with the new mapping');
+			$db->clear();
+		} else {
+			$previousMapping = @json_decode(@file_get_contents($source['result']), TRUE);
 		}
 		
-		if($options != 1) {
-			fwrite(STDERR, 'Found '.$options.' possible mappings!'."\n");
-			return FALSE;
+		$db->addMapping($mapping);
+		
+		if(is_array($previousMapping)) {
+			$logger->info('Merging previous data with current mapping');
+			$mapping = $previousMapping + $mapping;
+			ksort($mapping);
 		}
-		return $bestOffset;
-	}
-	
-	public function getMapping($offset) {
-		$result = [];
-		foreach($this->gtfsTrips as $gtfsTripId => $gtfsTrip) {
-			$jsonTripId = $gtfsTripId + $offset;
-			if(isset($this->jsonTrips[$jsonTripId])) {
-				$data = numToTypeB($gtfsTrip['id']);
-				$num = $gtfsTrip['num'];
-				if($data['num'] != $num) {
-					// Ignore due to incorrect depot markings in the data
-					//fwrite(STDERR, 'Got '.$num.', database has '.$data['num']."\n");
-				}
-				$result[$jsonTripId] = $data;
-			}
+		
+		$json = json_encode($mapping);
+		if(!file_put_contents($source['result_temp'], $json)) {
+			throw new Exception('Result save failed');
 		}
-		return $result;
+		rename($source['result_temp'], $source['result']);
+		$logger->info('Finished');
+	} catch(Throwable $e) {
+		$logger->error($e->getMessage(), ['exception' => $e, 'exception_string' => (string)$e]);
 	}
-}
-
-$mapper = new IdMapper();
-$mapper->loadJson('./data/vehicles_A.json');
-$mapper->loadGtfs('./data/VehiclePositions_A.pb');
-$offset = $mapper->findOffset();
-if($offset) {
-	echo json_encode($mapper->getMapping($offset));
 }

--
Gitblit v1.9.1