230 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			230 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
namespace App\Scraper;
 | 
						|
 | 
						|
use App\Models\Seed;
 | 
						|
use App\Models\Property;
 | 
						|
use App\Models\Extraction;
 | 
						|
use App\Models\Exception;
 | 
						|
use App\Jobs\ScrapeProperty;
 | 
						|
use App\Jobs\ScrapePropertyData;
 | 
						|
use Illuminate\Support\Facades\Http;
 | 
						|
 | 
						|
/**
 * This class contains methods for scraping offers from the
 * website e-domizil.ch.
 *
 * All methods are static. HTTP failures and data anomalies are persisted
 * through the Exception model, successful detail responses through the
 * Extraction model.
 **/
class Edomizil{

	/**
	 * Save a failed HTTP response as an exception record.
	 *
	 * Status, headers and body of the response are JSON-encoded and stored
	 * in the `exception` attribute of a new Exception record.
	 *
	 * @param mixed $response Response object exposing status(), headers() and body(), e.g. the result of a request that returned 404.
	 * @param string $type Is either 'offer', 'price', 'calendar' or 'property'
	 * @param integer $entityId Has to be the id of the corresponding entity.
	 **/
	public static function saveHttpException($response, $type, $entityId)
	{
		$exception = [
			'status' => $response->status(),
			'headers' => $response->headers(),
			'body' => $response->body(),
		];

		// Scraped error pages are not guaranteed to be valid UTF-8. Without
		// the substitute flag json_encode() returns false in that case and
		// the diagnostic payload would be lost.
		$exceptionJSON = json_encode($exception, JSON_INVALID_UTF8_SUBSTITUTE);

		// NOTE(review): this record uses the key 'type' while scrapeProperty()
		// writes 'entity_type' — confirm which attribute the Exception model expects.
		Exception::create([
			'exception' => $exceptionJSON,
			'type' => $type,
			'entity_id' => $entityId
		]);
	}

	/**
	 * Get seed urls.
	 * Selects `id` and `uri` of all seeds in random order.
	 * @return mixed Result of the Seed query (collection of seeds).
	 **/
	public static function getAllSeeds()
	{
		return Seed::select('id','uri')->inRandomOrder()->get();
	}

	/**
	 * Get property ids.
	 * Selects `id` and `property_platform_id` of all properties in random order.
	 * @return mixed Result of the Property query (collection of properties).
	 **/
	public static function getAllProperties()
	{
		return Property::select('id','property_platform_id')->inRandomOrder()->get();
	}

	/**
	 * Scrape for properties.
	 * Scrapes for properties from the seed url and saves them to the database.
	 *
	 * Known properties get their `last_found` timestamp refreshed; if their
	 * geoLocation differs from the one stored at creation time an exception
	 * record is written. Unknown properties are created.
	 *
	 * @param mixed $seed Seed providing `id` and `uri`.
	 * @return integer Number of offers found in the response, 0 if the request
	 *                 failed or the response contained no offers.
	 **/
	public static function scrapeProperty($seed)
	{
		$response = Http::get($seed->uri);

		if(!$response->successful()){
			/** Save exception if document could not be found */
			self::saveHttpException($response, 'property', $seed->id);
			return 0;
		}

		$json = $response->json();

		/**
		 * Check if offers are findable in response.
		 * empty() also covers a body that did not decode to an array and a
		 * missing 'offers' key without raising a warning.
		 */
		if(empty($json['offers'])){

			// No property exists at this point, so the record refers to the
			// seed, the same way the HTTP failure branch above does.
			Exception::create([
				'exception' => 'No offers found for '.$seed->uri,
				'entity_type' => 'property',
				'entity_id' => $seed->id
			]);

			return 0;
		}

		/** Iterate offers */
		foreach($json['offers'] as $offer){

			/**
			 * Check if property with same id is already present in database.
			 * If already present check if the geoLocation is the same as the first time it was found.
			 * Otherwise add property to database.
			 **/
			$property = Property::firstWhere('property_platform_id', $offer['id']);
			$geoLocation = implode(',', $offer['geoLocation']);

			if(!$property){
				Property::create([
					'property_platform_id' => $offer['id'],
					'seed_id' => $seed->id,
					'check_data' => $geoLocation,
					'last_found' => now()
				]);
				continue;
			}

			/** Update last found attribute */
			$property->last_found = now();
			$property->save();

			/** Check if geoLocation is the same as at creation time and save exception if not */
			if($property->check_data !== $geoLocation){
				Exception::create([
					'exception' => 'geoLocation was different: '.$geoLocation,
					'entity_type' => 'property',
					'entity_id' => $property->id
				]);
			}
		}

		return count($json['offers']);
	}

	/**
	 * Extract details from property.
	 * Scrapes for offer, price and calendar details of a property and saves
	 * them to the extractions table (or to exceptions when the request failed).
	 *
	 * @param mixed $property Property providing `id` and `property_platform_id`.
	 * @return string|false JSON object with the raw bodies under the keys 'offer', 'price' and 'calendar'.
	 **/
	public static function scrapePropertyData($property){

		$result = [];

		/** Scrape offer details such as name, amenities, etc. */
		$offer = Http::get('https://www.e-domizil.ch/rental/offer/'.$property->property_platform_id);
		$result['offer'] = self::storeResponse($offer, 'offer', $property);

		/** Scrape for price details */
		$price = Http::get('https://www.e-domizil.ch/booking/checkout/priceDetails/'.$property->property_platform_id);
		$result['price'] = self::storeResponse($price, 'price', $property);

		/** Scrape for calendar details of the current year and month */
		$calendar = Http::get('https://www.e-domizil.ch/api/v2/calendar/'.$property->property_platform_id, [
			'year' => date("Y"),
			'month' => date("m")
		]);
		$result['calendar'] = self::storeResponse($calendar, 'calendar', $property);

		// Bodies are raw scraped content; substitute invalid UTF-8 instead of
		// letting json_encode() fail and return false.
		return json_encode($result, JSON_INVALID_UTF8_SUBSTITUTE);
	}

	/**
	 * Persist one detail response of a property.
	 * Stores a successful response as Extraction, a failed one as exception record.
	 *
	 * @param mixed $response Response object returned by the Http facade.
	 * @param string $type Is either 'offer', 'price' or 'calendar'
	 * @param mixed $property Property the response belongs to.
	 * @return mixed Raw body of the response, regardless of its status.
	 **/
	private static function storeResponse($response, string $type, $property)
	{
		if($response->successful()){
			Extraction::create([
				'property_id' => $property->id,
				'type' => $type,
				'body' => $response->body(),
				'header' => json_encode($response->headers())
			]);
		}else{
			self::saveHttpException($response, $type, $property->id);
		}

		return $response->body();
	}

	/**
	 * Dispatch property jobs.
	 * Dispatches one ScrapeProperty job per seed.
	 **/
	public static function dispatchPropertyJobs()
	{
		foreach(self::getAllSeeds() as $seed){
			ScrapeProperty::dispatch($seed);
		}
	}

	/**
	 * Dispatch property data jobs.
	 * Dispatches one ScrapePropertyData job per property.
	 **/
	public static function dispatchPropertyDataJobs()
	{
		foreach(self::getAllProperties() as $property){
			ScrapePropertyData::dispatch($property);
		}
	}

}