ConsultancyProject1_Auslast.../scraper/app/Scraper/Edomizil.php

230 lines
5.5 KiB
PHP
Raw Normal View History

2024-03-23 10:05:14 +01:00
<?php
namespace App\Scraper;
use App\Models\Seed;
use App\Models\Property;
2024-04-13 00:34:40 +02:00
use App\Models\Extraction;
2024-04-09 22:36:40 +02:00
use App\Models\Exception;
use App\Jobs\ScrapeProperty;
use App\Jobs\ScrapePropertyData;
2024-03-23 10:05:14 +01:00
use Illuminate\Support\Facades\Http;
2024-07-01 19:41:12 +02:00
/**
* This Class contains methods for scraping offers from the
* website e-domizil.ch.
**/
2024-03-23 10:05:14 +01:00
class Edomizil{
2024-07-01 19:41:12 +02:00
/**
 * Save an exception.
 *
 * Bundles the status code, headers and body of an HTTP response into one
 * JSON document and stores it through the Exception model, together with
 * the kind and id of the entity that was being scraped.
 *
 * @param \Illuminate\Http\Client\Response $response The response of a failed request, e.g. a 404.
 * @param string $type Is either 'offer', 'price', 'calendar' or 'property'.
 * @param int $entityId Has to be the id of the corresponding entity.
 * @return void
 **/
public static function saveHttpException($response, $type, $entityId)
{
    $exception = [
        'status' => $response->status(),
        'headers' => $response->headers(),
        'body' => $response->body(),
    ];

    // Error pages are not guaranteed to be valid UTF-8. Without the
    // substitute flag json_encode() returns false for such a body and the
    // details of the failure would be lost.
    $exceptionJSON = json_encode($exception, JSON_INVALID_UTF8_SUBSTITUTE);

    // NOTE(review): the other Exception::create() calls in this class store
    // the entity kind under 'entity_type' - confirm which attribute name the
    // Exception model actually expects.
    Exception::create([
        'exception' => $exceptionJSON,
        'type' => $type,
        'entity_id' => $entityId,
    ]);
}
2024-07-01 19:41:12 +02:00
/**
 * Get seed urls.
 *
 * Selects the id and uri columns of every seed and returns the rows in
 * random order.
 *
 * @return \Illuminate\Database\Eloquent\Collection Seeds with id and uri.
 **/
public static function getAllSeeds()
{
    $seedQuery = Seed::select('id', 'uri');
    $shuffledSeeds = $seedQuery->inRandomOrder();

    return $shuffledSeeds->get();
}
2024-03-23 10:05:14 +01:00
2024-07-01 19:41:12 +02:00
/**
 * Get property ids.
 *
 * Selects the id and property_platform_id columns of every property and
 * returns the rows in random order.
 *
 * @return \Illuminate\Database\Eloquent\Collection Properties with id and property_platform_id.
 **/
public static function getAllProperties()
{
    $propertyQuery = Property::select('id', 'property_platform_id');
    $shuffledProperties = $propertyQuery->inRandomOrder();

    return $shuffledProperties->get();
}
2024-03-23 10:05:14 +01:00
2024-07-01 19:41:12 +02:00
/**
 * Scrape for properties.
 *
 * Requests the seed url and walks through the 'offers' of the JSON response.
 * Offers that are already known (matched on property_platform_id) get their
 * last_found timestamp refreshed; unknown offers are stored as new
 * properties linked to the seed.
 *
 * @param Seed $seed Seed whose uri is requested and whose id new properties are linked to.
 * @return int Number of offers in the response; 0 when the request failed or no offers were found.
 **/
public static function scrapeProperty($seed)
{
    $response = Http::get($seed->uri);

    if (!$response->successful()) {
        /** Save Exception if document could not be found */
        self::saveHttpException($response, 'property', $seed->id);
        return 0;
    }

    $json = $response->json();

    /**
     * Check if offers are findable in response.
     * empty() also covers a missing 'offers' key and a body that is not JSON
     * at all, without raising an "undefined array key" error.
     **/
    if (empty($json['offers'])) {
        // No property exists at this point, so the exception is attached to
        // the seed id, the same way the HTTP failure branch above does it.
        Exception::create([
            'exception' => 'No offers found for '.$seed->uri,
            'entity_type' => 'property',
            'entity_id' => $seed->id,
        ]);
        return 0;
    }

    /** Iterate offers */
    foreach ($json['offers'] as $offer) {
        /**
         * Check if property with same id is already present in database.
         * If already present check if the geoLocation is the same as the
         * first time it was found. Otherwise add property to database.
         **/
        $property = Property::firstWhere('property_platform_id', $offer['id']);
        $geoLocation = implode(',', $offer['geoLocation']);

        if (!$property) {
            Property::create([
                'property_platform_id' => $offer['id'],
                'seed_id' => $seed->id,
                'check_data' => $geoLocation,
                'last_found' => now(),
            ]);
            continue;
        }

        /** Update last found attribute */
        $property->last_found = now();
        $property->save();

        /** Check if geoLocation is the same as at creation time and save exception if not */
        if ($property->check_data !== $geoLocation) {
            Exception::create([
                'exception' => 'geoLocation was different: '.$geoLocation,
                'entity_type' => 'property',
                'entity_id' => $property->id,
            ]);
        }
    }

    return count($json['offers']);
}
2024-07-01 19:41:12 +02:00
/**
 * Extract details from property.
 *
 * Requests the offer, price and calendar endpoints of e-domizil.ch for the
 * given property, in that order. Every successful response is saved to the
 * extractions table, every failed one to the exceptions table.
 *
 * @param Property $property Property model; its property_platform_id is used
 *                           in the urls, its id in the stored rows.
 * @return string|false JSON object with the keys 'offer', 'price' and
 *                      'calendar' holding the raw response bodies, as
 *                      produced by json_encode().
 **/
public static function scrapePropertyData($property)
{
    $platformId = $property->property_platform_id;
    $result = [];

    /** scrape offer details */
    $result['offer'] = self::fetchAndStoreExtraction(
        'offer',
        'https://www.e-domizil.ch/rental/offer/'.$platformId,
        $property
    );

    /** scrape for price details */
    $result['price'] = self::fetchAndStoreExtraction(
        'price',
        'https://www.e-domizil.ch/booking/checkout/priceDetails/'.$platformId,
        $property
    );

    /** scrape for calendar details of the current year and month */
    $result['calendar'] = self::fetchAndStoreExtraction(
        'calendar',
        'https://www.e-domizil.ch/api/v2/calendar/'.$platformId,
        $property,
        [
            'year' => date("Y"),
            'month' => date("m"),
        ]
    );

    return json_encode($result);
}

/**
 * Request one endpoint and persist the outcome.
 *
 * A successful response is stored as an Extraction row of the given type,
 * a failed one is handed to saveHttpException().
 *
 * @param string $type Extraction type: 'offer', 'price' or 'calendar'.
 * @param string $url Url to request.
 * @param Property $property Property the response belongs to.
 * @param array|null $query Optional query parameters; null sends the url as is.
 * @return string Raw body of the response, whether it was successful or not.
 **/
private static function fetchAndStoreExtraction($type, $url, $property, $query = null)
{
    // Only pass a query argument when one was given, so requests without
    // parameters are issued exactly as a plain Http::get($url).
    $response = $query === null ? Http::get($url) : Http::get($url, $query);

    if ($response->successful()) {
        Extraction::create([
            'property_id' => $property->id,
            'type' => $type,
            'body' => $response->body(),
            'header' => json_encode($response->headers()),
        ]);
    } else {
        self::saveHttpException($response, $type, $property->id);
    }

    return $response->body();
}
2024-07-01 19:41:12 +02:00
/**
 * Dispatch property jobs.
 *
 * Dispatches one ScrapeProperty job for every seed returned by
 * getAllSeeds().
 **/
public static function dispatchPropertyJobs()
{
    self::getAllSeeds()->each(function ($seed) {
        ScrapeProperty::dispatch($seed);
    });
}
/**
 * Dispatch property data jobs.
 *
 * Dispatches one ScrapePropertyData job for every property returned by
 * getAllProperties().
 **/
public static function dispatchPropertyDataJobs()
{
    self::getAllProperties()->each(function ($property) {
        ScrapePropertyData::dispatch($property);
    });
}
2024-03-23 10:05:14 +01:00
2024-07-01 19:41:12 +02:00
}