Slightly less chaos.

main
Giò 2024-04-13 00:34:40 +02:00
parent 956ce5a95b
commit f4a724618e
11 changed files with 156 additions and 109 deletions

View File

@ -21,22 +21,23 @@ Table properties {
created_at timestamp created_at timestamp
} }
Table occupancies { Table extractions {
occupancy_id integer [primary key] extraction_id integer [primary key]
property_id integer [unique, ref: > properties.property_id] property_id integer [unique, ref: > properties.property_id]
occupancy json [not null] body text [not null]
header text [not null] header text [not null]
created_at timestamp [not null] created_at timestamp [not null]
} }
enum entities { enum types {
property property
occupancy calendar
offer
} }
Table exceptions { Table exceptions {
extraction_id integer [primary key] exception_id integer [primary key]
exception json [not null, note: "exception while scraping (e. g. HTTP error message) and called url."] exception json [not null, note: "exception while scraping (e. g. HTTP error message) and called url."]
entity entities [not null, note: "for which entity did the exception occur."] type types [not null, note: "for which entity did the exception occur."]
entity_id integer [not null, note: "either a property_id or occupancy_id"] property_id integer [not null, ref: > properties.property_id, note: "the property the exception occurred for"]
} }

View File

@ -3,12 +3,14 @@
namespace App\Jobs; namespace App\Jobs;
use App\Models\Seed; use App\Models\Seed;
use App\Models\Exception;
use Illuminate\Bus\Queueable; use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue; use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Contracts\Queue\ShouldBeUnique; use Illuminate\Contracts\Queue\ShouldBeUnique;
use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels; use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Http;
class scrapeProperties implements ShouldQueue, ShouldBeUnique class scrapeProperties implements ShouldQueue, ShouldBeUnique
{ {
@ -29,14 +31,15 @@ class scrapeProperties implements ShouldQueue, ShouldBeUnique
public function handle(): void public function handle(): void
{ {
// $response = Http::get($seed->uri); // $response = Http::get($seed->uri);
dump($seed->uri);
$response = Http::get('https://diani.xyz/test.json'); $response = Http::get('https://diani.xyz/test.json');
$json = $response->json(); $json = $response->json();
foreach($json['offers'] as $offer){ foreach($json['offers'] as $offer){
// Guessed ID to identify property on scraped platform
$property = Property::firstWhere('property_platform_id', $offer['id']); $property = Property::firstWhere('property_platform_id', $offer['id']);
// check if geoLocation has the same values as at the last crawl
if($property && $property->check_data === implode(',', $offer['geoLocation'])){ if($property && $property->check_data === implode(',', $offer['geoLocation'])){
$property->last_found = now(); $property->last_found = now();
$property->save(); $property->save();

View File

@ -8,5 +8,5 @@ class Exception extends Model
{ {
use HasFactory; use HasFactory;
protected $table = 'exceptions'; protected $table = 'exceptions';
protected $fillable = ['exception', 'entity_type', 'entity_id']; protected $fillable = ['exception', 'type', 'entity_id'];
} }

View File

@ -0,0 +1,13 @@
<?php
namespace App\Models;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
/**
 * Eloquent model backed by the `extractions` table.
 *
 * Each row holds the raw body and headers of one scraped response,
 * tagged with a type and the property it belongs to.
 */
class Extraction extends Model
{
    use HasFactory;

    /** @var string Explicit table name for this model. */
    protected $table = 'extractions';

    /** @var array Attributes that may be set through mass assignment. */
    protected $fillable = [
        'property_id',
        'type',
        'body',
        'header',
    ];
}

View File

@ -5,9 +5,9 @@ namespace App\Models;
use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model; use Illuminate\Database\Eloquent\Model;
class Occupancy extends Model class Regions extends Model
{ {
use HasFactory; use HasFactory;
protected $table = 'occupancies'; protected $table = 'regions';
protected $fillable = ['property_id', 'occupancy', 'header']; protected $fillable = ['name'];
} }

View File

@ -1,36 +0,0 @@
<?php
namespace App\Scraper;
use App\Models\Seed;
/**
 * Minimal cURL-based HTTP fetcher.
 */
class Basic
{
    /**
     * Decode a JSON document into PHP values.
     *
     * @param string $json Raw JSON text.
     * @return mixed Decoded value (JSON objects become associative arrays),
     *               or null when the text is not valid JSON.
     */
    public function parseJSON($json)
    {
        return json_decode($json, true);
    }

    /**
     * Fetch a URL, following redirects.
     *
     * @param string $url    Address to request.
     * @param bool   $isJSON When true, the response body is decoded with parseJSON().
     * @return mixed Decoded JSON when $isJSON is true (null on transport or decode
     *               failure); otherwise the raw response including headers, or
     *               false on transport failure.
     */
    public function get($url, bool $isJSON = true)
    {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_HEADER => true,
            CURLOPT_FOLLOWLOCATION => true,
            // Without this, curl_exec() prints the response and only returns true.
            CURLOPT_RETURNTRANSFER => true,
        ]);

        $response = curl_exec($ch);
        // Total size of all header blocks (including redirects) preceding the body.
        $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        curl_close($ch);

        if ($response === false) {
            return $isJSON ? null : false;
        }

        if ($isJSON) {
            // Headers are part of the transfer output; strip them before decoding.
            return $this->parseJSON(substr($response, $headerSize));
        }

        return $response;
    }
}

View File

@ -3,7 +3,7 @@ namespace App\Scraper;
use App\Models\Seed; use App\Models\Seed;
use App\Models\Property; use App\Models\Property;
use App\Models\Occupancy; use App\Models\Extraction;
use App\Models\Exception; use App\Models\Exception;
use App\Jobs\ScrapeProperty; use App\Jobs\ScrapeProperty;
use App\Jobs\ScrapePropertyData; use App\Jobs\ScrapePropertyData;
@ -11,9 +11,27 @@ use Illuminate\Support\Facades\Http;
class Edomizil{ class Edomizil{
/**
 * Store a failed HTTP response as a row in the exceptions table.
 *
 * The response status, headers and body are serialised to JSON and saved
 * together with the given type and entity id.
 *
 * @param mixed $response Response object exposing status(), headers() and body().
 * @param mixed $type     Value for the `type` column (e.g. 'property').
 * @param mixed $entityId Value for the `entity_id` column.
 */
public static function saveHttpException($response, $type, $entityId)
{
    // Scraped bodies may contain invalid UTF-8; substitute bad bytes rather
    // than letting json_encode() return false and lose the whole record.
    $payload = json_encode(
        [
            'status' => $response->status(),
            'headers' => $response->headers(),
            'body' => $response->body(),
        ],
        JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR
    );

    Exception::create([
        'exception' => $payload,
        'type' => $type,
        'entity_id' => $entityId,
    ]);
}
public static function getAllSeeds() public static function getAllSeeds()
{ {
// get all properties from model in random order. // get all seeds from model in random order.
return Seed::select('id','uri')->inRandomOrder()->get(); return Seed::select('id','uri')->inRandomOrder()->get();
} }
@ -27,7 +45,7 @@ class Edomizil{
{ {
$seeds = self::getAllSeeds(); $seeds = self::getAllSeeds();
foreach($seeds as $seed){ foreach($seeds as $seed){
ScrapeProperty::dispatch($seed->uri); ScrapeProperty::dispatch($seed);
} }
} }
@ -35,66 +53,101 @@ class Edomizil{
{ {
$properties = self::getAllProperties(); $properties = self::getAllProperties();
foreach($properties as $property){ foreach($properties as $property){
dump($property->property_platform_id); ScrapePropertyData::dispatch($property->property_platform_id);
// ScrapePropertyData::dispatch($property->property_platform_id);
} }
} }
public static function scrapeProperty($uri) public static function scrapeProperty($seed)
{ {
//$response = Http::get($seed->uri); $response = Http::get($seed->uri);
$response = Http::get('https://diani.xyz/test_2.json');
$json = $response->json();
foreach($json['offers'] as $offer){ if($response->successful()){
$property = Property::firstWhere('property_platform_id', $offer['id']); $json = $response->json();
$geoLocation = implode(',', $offer['geoLocation']);
if($property){ foreach($json['offers'] as $offer){
$property->last_found = now();
$property->save(); $property = Property::firstWhere('property_platform_id', $offer['id']);
if($property->check_data !== $geoLocation){ $geoLocation = implode(',', $offer['geoLocation']);
Exception::create([
'exception' => 'geoLocation was different: '.$geoLocation, if($property){
'entity_type' => 'property', $property->last_found = now();
'entity_id' => $offer['id'] $property->save();
// check if geoLocation is the same as last crawl
if($property->check_data !== $geoLocation){
Exception::create([
'exception' => 'geoLocation was different: '.$geoLocation,
'entity_type' => 'property',
'entity_id' => $offer['id']
]);
}
}else{
Property::create([
'property_platform_id' => $offer['id'],
'seed_id' => $seed->id,
'check_data' => $geoLocation,
'last_found' => now()
]); ]);
} }
}else{
Property::create([
'property_platform_id' => $offer['id'],
'seed_id' => $seed->id,
'check_data' => $geoLocation,
'last_found' => now()
]);
} }
}else{
self::saveHttpException($response,'property', $seed->id);
} }
} }
public static function scrapeOccupancy($propertyId){ public static function scrapePropertyData($propertyId){
/*
// scrape offer details such as name etc.
$offer = Http::get('https://www.e-domizil.ch/rental/offer/'.$propertyId);
if($offer->successful()){
Extraction::create([
'property_id' => $propertyId,
'type' => 'offer',
'body' => $offer->body(),
'header' => json_encode($offer->headers())
]);
}else{
self::saveHttpException($offer,'offer',$propertyId);
}
// scrape price of property
$price = Http::get('https://www.e-domizil.ch/booking/checkout/priceDetails/'.$propertyId);
if($price->successful()){
Extraction::create([
'property_id' => $propertyId,
'type' => 'price',
'body' => $price->body(),
'header' => json_encode($price->headers())
]);
}else{
self::saveHttpException($price,'price',$propertyId);
}
// scrape calendar which contains occupancies
$calendar = Http::get('https://www.e-domizil.ch/api/v2/calendar/'.$propertyId, [ $calendar = Http::get('https://www.e-domizil.ch/api/v2/calendar/'.$propertyId, [
'year' => date("Y"), 'year' => date("Y"),
'month' => date("m") 'month' => date("m")
]); ]);
$data_cal = $calendar->json();
$price = Http::get('https://www.e-domizil.ch/booking/checkout/priceDetails/'.$propertyId); if($calendar->successful()){
$data_price = $price->json();
$offer = Http::get('https://www.e-domizil.ch/rental/offer/'.$propertyId); Extraction::create([
$data_offer = $offer->json(); 'property_id' => $propertyId,
*/ 'type' => 'calendar',
/* 'body' => $calendar->body(),
$data = $response->json(); 'header' => json_encode($calendar->headers())
Occupancy::create([ ]);
'property_id' => $property->id,
'occupancy' => json_encode($data['content']['days']),
'header' => json_encode($response->headers())
]);
*/
}else{
self::saveHttpException($calendar,'price',$propertyId);
}
} }

View File

@ -11,11 +11,12 @@ return new class extends Migration
*/ */
public function up(): void public function up(): void
{ {
Schema::create('occupancies', function (Blueprint $table) { Schema::create('extractions', function (Blueprint $table) {
$table->id(); $table->id();
$table->foreignId('property_id')->constrained(); $table->foreignId('property_id')->constrained();
$table->json('occupancy'); $table->text('body');
$table->text('header'); $table->text('header');
$table->enum('type', ['property', 'calendar', 'offer', 'price']);
$table->timestamps(); $table->timestamps();
}); });
} }
@ -25,6 +26,6 @@ return new class extends Migration
*/ */
public function down(): void public function down(): void
{ {
Schema::dropIfExists('occupancies'); Schema::dropIfExists('extractions');
} }
}; };

View File

@ -13,8 +13,8 @@ return new class extends Migration
{ {
Schema::create('exceptions', function (Blueprint $table) { Schema::create('exceptions', function (Blueprint $table) {
$table->id(); $table->id();
$table->json('exception'); $table->text('exception');
$table->enum('entity_type', ['property', 'occupancy']); $table->enum('type', ['property', 'calendar', 'offer', 'price']);
$table->integer('entity_id'); $table->integer('entity_id');
$table->timestamps(); $table->timestamps();
}); });

View File

@ -2,9 +2,10 @@
namespace Database\Seeders; namespace Database\Seeders;
use App\Models\User; use App\Models\Regions;
// use Illuminate\Database\Console\Seeds\WithoutModelEvents; use App\Models\Seeds;
use Illuminate\Database\Seeder; use Illuminate\Database\Seeder;
use Illuminate\Support\Facades\DB;
class DatabaseSeeder extends Seeder class DatabaseSeeder extends Seeder
{ {
@ -15,9 +16,20 @@ class DatabaseSeeder extends Seeder
{ {
// User::factory(10)->create(); // User::factory(10)->create();
User::factory()->create([ DB::table('regions')->insert([
'name' => 'Test User', 'name' => 'Heidiland',
'email' => 'test@example.com', ]);
DB::table('seeds')->insert([
'uri' => 'https://www.e-domizil.ch/search/632d3fb65adbe?_format=json&adults=1&duration=7',
'region_id' => 1
]);
DB::table('properties')->insert([
'property_platform_id' => '12345',
'seed_id' => 1,
'check_data' => '12345',
'last_found' => '2024-01-01'
]); ]);
} }
} }

View File

@ -11,8 +11,8 @@ Route::get('/properties', function () {
Edomizil::dispatchPropertyJobs(); Edomizil::dispatchPropertyJobs();
}); });
Route::get('/occupancy', function () { Route::get('/propertydata', function () {
//$properties = Edomizil::getAllOccupancies(); Edomizil::scrapePropertyData(1);
Edomizil::dispatchPropertyDataJobs(); //Edomizil::dispatchPropertyDataJobs();
}); });