Slightly less chaos.

main
Giò 2024-04-13 00:34:40 +02:00
parent 956ce5a95b
commit f4a724618e
11 changed files with 156 additions and 109 deletions

View File

@ -21,22 +21,23 @@ Table properties {
created_at timestamp
}
Table occupancies {
occupancy_id integer [primary key]
Table extractions {
extraction_id integer [primary key]
property_id integer [unique, ref: > properties.property_id]
occupancy json [not null]
body text [not null]
header text [not null]
created_at timestamp [not null]
}
enum entities {
enum types {
property
occupancy
calendar
offer
}
Table exceptions {
extraction_id integer [primary key]
exception_id integer [primary key]
exception json [not null, note: "exception while scraping (e.g. HTTP error message) and the called URL."]
entity entities [not null, note: "for which entity did the exception occur."]
entity_id integer [not null, note: "either a property_id or occupancy_id"]
type types [not null, note: "for which entity did the exception occur."]
property_id integer [not null, ref: > properties.property_id, note: "a property_id"]
}

View File

@ -3,12 +3,14 @@
namespace App\Jobs;
use App\Models\Seed;
use App\Models\Exception;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Contracts\Queue\ShouldBeUnique;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Http;
class scrapeProperties implements ShouldQueue, ShouldBeUnique
{
@ -29,14 +31,15 @@ class scrapeProperties implements ShouldQueue, ShouldBeUnique
public function handle(): void
{
// $response = Http::get($seed->uri);
dump($seed->uri);
$response = Http::get('https://diani.xyz/test.json');
$json = $response->json();
foreach($json['offers'] as $offer){
// Guessed ID to identify property on scraped platform
$property = Property::firstWhere('property_platform_id', $offer['id']);
// check if geoLocation has the same values as at the last crawl
if($property && $property->check_data === implode(',', $offer['geoLocation'])){
$property->last_found = now();
$property->save();

View File

@ -8,5 +8,5 @@ class Exception extends Model
{
use HasFactory;
protected $table = 'exceptions';
protected $fillable = ['exception', 'entity_type', 'entity_id'];
protected $fillable = ['exception', 'type', 'entity_id'];
}

View File

@ -0,0 +1,13 @@
<?php
namespace App\Models;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
/**
 * Eloquent model backed by the `extractions` table.
 *
 * Rows are created with a property id, a type, the response body and the
 * response headers.
 */
class Extraction extends Model
{
    use HasFactory;

    /** @var string Name of the table this model reads from and writes to. */
    protected $table = 'extractions';

    /** @var array Attributes that may be set through mass assignment. */
    protected $fillable = [
        'property_id',
        'type',
        'body',
        'header',
    ];
}

View File

@ -5,9 +5,9 @@ namespace App\Models;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
class Occupancy extends Model
class Regions extends Model
{
use HasFactory;
protected $table = 'occupancies';
protected $fillable = ['property_id', 'occupancy', 'header'];
protected $table = 'regions';
protected $fillable = ['name'];
}

View File

@ -1,36 +0,0 @@
<?php
namespace App\Scraper;
use App\Models\Seed;
/**
 * Minimal cURL-based HTTP fetcher.
 */
class Basic{
    /**
     * Decode a JSON document, turning JSON objects into associative arrays.
     *
     * @param string $json Raw JSON text.
     * @return mixed Decoded value, or null when the text cannot be decoded.
     */
    public function parseJSON($json){
        return json_decode($json, true);
    }

    /**
     * Fetch a URL with cURL, following redirects.
     *
     * @param string $url    Address to request.
     * @param bool   $isJSON When true, the response body is decoded with parseJSON().
     * @return mixed Decoded JSON when $isJSON is true (null if the request or
     *               the decoding fails); otherwise the raw response including
     *               the header block, or false if the request fails.
     */
    public function get($url, bool $isJSON = true){
        $ch = curl_init();
        $options = array(
            CURLOPT_URL => $url,
            CURLOPT_HEADER => true,
            CURLOPT_FOLLOWLOCATION => true,
            // Without this, curl_exec() prints the response and returns true
            // instead of handing the response back to us.
            CURLOPT_RETURNTRANSFER => true,
        );
        curl_setopt_array($ch, $options);
        $response = curl_exec($ch);
        // CURLOPT_HEADER prepends the header block(s) to the response; remember
        // their total size so the body can be separated before JSON decoding.
        $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        curl_close($ch);

        if($response === false){
            return $isJSON ? null : false;
        }
        if($isJSON){
            return $this->parseJSON(substr($response, $headerSize));
        }
        return $response;
    }
}

View File

@ -3,7 +3,7 @@ namespace App\Scraper;
use App\Models\Seed;
use App\Models\Property;
use App\Models\Occupancy;
use App\Models\Extraction;
use App\Models\Exception;
use App\Jobs\ScrapeProperty;
use App\Jobs\ScrapePropertyData;
@ -11,9 +11,27 @@ use Illuminate\Support\Facades\Http;
class Edomizil{
/**
 * Store a failed HTTP response as a row in the exceptions table.
 *
 * The response status, headers and body are serialised to JSON and saved
 * together with the given type and entity id.
 *
 * @param mixed $response Response object exposing status(), headers() and body().
 * @param mixed $type     Value stored in the `type` attribute.
 * @param mixed $entityId Value stored in the `entity_id` attribute.
 * @return void
 */
public static function saveHttpException($response, $type, $entityId){
    $exception = [
        'status' => $response->status(),
        'headers' => $response->headers(),
        'body' => $response->body(),
    ];

    // Scraped bodies are not guaranteed to be valid UTF-8. Without the
    // substitute flag json_encode() returns false and the details are lost.
    $exceptionJSON = json_encode($exception, JSON_INVALID_UTF8_SUBSTITUTE);

    Exception::create([
        'exception' => $exceptionJSON,
        'type' => $type,
        'entity_id' => $entityId
    ]);
}
/**
 * Load all seeds in random order.
 *
 * Only the `id` and `uri` columns are selected.
 *
 * @return mixed Result of the query's get() call.
 */
public static function getAllSeeds()
{
    $query = Seed::select('id','uri');
    $shuffled = $query->inRandomOrder();

    return $shuffled->get();
}
@ -27,7 +45,7 @@ class Edomizil{
{
$seeds = self::getAllSeeds();
foreach($seeds as $seed){
ScrapeProperty::dispatch($seed->uri);
ScrapeProperty::dispatch($seed);
}
}
@ -35,66 +53,101 @@ class Edomizil{
{
$properties = self::getAllProperties();
foreach($properties as $property){
dump($property->property_platform_id);
// ScrapePropertyData::dispatch($property->property_platform_id);
ScrapePropertyData::dispatch($property->property_platform_id);
}
}
/**
 * Fetch a seed's search result and create or refresh the properties it lists.
 *
 * For each offer in the response: a known property gets its `last_found`
 * timestamp updated, and an exception row is written when the stored
 * `check_data` no longer matches the offer's geoLocation; an unknown property
 * is created. A failed request is recorded through saveHttpException().
 *
 * @param mixed $seed Seed record; its `uri` is requested and its `id` is stored on new properties.
 * @return void
 */
public static function scrapeProperty($seed)
{
    $response = Http::get($seed->uri);

    if(!$response->successful()){
        self::saveHttpException($response,'property', $seed->id);
        return;
    }

    $json = $response->json();

    // A body without an "offers" list is treated as an empty result instead
    // of raising on the missing key.
    foreach($json['offers'] ?? [] as $offer){
        $property = Property::firstWhere('property_platform_id', $offer['id']);
        $geoLocation = implode(',', $offer['geoLocation']);

        if($property){
            $property->last_found = now();
            $property->save();

            // check if geoLocation is the same as last crawl
            if($property->check_data !== $geoLocation){
                Exception::create([
                    'exception' => 'geoLocation was different: '.$geoLocation,
                    // The exceptions table and the model's fillable list use
                    // `type`; the former `entity_type` key is no longer stored.
                    'type' => 'property',
                    'entity_id' => $offer['id']
                ]);
            }
        }else{
            Property::create([
                'property_platform_id' => $offer['id'],
                'seed_id' => $seed->id,
                'check_data' => $geoLocation,
                'last_found' => now()
            ]);
        }
    }
}
/**
 * Fetch the offer page, price details and current-month calendar of a
 * property and store each raw response.
 *
 * Every successful response becomes an Extraction row (body plus JSON-encoded
 * headers); every failed one is recorded through saveHttpException() under
 * the type of the request that failed.
 *
 * @param mixed $propertyId Id appended to the e-domizil URLs and stored with each row.
 *                          NOTE(review): stored as `property_id`, which the migration
 *                          constrains as a foreign key; confirm callers pass the
 *                          matching id.
 * @return void
 */
public static function scrapePropertyData($propertyId){
    // scrape offer details such as name etc.
    $offer = Http::get('https://www.e-domizil.ch/rental/offer/'.$propertyId);
    self::storeExtraction($offer, 'offer', $propertyId);

    // scrape price of property
    $price = Http::get('https://www.e-domizil.ch/booking/checkout/priceDetails/'.$propertyId);
    self::storeExtraction($price, 'price', $propertyId);

    // scrape calendar which contains occupancies
    $calendar = Http::get('https://www.e-domizil.ch/api/v2/calendar/'.$propertyId, [
        'year' => date("Y"),
        'month' => date("m")
    ]);
    // A failed calendar request is recorded as 'calendar' (it was previously
    // logged under 'price').
    self::storeExtraction($calendar, 'calendar', $propertyId);
}

/**
 * Save a successful response as an Extraction, or log a failed one as an exception.
 *
 * @param mixed $response   Response object returned by Http::get().
 * @param mixed $type       Type stored with the extraction or the exception.
 * @param mixed $propertyId Id stored with the extraction or the exception.
 * @return void
 */
private static function storeExtraction($response, $type, $propertyId){
    if($response->successful()){
        Extraction::create([
            'property_id' => $propertyId,
            'type' => $type,
            'body' => $response->body(),
            'header' => json_encode($response->headers())
        ]);
    }else{
        self::saveHttpException($response, $type, $propertyId);
    }
}

View File

@ -11,11 +11,12 @@ return new class extends Migration
*/
public function up(): void
{
Schema::create('occupancies', function (Blueprint $table) {
Schema::create('extractions', function (Blueprint $table) {
$table->id();
$table->foreignId('property_id')->constrained();
$table->json('occupancy');
$table->text('body');
$table->text('header');
$table->enum('type', ['property', 'calendar', 'offer', 'price']);
$table->timestamps();
});
}
@ -25,6 +26,6 @@ return new class extends Migration
*/
public function down(): void
{
Schema::dropIfExists('occupancies');
Schema::dropIfExists('extractions');
}
};

View File

@ -13,8 +13,8 @@ return new class extends Migration
{
Schema::create('exceptions', function (Blueprint $table) {
$table->id();
$table->json('exception');
$table->enum('entity_type', ['property', 'occupancy']);
$table->text('exception');
$table->enum('type', ['property', 'calendar', 'offer', 'price']);
$table->integer('entity_id');
$table->timestamps();
});

View File

@ -2,9 +2,10 @@
namespace Database\Seeders;
use App\Models\User;
// use Illuminate\Database\Console\Seeds\WithoutModelEvents;
use App\Models\Regions;
use App\Models\Seeds;
use Illuminate\Database\Seeder;
use Illuminate\Support\Facades\DB;
class DatabaseSeeder extends Seeder
{
@ -15,9 +16,20 @@ class DatabaseSeeder extends Seeder
{
// User::factory(10)->create();
User::factory()->create([
'name' => 'Test User',
'email' => 'test@example.com',
DB::table('regions')->insert([
'name' => 'Heidiland',
]);
DB::table('seeds')->insert([
'uri' => 'https://www.e-domizil.ch/search/632d3fb65adbe?_format=json&adults=1&duration=7',
'region_id' => 1
]);
DB::table('properties')->insert([
'property_platform_id' => '12345',
'seed_id' => 1,
'check_data' => '12345',
'last_found' => '2024-01-01'
]);
}
}

View File

@ -11,8 +11,8 @@ Route::get('/properties', function () {
Edomizil::dispatchPropertyJobs();
});
Route::get('/occupancy', function () {
//$properties = Edomizil::getAllOccupancies();
Edomizil::dispatchPropertyDataJobs();
Route::get('/propertydata', function () {
Edomizil::scrapePropertyData(1);
//Edomizil::dispatchPropertyDataJobs();
});