Slightly less chaos.
parent
956ce5a95b
commit
f4a724618e
|
@ -21,22 +21,23 @@ Table properties {
|
|||
created_at timestamp
|
||||
}
|
||||
|
||||
Table occupancies {
|
||||
occupancy_id integer [primary key]
|
||||
Table extractions {
|
||||
extraction_id integer [primary key]
|
||||
property_id integer [unique, ref: > properties.property_id]
|
||||
occupancy json [not null]
|
||||
body text [not null]
|
||||
header text [not null]
|
||||
created_at timestamp [not null]
|
||||
}
|
||||
|
||||
enum entities {
|
||||
enum types {
|
||||
property
|
||||
occupancy
|
||||
calendar
|
||||
offer
|
||||
}
|
||||
|
||||
Table exceptions {
|
||||
extraction_id integer [primary key]
|
||||
exception_id integer [primary key]
|
||||
exception json [not null, note: "Exception raised while scraping (e.g. the HTTP error message), together with the URL that was called."]
|
||||
entity entities [not null, note: "for which entity did the exception occur."]
|
||||
entity_id integer [not null, note: "either a property_id or occupancy_id"]
|
||||
type types [not null, note: "Type of entity for which the exception occurred."]
|
||||
property_id integer [not null, ref: > properties.property_id, note: "ID of the property the exception relates to."]
|
||||
}
|
||||
|
|
|
@ -3,12 +3,14 @@
|
|||
namespace App\Jobs;
|
||||
|
||||
use App\Models\Seed;
|
||||
use App\Models\Exception;
|
||||
use Illuminate\Bus\Queueable;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Contracts\Queue\ShouldBeUnique;
|
||||
use Illuminate\Foundation\Bus\Dispatchable;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
|
||||
class scrapeProperties implements ShouldQueue, ShouldBeUnique
|
||||
{
|
||||
|
@ -29,14 +31,15 @@ class scrapeProperties implements ShouldQueue, ShouldBeUnique
|
|||
public function handle(): void
|
||||
{
|
||||
// $response = Http::get($seed->uri);
|
||||
dump($seed->uri);
|
||||
$response = Http::get('https://diani.xyz/test.json');
|
||||
$json = $response->json();
|
||||
|
||||
foreach($json['offers'] as $offer){
|
||||
|
||||
// Guessed ID to identify property on scraped platform
|
||||
$property = Property::firstWhere('property_platform_id', $offer['id']);
|
||||
|
||||
|
||||
// check if geoLocation has the same values as at the last crawl
|
||||
if($property && $property->check_data === implode(',', $offer['geoLocation'])){
|
||||
$property->last_found = now();
|
||||
$property->save();
|
||||
|
|
|
@ -8,5 +8,5 @@ class Exception extends Model
|
|||
{
|
||||
use HasFactory;
|
||||
protected $table = 'exceptions';
|
||||
protected $fillable = ['exception', 'entity_type', 'entity_id'];
|
||||
protected $fillable = ['exception', 'type', 'entity_id'];
|
||||
}
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
|
||||
/**
 * Eloquent model for rows of the `extractions` table.
 *
 * Each row holds one stored scrape result: the property it belongs to,
 * the kind of extraction, and the raw response body and headers.
 */
class Extraction extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     */
    protected $fillable = [
        'property_id',
        'type',
        'body',
        'header',
    ];

    /**
     * Name of the database table backing this model.
     */
    protected $table = 'extractions';
}
|
|
@ -5,9 +5,9 @@ namespace App\Models;
|
|||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
|
||||
/**
 * Eloquent model for rows of the `regions` table.
 *
 * A region only carries a name.
 */
class Regions extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     */
    protected $fillable = [
        'name',
    ];

    /**
     * Name of the database table backing this model.
     */
    protected $table = 'regions';
}
|
|
@ -1,36 +0,0 @@
|
|||
<?php
|
||||
|
||||
namespace App\Scraper;
|
||||
use App\Models\Seed;
|
||||
|
||||
/**
 * Minimal cURL based HTTP GET helper for the scrapers.
 */
class Basic{

    /**
     * Decode a JSON document into associative arrays.
     *
     * @param  string  $json  JSON text to decode.
     * @return mixed   The decoded value, or null when $json is not valid JSON.
     */
    public function parseJSON($json){
        return json_decode($json, true);
    }

    /**
     * Perform a GET request (following redirects) and return the response.
     *
     * @param  string  $url     URL to fetch.
     * @param  bool    $isJSON  When true the response body is decoded as JSON;
     *                          when false the raw response (headers followed by
     *                          body) is returned as a string.
     * @return mixed   Decoded JSON (null if the body is not valid JSON or the
     *                 transfer failed) when $isJSON is true; otherwise the raw
     *                 response string, or false when the transfer failed.
     */
    public function get($url, bool $isJSON = true){

        $ch = curl_init();

        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_HEADER => true,
            CURLOPT_FOLLOWLOCATION => true,
            // Without this curl_exec() prints the response and returns true
            // instead of handing the response back to us.
            CURLOPT_RETURNTRANSFER => true,
        ]);

        $raw = curl_exec($ch);

        // Total size of all received header blocks (including those of
        // followed redirects); needed to cut the headers off the body.
        $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);

        curl_close($ch);

        if($raw === false){
            // Transfer failed: nothing to decode.
            return $isJSON ? null : false;
        }

        if(!$isJSON){
            return $raw;
        }

        // CURLOPT_HEADER puts the response headers in front of the body, so
        // they have to be stripped before the body can be parsed as JSON.
        return $this->parseJSON(substr($raw, $headerSize));

    }

}
|
|
@ -3,7 +3,7 @@ namespace App\Scraper;
|
|||
|
||||
use App\Models\Seed;
|
||||
use App\Models\Property;
|
||||
use App\Models\Occupancy;
|
||||
use App\Models\Extraction;
|
||||
use App\Models\Exception;
|
||||
use App\Jobs\ScrapeProperty;
|
||||
use App\Jobs\ScrapePropertyData;
|
||||
|
@ -11,9 +11,27 @@ use Illuminate\Support\Facades\Http;
|
|||
|
||||
class Edomizil{
|
||||
|
||||
/**
 * Persist a failed HTTP response as an Exception record.
 *
 * Status code, headers and body of the response are stored as one JSON
 * document in the `exception` attribute.
 *
 * @param  mixed   $response  Response object exposing status(), headers() and body().
 * @param  string  $type      Value stored in the record's `type` attribute.
 * @param  mixed   $entityId  Value stored in the record's `entity_id` attribute.
 * @return void
 */
public static function saveHttpException($response, $type, $entityId){

    // Scraped bodies are not guaranteed to be valid UTF-8. Without the
    // substitute flag json_encode() would return false for such a body and
    // the whole diagnostic record would be lost.
    $exceptionJSON = json_encode([
        'status' => $response->status(),
        'headers' => $response->headers(),
        'body' => $response->body(),
    ], JSON_INVALID_UTF8_SUBSTITUTE);

    Exception::create([
        'exception' => $exceptionJSON,
        'type' => $type,
        'entity_id' => $entityId
    ]);

}
|
||||
|
||||
/**
 * Load every seed in random order.
 *
 * Only the `id` and `uri` columns are selected.
 */
public static function getAllSeeds()
{
    $seedQuery = Seed::select('id','uri');

    return $seedQuery->inRandomOrder()->get();
}
|
||||
|
||||
|
@ -27,7 +45,7 @@ class Edomizil{
|
|||
{
|
||||
$seeds = self::getAllSeeds();
|
||||
foreach($seeds as $seed){
|
||||
ScrapeProperty::dispatch($seed->uri);
|
||||
ScrapeProperty::dispatch($seed);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -35,66 +53,101 @@ class Edomizil{
|
|||
{
|
||||
$properties = self::getAllProperties();
|
||||
foreach($properties as $property){
|
||||
dump($property->property_platform_id);
|
||||
// ScrapePropertyData::dispatch($property->property_platform_id);
|
||||
ScrapePropertyData::dispatch($property->property_platform_id);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetch one seed URI and synchronise the properties listed in its response.
 *
 * For every entry of the response's `offers` list the matching property
 * (looked up by `property_platform_id`) gets its `last_found` timestamp
 * refreshed; unknown offers are inserted as new properties. When a known
 * property reports a geoLocation different from the stored `check_data`,
 * an Exception record is written. A non-successful HTTP response is stored
 * through saveHttpException().
 *
 * @param  mixed  $seed  Seed record; its `uri` and `id` attributes are read.
 * @return void
 */
public static function scrapeProperty($seed)
{
    $response = Http::get($seed->uri);

    if(!$response->successful()){
        self::saveHttpException($response,'property', $seed->id);
        return;
    }

    $json = $response->json();

    // A successful response that is not JSON, or has no "offers" list,
    // contains nothing to process.
    $offers = is_array($json) ? ($json['offers'] ?? []) : [];

    foreach($offers as $offer){

        $property = Property::firstWhere('property_platform_id', $offer['id']);
        $geoLocation = implode(',', $offer['geoLocation']);

        if(!$property){
            Property::create([
                'property_platform_id' => $offer['id'],
                'seed_id' => $seed->id,
                'check_data' => $geoLocation,
                'last_found' => now()
            ]);
            continue;
        }

        $property->last_found = now();
        $property->save();

        // check if geoLocation is the same as last crawl
        if($property->check_data !== $geoLocation){
            Exception::create([
                'exception' => 'geoLocation was different: '.$geoLocation,
                // The column is called `type` (see the Exception model's
                // fillable list); the former `entity_type` key would be
                // dropped by mass assignment.
                'type' => 'property',
                'entity_id' => $offer['id']
            ]);
        }
    }
}
|
||||
|
||||
/**
 * Scrape offer details, price details and the calendar of one property.
 *
 * Each of the three endpoints is requested in turn. A successful response is
 * stored as an Extraction row of the matching type; a failed one is stored
 * through saveHttpException() with the same type.
 *
 * NOTE(review): $propertyId is used both inside the platform URLs and as the
 * `property_id` value of the stored rows - confirm callers pass an id that is
 * valid for both.
 *
 * @param  mixed  $propertyId  Identifier of the property to scrape.
 * @return void
 */
public static function scrapePropertyData($propertyId){

    $baseUrl = 'https://www.e-domizil.ch';

    // scrape offer details such as name etc.
    self::storeExtraction('offer', $propertyId, $baseUrl.'/rental/offer/'.$propertyId);

    // scrape price of property
    self::storeExtraction('price', $propertyId, $baseUrl.'/booking/checkout/priceDetails/'.$propertyId);

    // scrape calendar which contains occupancies
    self::storeExtraction('calendar', $propertyId, $baseUrl.'/api/v2/calendar/'.$propertyId, [
        'year' => date("Y"),
        'month' => date("m")
    ]);

}

/**
 * Request one URL and store the outcome under the given extraction type.
 *
 * @param  string  $type        Type recorded on the Extraction / Exception row.
 * @param  mixed   $propertyId  Identifier recorded on the stored row.
 * @param  string  $url         URL to request.
 * @param  array   $query       Optional query parameters for the request.
 * @return void
 */
private static function storeExtraction($type, $propertyId, $url, array $query = [])
{
    // Only pass a query argument when there is one, so parameterless
    // requests are issued exactly as a plain Http::get($url).
    $response = $query === [] ? Http::get($url) : Http::get($url, $query);

    if(!$response->successful()){
        // Failures are logged under the type of the request that failed.
        self::saveHttpException($response, $type, $propertyId);
        return;
    }

    Extraction::create([
        'property_id' => $propertyId,
        'type' => $type,
        'body' => $response->body(),
        'header' => json_encode($response->headers())
    ]);
}
|
||||
|
||||
|
|
|
@ -11,11 +11,12 @@ return new class extends Migration
|
|||
*/
|
||||
public function up(): void
{
    // One row per stored scrape response of a property.
    Schema::create('extractions', function (Blueprint $table) {
        $table->id();
        $table->foreignId('property_id')->constrained();
        // Holds complete HTTP response bodies. A plain text column is
        // limited to 64 KB on MySQL, which a full page can exceed, so the
        // widest text type is used.
        $table->longText('body');
        // JSON-encoded response headers.
        $table->text('header');
        // Which kind of request produced the row.
        $table->enum('type', ['property', 'calendar', 'offer', 'price']);
        $table->timestamps();
    });
}
|
||||
|
@ -25,6 +26,6 @@ return new class extends Migration
|
|||
*/
|
||||
public function down(): void
{
    // Remove the table created in up(); a missing table is not an error.
    $tableName = 'extractions';

    Schema::dropIfExists($tableName);
}
|
||||
};
|
|
@ -13,8 +13,8 @@ return new class extends Migration
|
|||
{
|
||||
Schema::create('exceptions', function (Blueprint $table) {
|
||||
$table->id();
|
||||
$table->json('exception');
|
||||
$table->enum('entity_type', ['property', 'occupancy']);
|
||||
$table->text('exception');
|
||||
$table->enum('type', ['property', 'calendar', 'offer', 'price']);
|
||||
$table->integer('entity_id');
|
||||
$table->timestamps();
|
||||
});
|
||||
|
|
|
@ -2,9 +2,10 @@
|
|||
|
||||
namespace Database\Seeders;
|
||||
|
||||
use App\Models\User;
|
||||
// use Illuminate\Database\Console\Seeds\WithoutModelEvents;
|
||||
use App\Models\Regions;
|
||||
use App\Models\Seeds;
|
||||
use Illuminate\Database\Seeder;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
|
||||
class DatabaseSeeder extends Seeder
|
||||
{
|
||||
|
@ -15,9 +16,20 @@ class DatabaseSeeder extends Seeder
|
|||
{
|
||||
// User::factory(10)->create();
|
||||
|
||||
User::factory()->create([
|
||||
'name' => 'Test User',
|
||||
'email' => 'test@example.com',
|
||||
DB::table('regions')->insert([
|
||||
'name' => 'Heidiland',
|
||||
]);
|
||||
|
||||
DB::table('seeds')->insert([
|
||||
'uri' => 'https://www.e-domizil.ch/search/632d3fb65adbe?_format=json&adults=1&duration=7',
|
||||
'region_id' => 1
|
||||
]);
|
||||
|
||||
DB::table('properties')->insert([
|
||||
'property_platform_id' => '12345',
|
||||
'seed_id' => 1,
|
||||
'check_data' => '12345',
|
||||
'last_found' => '2024-01-01'
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,8 +11,8 @@ Route::get('/properties', function () {
|
|||
Edomizil::dispatchPropertyJobs();
|
||||
});
|
||||
|
||||
Route::get('/occupancy', function () {
|
||||
//$properties = Edomizil::getAllOccupancies();
|
||||
Edomizil::dispatchPropertyDataJobs();
|
||||
Route::get('/propertydata', function () {
|
||||
Edomizil::scrapePropertyData(1);
|
||||
//Edomizil::dispatchPropertyDataJobs();
|
||||
});
|
||||
|
||||
|
|
Loading…
Reference in New Issue