diff --git a/docs/schema.dbml b/docs/schema.dbml index 9efc2ad..8f2a6f3 100644 --- a/docs/schema.dbml +++ b/docs/schema.dbml @@ -21,22 +21,25 @@ Table properties { created_at timestamp } -Table occupancies { - occupancy_id integer [primary key] +Table extractions { + extraction_id integer [primary key] property_id integer [unique, ref: > properties.property_id] - occupancy json [not null] + body text [not null] header text [not null] + type types [not null] created_at timestamp [not null] } -enum entities { +enum types { property - occupancy + calendar + offer + price } Table exceptions { - extraction_id integer [primary key] + exception_id integer [primary key] exception json [not null, note: "exception while scraping (e. g. HTTP error message) and called url."] - entity entities [not null, note: "for which entity did the exception occur."] - entity_id integer [not null, note: "either a property_id or occupancy_id"] + type types [not null, note: "for which entity did the exception occur."] + property_id integer [not null, ref: > properties.property_id, note: "property the exception belongs to"] } diff --git a/scraper/app/Jobs/scrapeProperties.php b/scraper/app/Jobs/scrapeProperties.php index b227100..dc30f8e 100644 --- a/scraper/app/Jobs/scrapeProperties.php +++ b/scraper/app/Jobs/scrapeProperties.php @@ -3,12 +3,14 @@ namespace App\Jobs; use App\Models\Seed; +use App\Models\Exception; use Illuminate\Bus\Queueable; use Illuminate\Contracts\Queue\ShouldQueue; use Illuminate\Contracts\Queue\ShouldBeUnique; use Illuminate\Foundation\Bus\Dispatchable; use Illuminate\Queue\InteractsWithQueue; use Illuminate\Queue\SerializesModels; +use Illuminate\Support\Facades\Http; class scrapeProperties implements ShouldQueue, ShouldBeUnique { @@ -29,14 +31,15 @@ class scrapeProperties implements ShouldQueue, ShouldBeUnique public function handle(): void { // $response = Http::get($seed->uri); - dump($seed->uri); $response = Http::get('https://diani.xyz/test.json'); $json = $response->json(); foreach($json['offers'] as 
$offer){ + // look up the stored property by the ID the scraped platform uses for this offer $property = Property::firstWhere('property_platform_id', $offer['id']); - + + // check whether geoLocation still matches the check_data stored for this property if($property && $property->check_data === implode(',', $offer['geoLocation'])){ $property->last_found = now(); $property->save(); diff --git a/scraper/app/Models/Exception.php b/scraper/app/Models/Exception.php index 752ed8f..980eae8 100644 --- a/scraper/app/Models/Exception.php +++ b/scraper/app/Models/Exception.php @@ -8,5 +8,5 @@ class Exception extends Model { use HasFactory; protected $table = 'exceptions'; - protected $fillable = ['exception', 'entity_type', 'entity_id']; + protected $fillable = ['exception', 'type', 'entity_id']; } diff --git a/scraper/app/Models/Extraction.php b/scraper/app/Models/Extraction.php new file mode 100644 index 0000000..86f4ff8 --- /dev/null +++ b/scraper/app/Models/Extraction.php @@ -0,0 +1,13 @@ + $url, - CURLOPT_HEADER => true, - CURLOPT_FOLLOWLOCATION => true, - ); - - curl_setopt_array($ch, $options); - - $request = curl_exec($ch); - - curl_close($ch); - - if($isJSON){ - $request = $this->parseJSON($request); - } - - return $request; - - } - -} \ No newline at end of file diff --git a/scraper/app/Scraper/Edomizil.php b/scraper/app/Scraper/Edomizil.php index 8b77638..f583abe 100644 --- a/scraper/app/Scraper/Edomizil.php +++ b/scraper/app/Scraper/Edomizil.php @@ -3,7 +3,7 @@ namespace App\Scraper; use App\Models\Seed; use App\Models\Property; -use App\Models\Occupancy; +use App\Models\Extraction; use App\Models\Exception; use App\Jobs\ScrapeProperty; use App\Jobs\ScrapePropertyData; @@ -11,9 +11,27 @@ use Illuminate\Support\Facades\Http; class Edomizil{ + public static function saveHttpException($response, $type, $entityId){ + + $exception = []; + + $exception['status'] = $response->status(); + $exception['headers'] = $response->headers(); + $exception['body'] = $response->body(); + + $exceptionJSON = 
json_encode($exception); + + Exception::create([ + 'exception' => $exceptionJSON, + 'type' => $type, + 'entity_id' => $entityId + ]); + + } + public static function getAllSeeds() { - // get all properties from model in random order. + // get all seeds from model in random order. return Seed::select('id','uri')->inRandomOrder()->get(); } @@ -27,7 +45,7 @@ class Edomizil{ { $seeds = self::getAllSeeds(); foreach($seeds as $seed){ - ScrapeProperty::dispatch($seed->uri); + ScrapeProperty::dispatch($seed); } } @@ -35,66 +53,101 @@ class Edomizil{ { $properties = self::getAllProperties(); foreach($properties as $property){ - dump($property->property_platform_id); - // ScrapePropertyData::dispatch($property->property_platform_id); + ScrapePropertyData::dispatch($property->property_platform_id); } } - public static function scrapeProperty($uri) + public static function scrapeProperty($seed) { - //$response = Http::get($seed->uri); - $response = Http::get('https://diani.xyz/test_2.json'); - $json = $response->json(); + $response = Http::get($seed->uri); + + if($response->successful()){ - foreach($json['offers'] as $offer){ - - $property = Property::firstWhere('property_platform_id', $offer['id']); - $geoLocation = implode(',', $offer['geoLocation']); + $json = $response->json(); - if($property){ - $property->last_found = now(); - $property->save(); - if($property->check_data !== $geoLocation){ - Exception::create([ - 'exception' => 'geoLocation was different: '.$geoLocation, - 'entity_type' => 'property', - 'entity_id' => $offer['id'] + foreach($json['offers'] as $offer){ + + $property = Property::firstWhere('property_platform_id', $offer['id']); + $geoLocation = implode(',', $offer['geoLocation']); + + if($property){ + $property->last_found = now(); + $property->save(); + + // record an exception when geoLocation differs from the stored check_data (column is named 'type' since this change, not 'entity_type') + if($property->check_data !== $geoLocation){ + Exception::create([ + 'exception' => 'geoLocation was different: '.$geoLocation, + 'type' => 
'property', + 'entity_id' => $offer['id'] + ]); + } + }else{ + Property::create([ + 'property_platform_id' => $offer['id'], + 'seed_id' => $seed->id, + 'check_data' => $geoLocation, + 'last_found' => now() ]); } - }else{ - Property::create([ - 'property_platform_id' => $offer['id'], - 'seed_id' => $seed->id, - 'check_data' => $geoLocation, - 'last_found' => now() - ]); } + }else{ + self::saveHttpException($response,'property', $seed->id); } } - public static function scrapeOccupancy($propertyId){ - /* + public static function scrapePropertyData($propertyId){ + + // fetch the offer details (name etc.) and keep the raw body and headers as an 'offer' extraction. NOTE(review): property_id is filled with $propertyId as passed in - confirm it is the properties.id the extractions foreign key expects, not the platform id + $offer = Http::get('https://www.e-domizil.ch/rental/offer/'.$propertyId); + + if($offer->successful()){ + + Extraction::create([ + 'property_id' => $propertyId, + 'type' => 'offer', + 'body' => $offer->body(), + 'header' => json_encode($offer->headers()) + ]); + + }else{ + self::saveHttpException($offer,'offer',$propertyId); + } + + // fetch the price details of the property and keep them as a 'price' extraction + $price = Http::get('https://www.e-domizil.ch/booking/checkout/priceDetails/'.$propertyId); + + if($price->successful()){ + + Extraction::create([ + 'property_id' => $propertyId, + 'type' => 'price', + 'body' => $price->body(), + 'header' => json_encode($price->headers()) + ]); + + }else{ + self::saveHttpException($price,'price',$propertyId); + } + + // fetch the calendar (which contains the occupancies) for the current year and month $calendar = Http::get('https://www.e-domizil.ch/api/v2/calendar/'.$propertyId, [ 'year' => date("Y"), 'month' => date("m") ]); - $data_cal = $calendar->json(); - - $price = Http::get('https://www.e-domizil.ch/booking/checkout/priceDetails/'.$propertyId); - $data_price = $price->json(); - - $offer = Http::get('https://www.e-domizil.ch/rental/offer/'.$propertyId); - $data_offer = $offer->json(); -*/ - /* - $data = $response->json(); - Occupancy::create([ - 'property_id' => $property->id, - 'occupancy' => json_encode($data['content']['days']), - 'header' => json_encode($response->headers()) - ]); - */ + 
if($calendar->successful()){ + + Extraction::create([ + 'property_id' => $propertyId, + 'type' => 'calendar', + 'body' => $calendar->body(), + 'header' => json_encode($calendar->headers()) + ]); + + }else{ + self::saveHttpException($calendar,'calendar',$propertyId); + } } diff --git a/scraper/database/migrations/2024_03_15_142550_create_occupancies_table.php b/scraper/database/migrations/2024_03_15_142550_create_extractions_table.php similarity index 70% rename from scraper/database/migrations/2024_03_15_142550_create_occupancies_table.php rename to scraper/database/migrations/2024_03_15_142550_create_extractions_table.php index a34f910..488f2e2 100644 --- a/scraper/database/migrations/2024_03_15_142550_create_occupancies_table.php +++ b/scraper/database/migrations/2024_03_15_142550_create_extractions_table.php @@ -11,11 +11,12 @@ return new class extends Migration */ public function up(): void { - Schema::create('occupancies', function (Blueprint $table) { + Schema::create('extractions', function (Blueprint $table) { $table->id(); $table->foreignId('property_id')->constrained(); - $table->json('occupancy'); + $table->text('body'); $table->text('header'); + $table->enum('type', ['property', 'calendar', 'offer', 'price']); $table->timestamps(); }); } @@ -25,6 +26,6 @@ return new class extends Migration */ public function down(): void { - Schema::dropIfExists('occupancies'); + Schema::dropIfExists('extractions'); } }; diff --git a/scraper/database/migrations/2024_03_15_142625_create_exceptions_table.php b/scraper/database/migrations/2024_03_15_142625_create_exceptions_table.php index c40b228..b5d7f5f 100644 --- a/scraper/database/migrations/2024_03_15_142625_create_exceptions_table.php +++ b/scraper/database/migrations/2024_03_15_142625_create_exceptions_table.php @@ -13,8 +13,8 @@ return new class extends Migration { Schema::create('exceptions', function (Blueprint $table) { $table->id(); - $table->json('exception'); - $table->enum('entity_type', ['property', 
'occupancy']); + $table->text('exception'); + $table->enum('type', ['property', 'calendar', 'offer', 'price']); $table->integer('entity_id'); $table->timestamps(); }); diff --git a/scraper/database/seeders/DatabaseSeeder.php b/scraper/database/seeders/DatabaseSeeder.php index d01a0ef..1210333 100644 --- a/scraper/database/seeders/DatabaseSeeder.php +++ b/scraper/database/seeders/DatabaseSeeder.php @@ -2,9 +2,10 @@ namespace Database\Seeders; -use App\Models\User; -// use Illuminate\Database\Console\Seeds\WithoutModelEvents; +use App\Models\Regions; +use App\Models\Seeds; use Illuminate\Database\Seeder; +use Illuminate\Support\Facades\DB; class DatabaseSeeder extends Seeder { @@ -15,9 +16,20 @@ class DatabaseSeeder extends Seeder { // User::factory(10)->create(); - User::factory()->create([ - 'name' => 'Test User', - 'email' => 'test@example.com', + DB::table('regions')->insert([ + 'name' => 'Heidiland', + ]); + + DB::table('seeds')->insert([ + 'uri' => 'https://www.e-domizil.ch/search/632d3fb65adbe?_format=json&adults=1&duration=7', + 'region_id' => 1 + ]); + + DB::table('properties')->insert([ + 'property_platform_id' => '12345', + 'seed_id' => 1, + 'check_data' => '12345', + 'last_found' => '2024-01-01' ]); } } diff --git a/scraper/routes/web.php b/scraper/routes/web.php index 3b206c2..5e143da 100644 --- a/scraper/routes/web.php +++ b/scraper/routes/web.php @@ -11,8 +11,8 @@ Route::get('/properties', function () { Edomizil::dispatchPropertyJobs(); }); -Route::get('/occupancy', function () { - //$properties = Edomizil::getAllOccupancies(); - Edomizil::dispatchPropertyDataJobs(); +Route::get('/propertydata', function () { + Edomizil::scrapePropertyData(1); + // NOTE(review): the call above scrapes the hard-coded id 1 inside the request - presumably a debug stand-in for Edomizil::dispatchPropertyDataJobs(); confirm before merging });