-
Notifications
You must be signed in to change notification settings - Fork 0
/
project.sql
385 lines (242 loc) · 14.8 KB
/
project.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
-- Workspace setup: create the project database and switch to it.
create database project;
use project;

-- Raw previews of the three Dallas source tables.
select * from dallas_availability;
select * from dallas_hosts;
select * from dallas_listings;

-- Availability rows whose adjusted price differs from the listed price.
select *
from dallas_availability
where price <> adjusted_price;

-- Per host: average review rating and number of listings, largest portfolios first.
select
    hst.host_id,
    hst.host_name,
    avg(lst.review_scores_rating) as rating,
    count(lst.id) as count_
from dallas_hosts as hst
inner join dallas_listings as lst
    on hst.host_id = lst.host_id
group by
    hst.host_id,
    hst.host_name
order by count_ desc;
-- Analyze different metrics to distinguish between the different property types and their
-- price listings (bucket them into 3-4 categories based on your understanding).
-- To achieve this, you can use the following metrics and explore a few of your own as well:
-- availability within 15, 30, 45, etc. days, acceptance rate, average number of bookings, reviews, etc.
-- Raw preview of the listings table.
select * from dallas_listings;

-- Average price divided by average capacity for each property type, highest first.
select
    property_type,
    avg(price) / avg(accommodates) as price_per_accomodate
from dallas_listings
group by property_type
order by price_per_accomodate desc;

-- Distribution of listings by number of available dates, restricted to
-- listings that are available on at least 300 dates.
with days_per_listing as (
    select
        listing_id,
        count(date) as available_days
    from dallas_availability
    where available like 'true'
    group by listing_id
)
select
    available_days,
    count(listing_id) as num_of_listings
from days_per_listing
where available_days >= 300
group by available_days
order by available_days desc;
-- Number of available rows per calendar month, in month order.
select
    month(date) as month_num,
    datename(month, date) as month_of_availability,
    count(listing_id) as count_of_listings_available
from dallas_availability
where available like 'true'
group by
    datename(month, date),
    month(date)
order by month_num;

-- Listings split into instant-bookable and not-instant-bookable.
-- The CASE expression is repeated in GROUP BY because it is selected next to an
-- aggregate; without the GROUP BY the statement is rejected.
select
    case
        when instant_bookable = 0 then 'not_instant_bookable'
        else 'instant_bookable'
    end as booking_type,
    count(id) as num_of_listings
from dallas_listings
group by
    case
        when instant_bookable = 0 then 'not_instant_bookable'
        else 'instant_bookable'
    end;

-- Same split computed through a CTE, most common booking type first.
-- The preceding statement must be terminated with ';' for WITH to be accepted.
with booking_types as (
    select
        case
            when instant_bookable = 0 then 'not_instant_bookable'
            else 'instant_bookable'
        end as booking_type,
        id
    from dallas_listings
)
select
    booking_type,
    count(id) as num_of_listings
from booking_types
group by booking_type
order by count(id) desc;
-- Number of available rows per weekday name, most available weekday first.
select
    datename(weekday, date) as days_of_the_week,
    count(id) as num_of_available_listings
from dallas_availability
where available = 'true'
group by datename(weekday, date)
order by num_of_available_listings desc;

-- Weekday-by-year matrix of available rows: one row per weekday name,
-- one count column for each of the years 2022 and 2023.
select *
from (
    select
        datename(weekday, date) as days_of_the_week,
        id,
        year(date) as year_
    from dallas_availability
    where available = 'true'
) as src
pivot (
    count(id) for year_ in ([2022], [2023])
) as years_;

-- Distinct years present in the availability data.
select distinct year(date)
from dallas_availability;
-- Raw previews of the four Dallas tables.
select * from dallas_listings;
select * from dallas_hosts;
select * from dallas_review;
select * from dallas_availability;

-- Listing count per host together with the host's response time and response rate,
-- hosts with the most listings first.
select
    hst.host_id,
    count(lst.id) as number_of_listings,
    hst.host_response_time,
    hst.host_response_rate
from dallas_listings as lst
inner join dallas_hosts as hst
    on lst.host_id = hst.host_id
group by
    hst.host_id,
    hst.host_response_time,
    hst.host_response_rate
order by number_of_listings desc;

-- All availability rows of listing 48563007 in date order.
select *
from dallas_availability
where listing_id = 48563007
order by date;
-- TODO(review): unfinished draft, disabled so that the rest of the script parses.
-- It has an empty select list, an empty ON condition and a LIKE without a pattern.
-- Presumably it was meant to join hosts, listings and reviews and filter on review
-- text; complete it or delete it.
-- select
-- from dallas_hosts h join dallas_review r
-- select r.listing_id , r.comments from dallas_hosts h join dallas_listings l
-- on
-- r where comments like
-- Ids of all superhosts.
select host_id
from dallas_hosts
where host_is_superhost = 1;

-- Superhosts: average review rating and listing count, largest portfolios first.
select
    hst.host_id,
    hst.host_name,
    avg(lst.review_scores_rating) as rating,
    count(lst.id) as count_
from dallas_hosts as hst
inner join dallas_listings as lst
    on hst.host_id = lst.host_id
where hst.host_is_superhost = 1
group by
    hst.host_id,
    hst.host_name
order by count_ desc;

-- Non-superhosts: the same metrics.
select
    hst.host_id,
    hst.host_name,
    avg(lst.review_scores_rating) as rating,
    count(lst.id) as count_
from dallas_hosts as hst
inner join dallas_listings as lst
    on hst.host_id = lst.host_id
where hst.host_is_superhost = 0
group by
    hst.host_id,
    hst.host_name
order by count_ desc;

-- Non-superhosts whose average rating is exactly 5.
select *
from (
    select
        hst.host_id,
        hst.host_name,
        avg(lst.review_scores_rating) as rating,
        count(lst.id) as count_
    from dallas_hosts as hst
    inner join dallas_listings as lst
        on hst.host_id = lst.host_id
    where hst.host_is_superhost = 0
    group by
        hst.host_id,
        hst.host_name
) as non_super
where rating = 5
order by rating desc;
-- Listing rows that belong to superhosts, counted per neighbourhood.
select
    lst.neighbourhood_cleansed,
    count(hst.host_id) as count_of_superhost
from dallas_hosts as hst
inner join dallas_listings as lst
    on hst.host_id = lst.host_id
where hst.host_is_superhost = 1
group by lst.neighbourhood_cleansed
order by count_of_superhost desc;

-- Listing rows that belong to non-superhosts, counted per neighbourhood.
select
    lst.neighbourhood_cleansed,
    count(hst.host_id) as count_of_non_superhost
from dallas_hosts as hst
inner join dallas_listings as lst
    on hst.host_id = lst.host_id
where hst.host_is_superhost = 0
group by lst.neighbourhood_cleansed
order by count_of_non_superhost desc;

-- Both counts side by side. The inner join keeps only neighbourhoods that
-- appear in both the superhost and the non-superhost result.
with super_counts as (
    select
        lst.neighbourhood_cleansed,
        count(hst.host_id) as count_of_superhost
    from dallas_hosts as hst
    inner join dallas_listings as lst
        on hst.host_id = lst.host_id
    where hst.host_is_superhost = 1
    group by lst.neighbourhood_cleansed
),
non_super_counts as (
    select
        lst.neighbourhood_cleansed,
        count(hst.host_id) as count_of_non_superhost
    from dallas_hosts as hst
    inner join dallas_listings as lst
        on hst.host_id = lst.host_id
    where hst.host_is_superhost = 0
    group by lst.neighbourhood_cleansed
)
select
    super_counts.*,
    non_super_counts.count_of_non_superhost
from super_counts
inner join non_super_counts
    on super_counts.neighbourhood_cleansed = non_super_counts.neighbourhood_cleansed
order by super_counts.count_of_superhost desc;
-- Raw previews of the four Dallas tables.
select * from dallas_listings;
select * from dallas_hosts;
select * from dallas_review;
select * from dallas_availability;

-- Share of positive-keyword vs negative-keyword reviews per property type.
-- Every keyword is matched with LIKE: '=' would compare the comment against the
-- literal text '%easy%' and never treat '%' as a wildcard.
-- A review that contains keywords from both lists is counted in both groups.
with good_reviews as (
    select
        l.property_type,
        count(r.comments) as number_of_good_comments
    from dallas_listings as l
    inner join dallas_review as r
        on l.id = r.listing_id
    where r.comments like '%good%'
        or r.comments like '%great%'
        or r.comments like '%awesome%'
        or r.comments like '%love%'
        or r.comments like '%easy%'
        or r.comments like '%wonderful%'
        or r.comments like '%comfortable%'
        or r.comments like '%recommended%'
    group by l.property_type
),
bad_reviews as (
    select
        l.property_type,
        count(r.comments) as number_of_bad_comments
    from dallas_listings as l
    inner join dallas_review as r
        on l.id = r.listing_id
    where r.comments like '%not%'
        or r.comments like '%bad%'
        or r.comments like '%worst%'
        or r.comments like '%uncomfortable%'
        or r.comments like '%issue%'
    group by l.property_type
),
-- Only property types that have both good and bad matches survive the inner join.
review_totals as (
    select
        g.property_type,
        g.number_of_good_comments,
        b.number_of_bad_comments,
        b.number_of_bad_comments + g.number_of_good_comments as total_number_of_comments
    from good_reviews as g
    inner join bad_reviews as b
        on g.property_type = b.property_type
)
-- COUNT yields integers, so both percentages are truncated to whole numbers.
select
    property_type,
    number_of_good_comments * 100 / total_number_of_comments as Percent_of_good_comments,
    number_of_bad_comments * 100 / total_number_of_comments as percent_of_bad_comments
from review_totals
order by Percent_of_good_comments desc;
-- Scratch check of integer division; terminated so the following WITH is accepted.
select 55/11;

-- Good-keyword and bad-keyword review counts per property type, side by side.
-- Sorting happens in the outer query because ORDER BY is not allowed inside a CTE
-- (without TOP/OFFSET); keywords are matched with LIKE so '%' acts as a wildcard.
with good_reviews as (
    select
        l.property_type,
        count(r.comments) as number_of_good_comments
    from dallas_listings as l
    inner join dallas_review as r
        on l.id = r.listing_id
    where r.comments like '%good%'
        or r.comments like '%great%'
        or r.comments like '%awesome%'
        or r.comments like '%love%'
        or r.comments like '%easy%'
        or r.comments like '%wonderful%'
        or r.comments like '%comfortable%'
        or r.comments like '%recommended%'
    group by l.property_type
),
bad_reviews as (
    select
        l.property_type,
        count(r.comments) as number_of_bad_comments
    from dallas_listings as l
    inner join dallas_review as r
        on l.id = r.listing_id
    where r.comments like '%not%'
        or r.comments like '%bad%'
        or r.comments like '%worst%'
        or r.comments like '%uncomfortable%'
        or r.comments like '%issue%'
    group by l.property_type
)
select
    g.property_type,
    g.number_of_good_comments,
    b.number_of_bad_comments
from good_reviews as g
inner join bad_reviews as b
    on g.property_type = b.property_type
order by
    g.number_of_good_comments,
    b.number_of_bad_comments desc;
-- Raw preview of the hosts table.
select * from dallas_hosts;

-- Non-superhosts per host neighbourhood, largest groups first.
-- Written as a standalone statement: the earlier "with cte1 as(" wrapper was never
-- closed and had no outer query.
select
    host_neighbourhood,
    count(host_id) as number_of_nonsuperhosts
from dallas_hosts
where host_is_superhost = 0
group by host_neighbourhood
order by number_of_nonsuperhosts desc;

-- Superhosts per host neighbourhood, largest groups first.
select
    host_neighbourhood,
    count(host_id) as number_of_superhosts
from dallas_hosts
where host_is_superhost = 1
group by host_neighbourhood
order by number_of_superhosts desc;
-- Raw previews of the four Dallas tables.
select * from dallas_listings;
select * from dallas_hosts;
select * from dallas_review;
select * from dallas_availability;

-- Available rows per property type and calendar month, in month order.
-- A plain aggregate COUNT is used: a window COUNT over a.id is rejected in a
-- GROUP BY query because a.id is neither grouped nor aggregated.
select
    l.property_type,
    count(a.id) as number_of_available_listings,
    month(a.date) as month_
from dallas_listings as l
inner join dallas_availability as a
    on l.id = a.listing_id
where a.available like 'true'
group by
    l.property_type,
    month(a.date)
order by month(a.date);
-- For each Dallas property type, the month(s) with the most available rows.
-- DENSE_RANK keeps ties, so a property type can return more than one month.
-- The join is written as INNER: the WHERE filter on a.available already discards
-- listings without availability rows, so a LEFT JOIN would return the same rows.
select *
from (
    select
        month(a.date) as months_,
        l.property_type,
        datename(month, a.date) as month_,
        count(a.id) as available_listing,
        dense_rank() over (
            partition by l.property_type
            order by count(a.id) desc
        ) as rank_
    from dallas_listings as l
    inner join dallas_availability as a
        on l.id = a.listing_id
    where a.available like 'true'
    group by
        month(a.date),
        l.property_type,
        datename(month, a.date)
) as ranked
where rank_ = 1
order by months_;
-- For each Austin property type, the month(s) with the most available rows.
-- DENSE_RANK keeps ties, so a property type can return more than one month.
-- The join is written as INNER: the WHERE filter on a.available already discards
-- listings without availability rows, so a LEFT JOIN would return the same rows.
select *
from (
    select
        month(a.date) as months_,
        l.property_type,
        datename(month, a.date) as month_,
        count(a.id) as available_listing,
        dense_rank() over (
            partition by l.property_type
            order by count(a.id) desc
        ) as rank_
    from listing_austin as l
    inner join df_austin_availability as a
        on l.id = a.listing_id
    where a.available like 'true'
    group by
        month(a.date),
        l.property_type,
        datename(month, a.date)
) as ranked
where rank_ = 1
order by months_;
-- Ten Austin hosts with the most listings, with their average review rating.
select top 10
    hst.host_id,
    hst.host_name,
    avg(lst.review_scores_rating) as rating,
    count(lst.id) as count_
from host_austin as hst
inner join listing_austin as lst
    on hst.host_id = lst.host_id
group by
    hst.host_id,
    hst.host_name
order by count_ desc;

-- Austin: average price divided by average capacity per property type, highest first.
select
    property_type,
    avg(price) / avg(accommodates) as price_per_accomodate
from listing_austin
group by property_type
order by price_per_accomodate desc;

-- Average price and price per accommodated guest per property type in listingD.
select
    property_type,
    avg(price) as Average_price,
    avg(price) / avg(accommodates) as Price_per_accommodate
from listingD
group by property_type;
-- Austin: number of available rows per calendar month, in month order.
-- Terminated with ';' because the next statement starts with WITH.
select
    month(date) as month_num,
    datename(month, date) as month_of_availability,
    count(listing_id) as count_of_listings_available
from df_austin_availability
where available like 'true'
group by
    datename(month, date),
    month(date)
order by month_num;

-- Austin listings split into instant-bookable and not-instant-bookable, most common first.
with booking_types as (
    select
        case
            when instant_bookable = 0 then 'not_instant_bookable'
            else 'instant_bookable'
        end as booking_type,
        id
    from listing_austin
)
select
    booking_type,
    count(id) as num_of_listings
from booking_types
group by booking_type
order by count(id) desc;

-- Austin: number of available rows per weekday name, most available weekday first.
select
    datename(weekday, date) as days_of_the_week,
    count(id) as num_of_available_listings
from df_austin_availability
where available = 'true'
group by datename(weekday, date)
order by num_of_available_listings desc;
-- Raw preview of the Austin listings table.
select * from listing_austin;

-- NOTE(review): "aus" looks like a truncated table name; confirm the intended
-- table or remove this preview.
select * from aus;

-- Average price of instant-bookable Austin listings per property type, cheapest
-- first, keeping only property types whose average score is at least 4.5 in every
-- review dimension.
-- instant_bookable is compared with "= 'True'" instead of LIKE: assuming it is a
-- bit column (TODO confirm), LIKE would compare against its text form '1'/'0' and
-- never match 'true'.
select
    property_type,
    avg(price) as average_price
from listing_austin
where instant_bookable = 'True'
group by property_type
having avg(review_scores_rating) >= 4.5
    and avg(review_scores_accuracy) >= 4.5
    and avg(review_scores_checkin) >= 4.5
    and avg(review_scores_cleanliness) >= 4.5
    and avg(review_scores_communication) >= 4.5
    and avg(review_scores_location) >= 4.5
    and avg(review_scores_value) >= 4.5
order by average_price;

select * from listing_austin;
-- Austin: share of positive-keyword vs negative-keyword reviews per property type.
-- Every keyword is matched with LIKE: '=' would compare the comment against the
-- literal text '%easy%' and never treat '%' as a wildcard.
-- A review that contains keywords from both lists is counted in both groups.
with good_reviews as (
    select
        l.property_type,
        count(r.comments) as number_of_good_comments
    from listing_austin as l
    inner join review_austin as r
        on l.id = r.listing_id
    where r.comments like '%good%'
        or r.comments like '%great%'
        or r.comments like '%awesome%'
        or r.comments like '%love%'
        or r.comments like '%easy%'
        or r.comments like '%wonderful%'
        or r.comments like '%comfortable%'
        or r.comments like '%recommended%'
    group by l.property_type
),
bad_reviews as (
    select
        l.property_type,
        count(r.comments) as number_of_bad_comments
    from listing_austin as l
    inner join review_austin as r
        on l.id = r.listing_id
    where r.comments like '%not%'
        or r.comments like '%bad%'
        or r.comments like '%worst%'
        or r.comments like '%uncomfortable%'
        or r.comments like '%issue%'
    group by l.property_type
),
-- Only property types that have both good and bad matches survive the inner join.
review_totals as (
    select
        g.property_type,
        g.number_of_good_comments,
        b.number_of_bad_comments,
        b.number_of_bad_comments + g.number_of_good_comments as total_number_of_comments
    from good_reviews as g
    inner join bad_reviews as b
        on g.property_type = b.property_type
)
-- COUNT yields integers, so both percentages are truncated to whole numbers.
select
    property_type,
    number_of_good_comments * 100 / total_number_of_comments as Percent_of_good_comments,
    number_of_bad_comments * 100 / total_number_of_comments as percent_of_bad_comments
from review_totals
order by Percent_of_good_comments desc;
-- Number of instant-bookable Austin listings per property type, most first.
-- The three review-keyword CTEs that used to precede this query were never
-- referenced by it, so they are omitted; the result is unchanged.
select
    property_type,
    count(id) as instant_bookables
from listing_austin
where instant_bookable = 'True'
group by property_type
order by instant_bookables desc;
-- Raw previews of the Austin review, host and availability tables.
select * from review_austin;
select * from host_austin;
select * from df_austin_availability;

-- Average host acceptance rate per Austin property type, highest first.
-- The three review-keyword CTEs that used to precede this query were never
-- referenced by it, so they are omitted; the result is unchanged.
select
    l.property_type,
    avg(h.host_acceptance_rate) as host_acceptance_rate
from listing_austin as l
inner join host_austin as h
    on l.host_id = h.host_id
group by l.property_type
order by host_acceptance_rate desc;