From 556099f55b3aee6c46a69ef8d6a2d29a9343203a Mon Sep 17 00:00:00 2001 From: Baiyue Cao Date: Mon, 28 Nov 2022 23:48:52 -0500 Subject: [PATCH 1/5] testing out dbt --- pluto_build/.python-version | 1 + pluto_build/dbt_project.yml | 13 ++++ pluto_build/instructions.md | 38 ++++++++++++ pluto_build/models/sources.yml | 47 +++++++++++++++ pluto_build/models/staging/schema.yml | 34 +++++++++++ pluto_build/models/staging/stg_bbl.sql | 14 +++++ pluto_build/models/staging/stg_cama.sql | 44 ++++++++++++++ pluto_build/models/staging/stg_geocodes.sql | 55 +++++++++++++++++ pluto_build/models/staging/stg_pts.sql | 53 +++++++++++++++++ pluto_build/models/staging/stg_rpad.sql | 66 +++++++++++++++++++++ pluto_build/packages.yml | 5 ++ pluto_build/requirements.txt | 1 + pluto_build/sql/preprocessing.sql | 9 +-- 13 files changed, 374 insertions(+), 6 deletions(-) create mode 100644 pluto_build/.python-version create mode 100644 pluto_build/dbt_project.yml create mode 100644 pluto_build/instructions.md create mode 100644 pluto_build/models/sources.yml create mode 100644 pluto_build/models/staging/schema.yml create mode 100644 pluto_build/models/staging/stg_bbl.sql create mode 100644 pluto_build/models/staging/stg_cama.sql create mode 100644 pluto_build/models/staging/stg_geocodes.sql create mode 100644 pluto_build/models/staging/stg_pts.sql create mode 100644 pluto_build/models/staging/stg_rpad.sql create mode 100644 pluto_build/packages.yml create mode 100644 pluto_build/requirements.txt diff --git a/pluto_build/.python-version b/pluto_build/.python-version new file mode 100644 index 00000000..89a1ad7a --- /dev/null +++ b/pluto_build/.python-version @@ -0,0 +1 @@ +3.8.12 diff --git a/pluto_build/dbt_project.yml b/pluto_build/dbt_project.yml new file mode 100644 index 00000000..7a5f1772 --- /dev/null +++ b/pluto_build/dbt_project.yml @@ -0,0 +1,13 @@ +name: pluto +config-version: 2 +version: '0.1' + +profile: nycplanning + +model-paths: ["models"] + +models: + pluto: + staging: + +materialized: table + schema: staging \ No newline at end of file diff --git a/pluto_build/instructions.md b/pluto_build/instructions.md new file mode 100644 index 00000000..445e12a3 --- /dev/null +++ b/pluto_build/instructions.md @@ -0,0 +1,38 @@ +# Instructions + +## Create local Postgres-Postgis database +0. make sure you have docker installed +1. `docker run -d -p 5432:5432 -e POSTGRES_PASSWORD=postgres postgis/postgis:12-3.0-alpine` +> this will create a postgres/postgis docker container locally for development purposes + +## Install DBT +0. navigate to `pluto_build` +1. create virtualenv `python -m virtualenv .venv` +2. activate virtualenv `source .venv/bin/activate` +3. install dependencies `pip install -r requirements.txt` +4. install dbt packages `dbt deps` + +## Set up DBT profile +0. first time setup -> `touch ~/.dbt/profiles.yml` +1. add the following to the `profiles.yml` +```yml +nycplanning: + target: pluto-dev + outputs: + pluto-dev: + type: postgres + host: localhost + user: postgres + password: postgres + port: 5432 + dbname: postgres + schema: public + threads: 4 +``` + +## Example Commands +1. `dbt run -s staging.stg_pts` + +## Execution Order +1. you will still need to run `bash 01_dataloading.sh` to import source data +2. once dataloading is complete, you can then proceed to run dbt commands \ No newline at end of file diff --git a/pluto_build/models/sources.yml b/pluto_build/models/sources.yml new file mode 100644 index 00000000..72b9f902 --- /dev/null +++ b/pluto_build/models/sources.yml @@ -0,0 +1,47 @@ +version: 2 + +sources: + - name: public + tables: + - name: dcp_cb2010_wi + - name: dcp_cb2020_wi + - name: dcp_cdboundaries_wi + - name: dcp_colp + - name: dcp_commercialoverlay + - name: dcp_councildistricts_wi + - name: dcp_ct2010_wi + - name: dcp_ct2020_wi + - name: dcp_edesignation + - name: dcp_firecompanies + - name: dcp_healthareas + - name: dcp_healthcenters + - name: dcp_limitedheight + - name: dcp_policeprecincts + - name: dcp_school_districts + - name: dcp_specialpurpose + - name: dcp_specialpurposesubdistricts + - name: dcp_zoning_maxfar + - name: dcp_zoningdistricts + - name: dcp_zoningmapamendments + - name: dcp_zoningmapindex + - name: dof_condo + - name: dof_dtm + - name: dof_shoreline + - name: doitt_zipcodeboundaries + - name: dpr_greenthumb + - name: dsny_frequencies + - name: fema_firms2007_100yr + - name: fema_pfirms2015_100yr + - name: lpc_historic_districts + - name: lpc_landmarks + - name: pluto_corrections + - name: pluto_input_bsmtcode + - name: pluto_input_cama_dof + - name: pluto_input_condo_bldgclass + - name: pluto_input_condolot_descriptiveattributes + - name: pluto_input_geocodes + - name: pluto_input_landuse_bldgclass + - name: pluto_input_numbldgs + - name: pluto_input_research + - name: pluto_pts + - name: source_data_versions diff --git a/pluto_build/models/staging/schema.yml b/pluto_build/models/staging/schema.yml new file mode 100644 index 00000000..bc579f8b --- /dev/null +++ b/pluto_build/models/staging/schema.yml @@ -0,0 +1,34 @@ +version: 2 + +models: + - name: stg_pts + description: cleaned and casted table for PTS + + - name: stg_cama + description: cleaned and casted table for CAMA + + - name: stg_rpad + description: Convert PTS to RPAD (legacy format) + + - name: stg_geocodes + description: cleaned table for geocodes + + - name: stg_bbl + description: cleaned table for BBL + columns: + - name: boro + - name: tb + description: Tax Block + - name: tl + description: Tax Lot + - name: bbl + description: >- + Borough Block Lot (BBL) from stg_pts, + not that this field can contain letter E for easement + - name: billingbbl + description: >- + Billing BBL from stg_geocodes, this is coming from geosupport + - name: primebbl + description: >- + Prime BBL, used for calculation purposes usually. + multiple lots can shared the same prime bbl (condo lots) diff --git a/pluto_build/models/staging/stg_bbl.sql b/pluto_build/models/staging/stg_bbl.sql new file mode 100644 index 00000000..cf01a829 --- /dev/null +++ b/pluto_build/models/staging/stg_bbl.sql @@ -0,0 +1,14 @@ +SELECT + a.boro, + a.tb, + a.tl, + a.bbl, + b.billingbbl, + (CASE + WHEN NULLIF(b.billingbbl, '0000000000') IS NOT NULL + THEN b.billingbbl + ELSE a.boro||a.tb||a.tl + END) AS primebbl +FROM {{ ref('stg_pts') }} a +LEFT JOIN {{ ref('stg_geocodes') }} b +ON a.boro||a.tb||a.tl = b.bbl \ No newline at end of file diff --git a/pluto_build/models/staging/stg_cama.sql b/pluto_build/models/staging/stg_cama.sql new file mode 100644 index 00000000..613f96dc --- /dev/null +++ b/pluto_build/models/staging/stg_cama.sql @@ -0,0 +1,44 @@ +SELECT + a.bbl, + parcelcard, + bldgnum, + bldgclass, + primaryusecode, + NULLIF(developmentname, '') AS developmentname, + streettype, + lottype, + residarea::integer, + officearea::integer, + retailarea::integer, + garagearea::integer, + storagearea::integer, + factoryarea::integer, + otherarea::integer, + grossarea::integer, + ownerarea::integer, + grossvolume, + commercialarea::integer, + proxcode, + bsmnt_type, + bsmntgradient, + bsmntarea::integer, + firstfloorarea::integer, + secondfloorarea::integer, + upperfloorarea::integer, + partresfloorarea::integer, + unfinishedfloorarea::integer, + finishedfloorarea::integer, + nonresidfloorarea::integer, + NULLIF(residconstrtype, '') AS residconstrtype, + NULLIF(commercialconstrtype, '') AS commercialconstrtype, + NULLIF(condomainconstrtype, '') AS condomainconstrtype, + NULLIF(condounitsconstrtype, '') AS condounitsconstrtype,, + b.billingbbl, + (CASE + WHEN b.billingbbl IS NOT NULL AND b.billingbbl <> '0000000000' + THEN b.billingbbl + ELSE LEFT(a.bbl,10) + END) AS primebbl +FROM {{ source('public', 'pluto_input_cama_dof') }} a +LEFT JOIN {{ source('public', 'pluto_input_geocodes') }} b +ON LEFT(a.bbl,10)=b.borough||lpad(b.block,5,'0')||lpad(b.lot,4,'0') \ No newline at end of file diff --git a/pluto_build/models/staging/stg_geocodes.sql b/pluto_build/models/staging/stg_geocodes.sql new file mode 100644 index 00000000..ac63661d --- /dev/null +++ b/pluto_build/models/staging/stg_geocodes.sql @@ -0,0 +1,55 @@ +WITH backfill_geom AS ( + SELECT + ogc_fid, + COALESCE( + wkb_geometry, + ST_SetSRID( + ST_MakePoint( + longitude::double precision, + latitude::double precision + ), + 4326 + ) + ) as geom + FROM {{ source('public', 'pluto_input_geocodes') }} +) +SELECT + a.ogc_fid, + bbl as geo_bbl, + borough||lpad(block,5,'0')||lpad(lot,4,'0') as bbl, + ST_X(ST_TRANSFORM(geom, 2263))::integer as xcoord, + ST_Y(ST_TRANSFORM(geom, 2263))::integer as ycoord, + (CASE WHEN ct2010::numeric = 0 THEN NULL ELSE ct2010 END) as ct2010, + numberofexistingstructures as numberOfExistingStructuresOnLot, + geom AS wkb_geometry, + billingbbl, + cd, + cb2010, + ct2020, + cb2020, + schooldist, + council, + zipcode, + firecomp, + policeprct, + healthcenterdistrict, + healtharea, + sanitdistrict, + sanitsub, + boepreferredstreetname, + taxmap, + sanbornmapidentifier, + latitude, + longitude, + grc, + grc2, + msg, + msg2, + borough, + block, + lot, + easement, + input_hnum, + input_sname +FROM {{ source('public', 'pluto_input_geocodes') }} a +LEFT JOIN backfill_geom b ON a.ogc_fid = b.ogc_fid diff --git a/pluto_build/models/staging/stg_pts.sql b/pluto_build/models/staging/stg_pts.sql new file mode 100644 index 00000000..4bbc189c --- /dev/null +++ b/pluto_build/models/staging/stg_pts.sql @@ -0,0 +1,53 @@ +SELECT + boro, + block as tb, + lot as tl, + parid as BBL, + street_name, + housenum_lo, + housenum_hi, + aptno, + zip_code as zip, + bldg_class as BLDGCL, + ease, + av_owner as owner, + REPLACE(land_area, '+', '')::double precision as LAND_AREA, + REPLACE(gross_sqft, '+', '')::double precision as GROSS_SQFT, + REPLACE(residential_area_gross, '+', '')::double precision as RESIDAREA, + REPLACE(office_area_gross, '+', '')::double precision as OFFICEAREA, + REPLACE(retail_area_gross, '+', '')::double precision as RETAILAREA, + REPLACE(garage_area, '+', '')::double precision as GARAGEAREA, + REPLACE(storage_area_gross, '+', '')::double precision as STORAGEAREA, + REPLACE(factory_area_gross, '+', '')::double precision as FACTORYAREA, + REPLACE(other_area_gross, '+', '')::double precision as OTHERAREA, + REPLACE(num_bldgs, '+', '')::double precision as BLDGS, + REPLACE(bld_story, '+', '')::double precision as STORY, + REPLACE(coop_apts, '+', '')::double precision as COOP_APTS, + REPLACE(units, '+', '')::double precision as UNITS, + round(REPLACE(lot_frt, '+', '')::numeric, 2) as LFFT, + round(REPLACE(lot_dep, '+', '')::numeric, 2) as LDFT, + round(REPLACE(bld_frt, '+', '')::numeric, 2) as BFFT, + round(REPLACE(bld_dep, '+', '')::numeric, 2) as BDFT, + bld_ext as EXT, + lot_irreg as IRREG, + -- current values contain the most up to date public values. + -- June to January current values have the Final value from the prior year. + -- January to May current values contain the Tentative values. + -- After May current values contain the Final values. + -- After May 25th (the date the final roll is released) it will contain the final values + REPLACE(curactland, '+', '')::double precision as CURAVL_ACT, + -- pyactland + REPLACE(curacttot, '+', '')::double precision as CURAVT_ACT, + -- pyacttot + REPLACE(curactextot, '+', '')::double precision as CUREXT_ACT, + -- pyactextot + yrbuilt, + yralt1, + yralt2, + condo_number, + appt_boro as AP_BORO, + appt_block as AP_BLOCK, + appt_lot as AP_LOT, + appt_ease as AP_EASE, + appt_date as AP_DATE +FROM {{ source('public', 'pluto_pts') }} \ No newline at end of file diff --git a/pluto_build/models/staging/stg_rpad.sql b/pluto_build/models/staging/stg_rpad.sql new file mode 100644 index 00000000..8732d75d --- /dev/null +++ b/pluto_build/models/staging/stg_rpad.sql @@ -0,0 +1,66 @@ +WITH pluto_rpad_rownum AS ( + SELECT + a.*, + ROW_NUMBER() OVER ( + PARTITION BY boro||tb||tl + ORDER BY + curavt_act DESC + ,land_area DESC + ,ease ASC + ) AS rn, + (curavl_act = curavt_act AND upper(bldgcl) LIKE 'V%') AS dcp_edit_flag + FROM {{ ref('stg_pts') }} a +) +SELECT + boro, + tb, + tl, + bbl, + street_name, + housenum_lo, + housenum_hi, + aptno, + zip, + bldgcl, + ease, + owner, + (CASE + WHEN (land_area IS NULL OR land_area = 0) + AND irreg <> 'I' + AND lfft > 0 + AND ldft > 0 + THEN lfft * ldft + ELSE land_area + END) AS land_area, + gross_sqft, + residarea, + officearea, + retailarea, + garagearea, + storagearea, + factoryarea, + otherarea, + (CASE WHEN dcp_edit_flag THEN 0 ELSE bldgs END) AS bldgs, + (CASE WHEN dcp_edit_flag THEN 0 ELSE story END) AS story, + coop_apts, + units, + lfft, + ldft, + (CASE WHEN dcp_edit_flag THEN 0 ELSE bfft END) AS bfft, + (CASE WHEN dcp_edit_flag THEN 0 ELSE bdft END) AS bdft, + ext, + irreg, + curavl_act, + curavt_act, + curext_act, + yrbuilt, + yralt1, + yralt2, + condo_number, + ap_boro, + ap_block, + ap_lot, + ap_ease, + ap_date +FROM pluto_rpad_rownum +WHERE rn = 1 \ No newline at end of file diff --git a/pluto_build/packages.yml b/pluto_build/packages.yml new file mode 100644 index 00000000..9d3f0b6e --- /dev/null +++ b/pluto_build/packages.yml @@ -0,0 +1,5 @@ +packages: + - package: dbt-labs/codegen + version: 0.9.0 + - package: dbt-labs/dbt_utils + version: 0.9.6 \ No newline at end of file diff --git a/pluto_build/requirements.txt b/pluto_build/requirements.txt new file mode 100644 index 00000000..f0e7ec56 --- /dev/null +++ b/pluto_build/requirements.txt @@ -0,0 +1 @@ +dbt-postgres \ No newline at end of file diff --git a/pluto_build/sql/preprocessing.sql b/pluto_build/sql/preprocessing.sql index 1adf57dd..f8f9c5df 100644 --- a/pluto_build/sql/preprocessing.sql +++ b/pluto_build/sql/preprocessing.sql @@ -38,10 +38,7 @@ ALTER TABLE pluto_corrections DROP COLUMN IF EXISTS v, DROP COLUMN IF EXISTS ogc_fid; -DROP TABLE IF EXISTS pluto_input_geocodes_tmp; -CREATE TABLE pluto_input_geocodes_tmp as ( +CREATE OR REPLACE TABLE pluto_input_geocodes as ( SELECT DISTINCT ON (borough, block, lot) * - FROM pluto_input_geocodes); -DROP TABLE IF EXISTS pluto_input_geocodes; -ALTER TABLE pluto_input_geocodes_tmp -RENAME TO pluto_input_geocodes; \ No newline at end of file + FROM pluto_input_geocodes +); \ No newline at end of file From c10fa567417cdf9789af0c73c22db468d41814c5 Mon Sep 17 00:00:00 2001 From: Baiyue Cao Date: Tue, 29 Nov 2022 08:32:48 -0500 Subject: [PATCH 2/5] adding documentation --- pluto_build/instructions.md | 5 ++++- pluto_build/models/staging/stg_rpad.sql | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pluto_build/instructions.md b/pluto_build/instructions.md index 445e12a3..6117c8d0 100644 --- a/pluto_build/instructions.md +++ b/pluto_build/instructions.md @@ -31,7 +31,10 @@ nycplanning: ``` ## Example Commands -1. `dbt run -s staging.stg_pts` +1. `dbt run -s staging.stg_pts` to run a specific model +2. `dbt compile` to compile sql to `target` without executing sql +3. `dbt docs generate` to generate documentation +4. ` dbt docs serve --port 8000` to open docs in browser (`http://localhost:8000`) ## Execution Order 1. you will still need to run `bash 01_dataloading.sh` to import source data diff --git a/pluto_build/models/staging/stg_rpad.sql b/pluto_build/models/staging/stg_rpad.sql index 8732d75d..41cf3cd4 100644 --- a/pluto_build/models/staging/stg_rpad.sql +++ b/pluto_build/models/staging/stg_rpad.sql @@ -61,6 +61,7 @@ SELECT ap_block, ap_lot, ap_ease, - ap_date + ap_date, + to_char(to_date(ap_date, 'MM/DD/YY'), 'MM/DD/YYYY') AS ap_datef FROM pluto_rpad_rownum WHERE rn = 1 \ No newline at end of file From 6a382f9f5765be063302eaf656c1fbcc40852c11 Mon Sep 17 00:00:00 2001 From: Baiyue Cao Date: Tue, 29 Nov 2022 08:33:18 -0500 Subject: [PATCH 3/5] update gitignore --- .gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5e5aef62..aafdd45f 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,8 @@ __pycache__/ !example.env !version.env pluto_build/pts/ -pluto_build/.library/ \ No newline at end of file +pluto_build/.library/ +pluto_build/.venv/ +pluto_build/target/ +pluto_build/dbt_packages/ +pluto_build/logs/ \ No newline at end of file From 4ba2b4abdf80a99229d3d0ee301afdcafb472c6c Mon Sep 17 00:00:00 2001 From: Baiyue Cao Date: Tue, 29 Nov 2022 08:35:20 -0500 Subject: [PATCH 4/5] revert back --- pluto_build/sql/preprocessing.sql | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pluto_build/sql/preprocessing.sql b/pluto_build/sql/preprocessing.sql index f8f9c5df..1adf57dd 100644 --- a/pluto_build/sql/preprocessing.sql +++ b/pluto_build/sql/preprocessing.sql @@ -38,7 +38,10 @@ ALTER TABLE pluto_corrections DROP COLUMN IF EXISTS v, DROP COLUMN IF EXISTS ogc_fid; -CREATE OR REPLACE TABLE pluto_input_geocodes as ( +DROP TABLE IF EXISTS pluto_input_geocodes_tmp; +CREATE TABLE pluto_input_geocodes_tmp as ( SELECT DISTINCT ON (borough, block, lot) * - FROM pluto_input_geocodes -); \ No newline at end of file + FROM pluto_input_geocodes); +DROP TABLE IF EXISTS pluto_input_geocodes; +ALTER TABLE pluto_input_geocodes_tmp +RENAME TO pluto_input_geocodes; \ No newline at end of file From b44612f645057baf5b0ec8312403ae65c7c69834 Mon Sep 17 00:00:00 2001 From: Baiyue Cao Date: Sat, 3 Dec 2022 15:53:46 -0500 Subject: [PATCH 5/5] calculate areas --- .gitignore | 3 +- pluto_build/dbt_project.yml | 9 +++- pluto_build/instructions.md | 8 +++- .../models/calculation/calc_allocated.sql | 23 +++++++++ pluto_build/models/calculation/calc_areas.sql | 47 +++++++++++++++++++ .../models/calculation/calc_areas_cama.sql | 14 ++++++ .../models/calculation/calc_areas_rpad.sql | 21 +++++++++ pluto_build/models/calculation/schema.yml | 15 ++++++ pluto_build/models/staging/schema.yml | 24 +++++++++- pluto_build/models/staging/stg_bbl.sql | 4 +- pluto_build/models/staging/stg_cama.sql | 2 +- pluto_build/models/staging/stg_geocodes.sql | 2 - pluto_build/models/staging/stg_numbldgs.sql | 8 ++++ pluto_build/models/staging/stg_pts.sql | 6 +-- 14 files changed, 173 insertions(+), 13 deletions(-) create mode 100644 pluto_build/models/calculation/calc_allocated.sql create mode 100644 pluto_build/models/calculation/calc_areas.sql create mode 100644 pluto_build/models/calculation/calc_areas_cama.sql create mode 100644 pluto_build/models/calculation/calc_areas_rpad.sql create mode 100644 pluto_build/models/calculation/schema.yml create mode 100644 pluto_build/models/staging/stg_numbldgs.sql diff --git a/.gitignore b/.gitignore index aafdd45f..35cfa001 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ __pycache__/ pluto_build/pts/ pluto_build/.library/ pluto_build/.venv/ -pluto_build/target/ +pluto_build/.postgres/ +pluto_build/.target/ pluto_build/dbt_packages/ pluto_build/logs/ \ No newline at end of file diff --git a/pluto_build/dbt_project.yml b/pluto_build/dbt_project.yml index 7a5f1772..5f316595 100644 --- a/pluto_build/dbt_project.yml +++ b/pluto_build/dbt_project.yml @@ -5,9 +5,16 @@ version: '0.1' profile: nycplanning model-paths: ["models"] +target-path: ".target" models: + +persist_docs: + relation: true + columns: true pluto: staging: +materialized: table - schema: staging \ No newline at end of file + schema: staging + calculation: + +materialized: table + schema: calculation \ No newline at end of file diff --git a/pluto_build/instructions.md b/pluto_build/instructions.md index 6117c8d0..6ad95000 100644 --- a/pluto_build/instructions.md +++ b/pluto_build/instructions.md @@ -2,7 +2,13 @@ ## Create local Postgres-Postgis database 0. make sure you have docker installed -1. `docker run -d -p 5432:5432 -e POSTGRES_PASSWORD=postgres postgis/postgis:12-3.0-alpine` +1. ``` +docker run -d \ + -p 5432:5432 \ + -e POSTGRES_PASSWORD=postgres \ + -v $(pwd)/.postgres:/var/lib/postgresql/data \ + postgis/postgis:15-3.3-alpine +``` > this will create a postgres/postgis docker container locally for development purposes ## Install DBT diff --git a/pluto_build/models/calculation/calc_allocated.sql b/pluto_build/models/calculation/calc_allocated.sql new file mode 100644 index 00000000..991cfdd2 --- /dev/null +++ b/pluto_build/models/calculation/calc_allocated.sql @@ -0,0 +1,23 @@ +SELECT + DISTINCT + bbl.primebbl, + rpad.bldgcl as bldgclass, + rpad.story as numfloors, + rpad.lfft as lotfront, + rpad.ldft as lotdepth, + rpad.bfft as bldgfront, + rpad.bdft as bldgdepth, + rpad.ext as ext, + rpad.condo_number as condono, + rpad.land_area as lotarea, + rpad.gross_sqft as bldgarea, + rpad.yrbuilt as yearbuilt, + rpad.yralt1 as yearalter1, + rpad.yralt2 as yearalter2, + rpad.owner as ownername, + rpad.irreg as irrlotcode, + concat(rpad.housenum_lo,' ',rpad.street_name) as address, + ap_datef as appdate +FROM {{ ref('stg_bbl') }} as bbl + LEFT JOIN {{ ref('stg_rpad') }} as rpad ON bbl.primebbl = rpad.bbl +WHERE LENGTH(bbl.bbl) = 10 \ No newline at end of file diff --git a/pluto_build/models/calculation/calc_areas.sql b/pluto_build/models/calculation/calc_areas.sql new file mode 100644 index 00000000..716c488f --- /dev/null +++ b/pluto_build/models/calculation/calc_areas.sql @@ -0,0 +1,47 @@ +SELECT + COALESCE(rpad.primebbl, cama.primebbl) as primebbl, + COALESCE( + NULLIF(rpad.commercial_area, 0), + NULLIF(cama.commercial_area, 0) + ) as commercial_area, + COALESCE( + NULLIF(rpad.residential_area, 0), + NULLIF(cama.residential_area, 0) + ) as residential_area, + COALESCE( + NULLIF(rpad.office_area, 0), + NULLIF(cama.office_area, 0) + ) as office_area, + COALESCE( + NULLIF(rpad.retail_area, 0), + NULLIF(cama.retail_area, 0) + ) as retail_area, + COALESCE( + NULLIF(rpad.garage_area, 0), + NULLIF(cama.garage_area, 0) + ) as garage_area, + COALESCE( + NULLIF(rpad.storage_area, 0), + NULLIF(cama.storage_area, 0) + ) as storage_area, + COALESCE( + NULLIF(rpad.factory_area, 0), + NULLIF(cama.factory_area, 0) + ) as factory_area, + COALESCE( + NULLIF(rpad.other_area, 0), + NULLIF(cama.other_area, 0) + ) as other_area, + COALESCE( + NULLIF(rpad.gross_area, 0), + NULLIF(cama.gross_area, 0), + rpad._gross_area + ) as bldg_area, + COALESCE( + CASE WHEN NULLIF(rpad.gross_area, 0) IS NOT NULL THEN '2' END, + CASE WHEN NULLIF(cama.gross_area, 0) IS NOT NULL THEN '7' END, + CASE WHEN NULLIF(rpad._gross_area, 0) IS NOT NULL THEN '5' END + ) as area_source +FROM {{ ref('calc_areas_rpad') }} rpad +FULL OUTER JOIN {{ ref('calc_areas_cama') }} cama +ON rpad.primebbl = cama.primebbl \ No newline at end of file diff --git a/pluto_build/models/calculation/calc_areas_cama.sql b/pluto_build/models/calculation/calc_areas_cama.sql new file mode 100644 index 00000000..e90aa237 --- /dev/null +++ b/pluto_build/models/calculation/calc_areas_cama.sql @@ -0,0 +1,14 @@ +SELECT + cama.primebbl, + SUM(commercialarea) as commercial_area, + SUM(residarea) as residential_area, + SUM(officearea) as office_area, + SUM(retailarea) as retail_area, + SUM(garagearea) as garage_area, + SUM(storagearea) as storage_area, + SUM(factoryarea) as factory_area, + SUM(otherarea) as other_area, + SUM(grossarea) as gross_area +FROM {{ ref('stg_cama') }} as cama +WHERE bldgnum = '1' +GROUP BY cama.primebbl \ No newline at end of file diff --git a/pluto_build/models/calculation/calc_areas_rpad.sql b/pluto_build/models/calculation/calc_areas_rpad.sql new file mode 100644 index 00000000..bb9872fc --- /dev/null +++ b/pluto_build/models/calculation/calc_areas_rpad.sql @@ -0,0 +1,21 @@ +SELECT + bbl as primebbl, + ( + rpad.officearea + + rpad.retailarea + + rpad.garagearea + + rpad.storagearea + + rpad.factoryarea + + rpad.otherarea + ) as commercial_area, + rpad.residarea as residential_area, + rpad.officearea as office_area, + rpad.retailarea as retail_area, + rpad.garagearea as garage_area, + rpad.storagearea as storage_area, + rpad.factoryarea as factory_area, + rpad.otherarea as other_area, + rpad.gross_sqft as gross_area, + rpad.lfft * rpad.ldft * rpad.story as _gross_area +FROM {{ ref('stg_rpad') }} as rpad +WHERE RIGHT(rpad.bbl, 4) NOT LIKE '75%' \ No newline at end of file diff --git a/pluto_build/models/calculation/schema.yml b/pluto_build/models/calculation/schema.yml new file mode 100644 index 00000000..c21aaa77 --- /dev/null +++ b/pluto_build/models/calculation/schema.yml @@ -0,0 +1,15 @@ +version: 2 + +models: + - name: calc_allocated + columns: + - name: primebbl + tests: + - unique + - not_null + + - name: calc_areas_cama + description: | + building area related fields comming from cama + > **Note:** that we only take building number (bldgnum) 1 + as the main source of building area \ No newline at end of file diff --git a/pluto_build/models/staging/schema.yml b/pluto_build/models/staging/schema.yml index bc579f8b..17bb2a09 100644 --- a/pluto_build/models/staging/schema.yml +++ b/pluto_build/models/staging/schema.yml @@ -9,10 +9,25 @@ models: - name: stg_rpad description: Convert PTS to RPAD (legacy format) + columns: + - name: bbl + tests: + - unique + - not_null - name: stg_geocodes description: cleaned table for geocodes - + columns: + - name: bbl + tests: + - unique + + - name: borough + tests: + - not_null + - accepted_values: + values: [1, 2, 3, 4, 5] + - name: stg_bbl description: cleaned table for BBL columns: @@ -24,10 +39,15 @@ models: - name: bbl description: >- Borough Block Lot (BBL) from stg_pts, - not that this field can contain letter E for easement + not that this field can contain letter starting from E (H, I, J, K ...) + tests: + - unique + - not_null + - name: billingbbl description: >- Billing BBL from stg_geocodes, this is coming from geosupport + - name: primebbl description: >- Prime BBL, used for calculation purposes usually. diff --git a/pluto_build/models/staging/stg_bbl.sql b/pluto_build/models/staging/stg_bbl.sql index cf01a829..e6fc27f7 100644 --- a/pluto_build/models/staging/stg_bbl.sql +++ b/pluto_build/models/staging/stg_bbl.sql @@ -3,8 +3,8 @@ SELECT a.tb, a.tl, a.bbl, - b.billingbbl, - (CASE + NULLIF(b.billingbbl, '0000000000') as billingbbl, + (CASE WHEN NULLIF(b.billingbbl, '0000000000') IS NOT NULL THEN b.billingbbl ELSE a.boro||a.tb||a.tl diff --git a/pluto_build/models/staging/stg_cama.sql b/pluto_build/models/staging/stg_cama.sql index 613f96dc..c3a54377 100644 --- a/pluto_build/models/staging/stg_cama.sql +++ b/pluto_build/models/staging/stg_cama.sql @@ -32,7 +32,7 @@ SELECT NULLIF(residconstrtype, '') AS residconstrtype, NULLIF(commercialconstrtype, '') AS commercialconstrtype, NULLIF(condomainconstrtype, '') AS condomainconstrtype, - NULLIF(condounitsconstrtype, '') AS condounitsconstrtype,, + NULLIF(condounitsconstrtype, '') AS condounitsconstrtype, b.billingbbl, (CASE WHEN b.billingbbl IS NOT NULL AND b.billingbbl <> '0000000000' diff --git a/pluto_build/models/staging/stg_geocodes.sql b/pluto_build/models/staging/stg_geocodes.sql index ac63661d..407aa973 100644 --- a/pluto_build/models/staging/stg_geocodes.sql +++ b/pluto_build/models/staging/stg_geocodes.sql @@ -14,8 +14,6 @@ WITH backfill_geom AS ( FROM {{ source('public', 'pluto_input_geocodes') }} ) SELECT - a.ogc_fid, - bbl as geo_bbl, borough||lpad(block,5,'0')||lpad(lot,4,'0') as bbl, ST_X(ST_TRANSFORM(geom, 2263))::integer as xcoord, ST_Y(ST_TRANSFORM(geom, 2263))::integer as ycoord, diff --git a/pluto_build/models/staging/stg_numbldgs.sql b/pluto_build/models/staging/stg_numbldgs.sql new file mode 100644 index 00000000..574c19a6 --- /dev/null +++ b/pluto_build/models/staging/stg_numbldgs.sql @@ -0,0 +1,8 @@ +SELECT + geo.bbl, + coalesce ( + numbldgs.count::integer, + numberOfExistingStructuresOnLot::integer + ) as numbldgs +FROM {{ ref('stg_geocodes') }} geo + LEFT JOIN {{ source('public', 'pluto_input_numbldgs') }} numbldgs ON geo.bbl::bigint = numbldgs.bbl::bigint \ No newline at end of file diff --git a/pluto_build/models/staging/stg_pts.sql b/pluto_build/models/staging/stg_pts.sql index 4bbc189c..45376a85 100644 --- a/pluto_build/models/staging/stg_pts.sql +++ b/pluto_build/models/staging/stg_pts.sql @@ -41,9 +41,9 @@ SELECT -- pyacttot REPLACE(curactextot, '+', '')::double precision as CUREXT_ACT, -- pyactextot - yrbuilt, - yralt1, - yralt2, + NULLIF(yrbuilt::integer, 0) AS yrbuilt, + NULLIF(yralt1::integer, 0) AS yralt1, + NULLIF(yralt2::integer, 0) AS yralt2, condo_number, appt_boro as AP_BORO, appt_block as AP_BLOCK,