Skip to content

Commit

Permalink
more on this
Browse files Browse the repository at this point in the history
  • Loading branch information
pat-alt committed Jan 14, 2024
1 parent 4e3ead2 commit 2337ee9
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
13 changes: 9 additions & 4 deletions dev/get_data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,19 @@ using Artifacts, ArtifactUtils
import CodecZlib, Downloads, Tar

# Raw data folder:
raw_data_dir = "dev/data/fomc-hawkish-dovish-main/data"
raw_data_root = "dev/data/raw"
raw_data_dir = joinpath(raw_data_root, "fomc-hawkish-dovish-main/data")
OVERWRITE = true

if !isdir(raw_data_dir)
if !isdir(raw_data_dir) || OVERWRITE
if isdir(raw_data_root)
rm(raw_data_root; recursive=true)
end
tgz = Downloads.download("https://github.com/gtfintechlab/fomc-hawkish-dovish/archive/refs/heads/main.tar.gz")
open(GzipDecompressorStream, tgz) do io
Tar.extract(
x -> contains(x.path, "fomc-hawkish-dovish-main/data"),
io, "dev/data"
x -> contains(x.path, "fomc-hawkish-dovish-main/data") || contains(x.path, "training_data"),
io, "dev/data/raw"
)
end
end
20 changes: 18 additions & 2 deletions dev/pre_process.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,24 @@
using Dates

# Get data from the web and save it to a file
include("get_data.jl")

# Utils:
include("utils.jl")

# Meetin minutes data:
include("pre_process_mm.jl")
# Meeting minutes data:
include("pre_process_mm.jl")

# Speech data:
include("pre_process_speech.jl")

# Press conference data:
include("pre_process_pc.jl")

# Merge: stack the three source tables into one. `cols=:union` keeps every column
# that occurs in any of the inputs.
# NOTE(review): `df_mm`, `df_speech` and `df_pc` are presumably defined by the
# `pre_process_*.jl` scripts included above - confirm.
df = vcat(df_mm, df_speech, df_pc, cols=:union)

# Parse the `yyyymmdd` date strings. The `DateFormat` is built once and reused for
# every element instead of re-parsing the format string on each call.
df.date = Dates.Date.(df.date, Dates.DateFormat("yyyymmdd"))

# Save: `mkpath` creates missing parent directories and does nothing if the
# directory already exists, so no separate `isdir` check is needed.
mkpath("dev/data/cleaned/")
CSV.write("dev/data/cleaned/all_data.csv", df)

0 comments on commit 2337ee9

Please sign in to comment.