Adding Paragram Embedding #30

Merged 13 commits on Feb 8, 2020
2 changes: 2 additions & 0 deletions Project.toml
@@ -7,11 +7,13 @@ version = "0.4.1"
AutoHashEquals = "15f4f7f2-30c1-5605-9d31-71845cf9641f"
DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
GoogleDrive = "91feb7a0-3508-11ea-1e8e-afea2c1c9a19"

[compat]
AutoHashEquals = "0.2"
DataDeps = "0.5, 0.6, 0.7"
julia = "1"
GoogleDrive="0.1.0"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1 change: 1 addition & 0 deletions REQUIRE
@@ -1,3 +1,4 @@
julia 0.7
DataDeps 0.5.1
AutoHashEquals
GoogleDrive
6 changes: 4 additions & 2 deletions src/Embeddings.jl
@@ -5,15 +5,17 @@ using DataDeps
using AutoHashEquals

export load_embeddings, language_files
-export Word2Vec, GloVe, FastText_Text
+export Word2Vec, GloVe, FastText_Text, Paragram

abstract type EmbeddingSystem{LANG} end
include("fasttext.jl")
include("glove.jl")
include("word2vec.jl")
include("Paragram.jl")
include("common.jl")

function __init__()
-    for T in [Word2Vec, GloVe, FastText]
+    for T in [Word2Vec, GloVe, FastText, Paragram]
        init(T)
    end
end
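With Paragram exported and registered in __init__, loading it should work like the other embedding systems. A minimal usage sketch, assuming the package's existing load_embeddings API and assuming the returned table exposes embeddings and vocab fields as for GloVe and Word2Vec (those field names and the keyword arguments are not shown in this diff):

using Embeddings

# First call triggers the DataDeps/GoogleDrive download registered for Paragram
# (the dependency descriptions below list the downloads at 1.7 GB each).
embtable = load_embeddings(Paragram{:en})

# Assumed result layout: a dims-by-vocab matrix plus the indexed word list.
size(embtable.embeddings)    # e.g. (300, vocab_size) for the 300-dimensional vectors
first(embtable.vocab, 5)     # first few words in the vocabulary

# Restrict the vocabulary, mirroring the max_vocab_size / keep_words arguments used below.
small = load_embeddings(Paragram{:en}; max_vocab_size = 10_000)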
44 changes: 44 additions & 0 deletions src/Paragram.jl
@@ -0,0 +1,44 @@
abstract type Paragram{LANG} <: EmbeddingSystem{LANG} end
using GoogleDrive

function init(::Type{Paragram})
    vectors = [("paragram_300_ws353",
                "Paragram-WS353 300 dimensional Paragram embeddings tuned on WordSim353 dataset. 1.7 GB download.",
                "8ed9a19f8bc400cdbca5dae7f024c0310a3e5711a46ba48036a7542614440721",
                "https://drive.google.com/uc?id=0B9w48e1rj-MOLVdZRzFfTlNsem8&export=download"),
                # "https://drive.google.com/file/d/0B9w48e1rj-MOLVdZRzFfTlNsem8/view"),
               ("paragram_300_sl999",
                "Paragram-SL999 300 dimensional Paragram embeddings tuned on SimLex999 dataset. 1.7 GB download.",
                "9a16adc7d620642f863278451db4c03a2646016440ccea7e30a37ba17868781d",
                "https://drive.google.com/uc?id=0B9w48e1rj-MOck1fRGxaZW1LU2M&export=download"),
               ]
    # https://drive.google.com/file/d/0B9w48e1rj-MOLVdZRzFfTlNsem8/view?usp=sharing
    for (depname, description, sha, link) in vectors
        register(DataDep(depname,
            """
            Pretrained Paragram word embeddings.
            Website: https://www.cs.cmu.edu/~jwieting/
            Author: John Wieting
            Year: 2015
            Licence: Open Data Commons Public Domain Dedication and License (PDDL)

            Paper:
                John Wieting, Mohit Bansal, Kevin Gimpel, Karen Livescu, Dan Roth. From Paraphrase Database to Compositional Paraphrase Model and Back.

            $description
            """,
            link,
            sha,
            fetch_method = google_download,
            post_fetch_method = unpack))

        append!(language_files(Paragram{:en}), ["$(depname)/$(depname).txt"])
    end
end

_load_embeddings(::Type{<:Paragram}, embedding_file, max_vocab_size, keep_words) = _load_embeddings_csv(embedding_file, max_vocab_size, keep_words)


28 changes: 28 additions & 0 deletions src/common.jl
@@ -0,0 +1,28 @@

function _load_embeddings_csv(embedding_file, max_vocab_size, keep_words)
Review comment (Member):

I think we can also use this for FastText; there is at least some code overlap, so we might need to factor out the common inner parts.

We should probably not call it a CSV, since it is space-separated, or we keep the CSV name and pass in a delimiter option.

Reply (Member Author):

The delimiter option has been added as suggested (see the sketch after this function).

    local LL, indexed_words, index
    if length(keep_words) > 0
        max_vocab_size = length(keep_words)
    end
    indexed_words = Vector{String}()
    LL = Vector{Vector{Float32}}()
    open(embedding_file) do f
        index = 1
        for line in eachline(f)
            xs = split(line, ' ')
            word = xs[1]
            if length(keep_words) == 0 || (word in keep_words)
                index > max_vocab_size && break
                push!(indexed_words, word)
                try
                    push!(LL, parse.(Float32, @view(xs[2:end])))
                catch err
                    err isa ArgumentError || rethrow()
                    @warn "Could not parse word vector" index word exception=err
                end
                index += 1
            end
        end
    end
    return reduce(hcat, LL), indexed_words
end
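The reply above notes that a delimiter option was added, while the hunk shown here still hard-codes split(line, ' '). The following is only an illustrative sketch of how such a keyword argument could be threaded through; the function name and the delim keyword are assumptions for illustration, not the PR's final code:

# Hypothetical variant of _load_embeddings_csv with a configurable delimiter.
# delim = ' ' keeps the current space-separated behavior as the default.
function _load_embeddings_delimited(embedding_file, max_vocab_size, keep_words; delim = ' ')
    indexed_words = Vector{String}()
    LL = Vector{Vector{Float32}}()
    if length(keep_words) > 0
        max_vocab_size = length(keep_words)
    end
    open(embedding_file) do f
        index = 1
        for line in eachline(f)
            xs = split(line, delim)   # the separator is now a parameter
            word = xs[1]
            if length(keep_words) == 0 || (word in keep_words)
                index > max_vocab_size && break
                push!(indexed_words, word)
                try
                    push!(LL, parse.(Float32, @view(xs[2:end])))
                catch err
                    err isa ArgumentError || rethrow()
                    @warn "Could not parse word vector" index word exception=err
                end
                index += 1
            end
        end
    end
    return reduce(hcat, LL), indexed_words
end

A space-separated caller (GloVe, Paragram) would use the default, while a tab-separated file could pass delim = '\t'.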
29 changes: 2 additions & 27 deletions src/glove.jl
@@ -44,30 +44,5 @@ function init(::Type{GloVe})
    end
end

-function _load_embeddings(::Type{<:GloVe}, embedding_file, max_vocab_size, keep_words)
-    local LL, indexed_words, index
-    if length(keep_words) > 0
-        max_vocab_size = length(keep_words)
-    end
-    indexed_words = Vector{String}()
-    LL = Vector{Vector{Float32}}()
-    open(embedding_file) do f
-        index = 1
-        for line in eachline(f)
-            xs = split(line, ' ')
-            word = xs[1]
-            if length(keep_words) == 0 || (word in keep_words)
-                index > max_vocab_size && break
-                push!(indexed_words, word)
-                try
-                    push!(LL, parse.(Float32, @view(xs[2:end])))
-                catch err
-                    err isa ArgumentError || rethrow()
-                    @warn "Could not parse word vector" index word exception=err
-                end
-                index += 1
-            end
-        end
-    end
-    return reduce(hcat, LL), indexed_words
-end
+_load_embeddings(::Type{<:GloVe}, embedding_file, max_vocab_size, keep_words) = _load_embeddings_csv(embedding_file, max_vocab_size, keep_words)