Skip to content

Commit

Permalink
setup.py to support packaging and replace exit with throw
Browse files Browse the repository at this point in the history
Summary: Add setup.py in the main directory to enable packaging, and replace exit with throw to prevent Python from quitting on fastText library errors.

Reviewed By: ebetica

Differential Revision: D6414897

fbshipit-source-id: 0d614f0dec4b603c4083a9ef8d3bc0a657f1cd99
  • Loading branch information
cpuhrsch authored and facebook-github-bot committed Nov 28, 2017
1 parent aabf5ed commit a0fa139
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 28 deletions.
5 changes: 5 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
include LICENSE
include PATENTS

recursive-include python *.md *.rst
recursive-include src *.h
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ For the word-similarity evaluation script you will need:
* python 2.6 or newer
* numpy & scipy

For the python bindings (see the subdirectory python) you will need:

* python 2.7 or newer
* numpy & scipy
* [pybind11](https://github.com/pybind/pybind11)

## Building fastText

In order to build `fastText`, use the following:
Expand All @@ -35,6 +41,18 @@ $ make
This will produce object files for all the classes as well as the main binary `fasttext`.
If you do not plan on using the default system-wide compiler, update the two macros defined at the beginning of the Makefile (CC and INCLUDES).

## Building fastText for Python

Alternatively, you can use the Python bindings.

```
$ git clone https://github.com/facebookresearch/fastText.git
$ cd fastText
$ python setup.py install
```

For further information and an introduction, see python/README.md.

## Example use cases

This library has two main use cases: word representation learning and text classification.
Expand Down
File renamed without changes.
11 changes: 6 additions & 5 deletions python/setup.py → setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
import setuptools
import os

__version__ = '0.0.1'
FASTTEXT_SRC = "../src"
__version__ = '0.0.2'
FASTTEXT_SRC = "src"

# Based on https://github.com/pybind/python_example

Expand All @@ -37,7 +37,7 @@ def __str__(self):
return pybind11.get_include(self.user)


fasttext_src_files = os.listdir(FASTTEXT_SRC)
fasttext_src_files = map(str, os.listdir(FASTTEXT_SRC))
fasttext_src_cc = list(filter(lambda x: x.endswith('.cc'), fasttext_src_files))

fasttext_src_cc = list(
Expand All @@ -48,7 +48,7 @@ def __str__(self):
Extension(
str('fasttext_pybind'),
[
str('fastText/pybind/fasttext_pybind.cc'),
str('python/fastText/pybind/fasttext_pybind.cc'),
] + fasttext_src_cc,
include_dirs=[
# Path to pybind11 headers
Expand Down Expand Up @@ -131,8 +131,9 @@ def build_extensions(self):
ext_modules=ext_modules,
url='https://github.com/facebookresearch/fastText',
license='BSD',
install_requires=['pybind11>=2.2'],
install_requires=['pybind11>=2.2', "setuptools >= 0.7.0"],
cmdclass={'build_ext': BuildExt},
packages=[str('fastText')],
package_dir={str(''): str('python')},
zip_safe=False
)
5 changes: 2 additions & 3 deletions src/dictionary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -236,9 +236,8 @@ void Dictionary::readFromFile(std::istream& in) {
std::cerr << "Number of labels: " << nlabels_ << std::endl;
}
if (size_ == 0) {
std::cerr << "Empty vocabulary. Try a smaller -minCount value."
<< std::endl;
exit(EXIT_FAILURE);
throw std::invalid_argument(
"Empty vocabulary. Try a smaller -minCount value.");
}
}

Expand Down
37 changes: 17 additions & 20 deletions src/fasttext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ void FastText::getSubwordVector(Vector& vec, const std::string& subword)
void FastText::saveVectors() {
std::ofstream ofs(args_->output + ".vec");
if (!ofs.is_open()) {
std::cerr << "Error opening file for saving vectors." << std::endl;
exit(EXIT_FAILURE);
throw std::invalid_argument(
args_->output + ".vec" + " cannot be opened for saving vectors!");
}
ofs << dict_->nwords() << " " << args_->dim << std::endl;
Vector vec(args_->dim);
Expand All @@ -102,13 +102,12 @@ void FastText::saveVectors() {
void FastText::saveOutput() {
std::ofstream ofs(args_->output + ".output");
if (!ofs.is_open()) {
std::cerr << "Error opening file for saving vectors." << std::endl;
exit(EXIT_FAILURE);
throw std::invalid_argument(
args_->output + ".output" + " cannot be opened for saving vectors!");
}
if (quant_) {
std::cerr << "Option -saveOutput is not supported for quantized models."
<< std::endl;
return;
throw std::invalid_argument(
"Option -saveOutput is not supported for quantized models.");
}
int32_t n = (args_->model == model_name::sup) ? dict_->nlabels()
: dict_->nwords();
Expand Down Expand Up @@ -216,10 +215,10 @@ void FastText::loadModel(std::istream& in) {
}

if (!quant_input && dict_->isPruned()) {
std::cerr << "Invalid model file.\n"
<< "Please download the updated model from www.fasttext.cc.\n"
<< "See issue #332 on Github for more information.\n";
exit(1);
throw std::invalid_argument(
"Invalid model file.\n"
"Please download the updated model from www.fasttext.cc.\n"
"See issue #332 on Github for more information.\n");
}

in.read((char*) &args_->qout, sizeof(bool));
Expand Down Expand Up @@ -601,14 +600,13 @@ void FastText::loadVectors(std::string filename) {
std::shared_ptr<Matrix> mat; // temp. matrix for pretrained vectors
int64_t n, dim;
if (!in.is_open()) {
std::cerr << "Pretrained vectors file cannot be opened!" << std::endl;
exit(EXIT_FAILURE);
throw std::invalid_argument(filename + " cannot be opened for loading!");
}
in >> n >> dim;
if (dim != args_->dim) {
std::cerr << "Dimension of pretrained vectors does not match -dim option"
<< std::endl;
exit(EXIT_FAILURE);
throw std::invalid_argument(
"Dimension of pretrained vectors (" + std::to_string(dim) +
") does not match dimension (" + std::to_string(args_->dim) + ")!");
}
mat = std::make_shared<Matrix>(n, dim);
for (size_t i = 0; i < n; i++) {
Expand Down Expand Up @@ -640,13 +638,12 @@ void FastText::train(std::shared_ptr<Args> args) {
dict_ = std::make_shared<Dictionary>(args_);
if (args_->input == "-") {
// manage expectations
std::cerr << "Cannot use stdin for training!" << std::endl;
exit(EXIT_FAILURE);
throw std::invalid_argument("Cannot use stdin for training!");
}
std::ifstream ifs(args_->input);
if (!ifs.is_open()) {
std::cerr << "Input file cannot be opened!" << std::endl;
exit(EXIT_FAILURE);
throw std::invalid_argument(
args_->input + " cannot be opened for training!");
}
dict_->readFromFile(ifs);
ifs.close();
Expand Down

0 comments on commit a0fa139

Please sign in to comment.