-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtokenizer_zhcn.cpp
47 lines (40 loc) · 1.05 KB
/
tokenizer_zhcn.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include <fstream>
#include <string>
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <map>
#include <stdlib.h>
#include <string.h>
#include "sphinx.h"
#if USE_MMSEG || USE_CRFSEG
#include "SegmenterManager.h"
#include "Segmenter.h"
#include "tokenizer_zhcn.h"
////////////////////////////////////////////////////////////
/// Builds the private tokenizer state with no segmenter and no manager
/// attached yet; both are acquired later through GetSegmenter().
///
/// The class-level m_lower and m_tagger helpers are fetched here only if
/// they are still unset, so the first constructed instance populates them
/// and later instances reuse the stored pointers.
CSphTokenizer_zh_CN_UTF8_Private::CSphTokenizer_zh_CN_UTF8_Private()
	: m_seg(NULL)
	, m_mgr(NULL)
#if USE_LIBICONV
	, m_iconv(NULL)
	, m_iconv_out(NULL)
#endif
{
	// Lower-casing helper: fetch once, keep in the static member.
	if (m_lower == NULL)
	{
		m_lower = css::ToLower::Get();
	}

	// Chinese character tagger: same fetch-once handling.
	if (m_tagger == NULL)
	{
		m_tagger = css::ChineseCharTagger::Get();
	}
}
/// Returns the segmenter owned by this object, creating it on demand.
///
/// @param dict_path  Dictionary location handed to the manager's init();
///                   when NULL, init() is not called at all.
/// @return The cached segmenter, or NULL if none has been created yet
///         (e.g. init() just reported a non-zero status on this call).
///
/// NOTE(review): a non-zero init() status is only remembered for the call
/// that performed the init. On a later call m_mgr is already set, init() is
/// skipped, and a segmenter is requested anyway — confirm this is intended.
css::Segmenter* CSphTokenizer_zh_CN_UTF8_Private::GetSegmenter(const char* dict_path)
{
	int initStatus = 0;

	// First use: attach the manager and, if a dictionary was given, load it.
	if (!m_mgr)
	{
		m_mgr = SegmenterManagerSingleInstance::Get();
		if (dict_path != NULL)
		{
			initStatus = m_mgr->init(dict_path);
		}
	}

	// Nothing to create if we already have a segmenter or init just failed.
	if (m_seg != NULL || initStatus != 0)
	{
		return m_seg;
	}

	m_seg = m_mgr->getSegmenter(false);
	return m_seg;
}
// Storage for the class-level helper pointers. Both start out NULL and are
// filled in by the constructor the first time it finds them unset.

// Lower-casing helper; assigned from css::ToLower::Get().
css::ToLowerImpl* CSphTokenizer_zh_CN_UTF8_Private::m_lower = NULL;
// Chinese character tagger; assigned from css::ChineseCharTagger::Get().
css::ChineseCharTaggerImpl* CSphTokenizer_zh_CN_UTF8_Private::m_tagger = NULL;
#endif