25 _data_adj(
new data_file),
26 _data_adv(
new data_file),
27 _data_noun(
new data_file),
28 _data_verb(
new verb_data_file),
29 _index_adj(
new index_file),
30 _index_adv(
new index_file),
31 _index_noun(
new index_file),
32 _index_verb(
new index_file)
34 auto make_path = [&](
const char * sAddend){
39 auto t1 = std::async(std::launch::async, [&]() {
40 return _data_adj->load(make_path(
"data.adj"));
42 auto t2 = std::async(std::launch::async, [&]() {
43 return _data_adv->load(make_path(
"data.adv"));
45 auto t3 = std::async(std::launch::async, [&]() {
46 return _data_noun->load(make_path(
"data.noun"));
48 auto t4 = std::async(std::launch::async, [&]() {
49 return _data_verb->load(make_path(
"data.verb"));
51 auto t5 = std::async(std::launch::async, [&]() {
52 return _index_adj->load(make_path(
"index.adj"));
54 auto t6 = std::async(std::launch::async, [&]() {
55 return _index_adv->load(make_path(
"index.adv"));
57 auto t7 = std::async(std::launch::async, [&]() {
58 return _index_noun->load(make_path(
"index.noun"));
60 auto t8 = std::async(std::launch::async, [&]() {
61 return _index_verb->load(make_path(
"index.verb"));
// Reads the file at oPath fully into memory, then parses it into records that
// are inserted into oRecords keyed by each record's synset_offset.
// Parts of the body are not visible in this listing (e.g. the declarations of
// the cursor i and of oRecord, and the return statements), so the notes below
// describe only what is shown.
79 template <
typename _RecordT,
typename _ContainerT>
bool load(
const xtd::filesystem::path& oPath, _ContainerT& oRecords){
80 std::ifstream in(oPath);
// Make badbit/failbit raise exceptions on the stream.
81 in.exceptions(std::ios::badbit | std::ios::failbit);
// Slurp the whole stream into a single string buffer.
82 xtd::string sFile((std::istreambuf_iterator<char>(in)), (std::istreambuf_iterator<char>()));
// First scan: whenever two consecutive spaces are found at i, advance i to
// the next '\n' (or to the end of the buffer).
// NOTE(review): presumably this skips the leading header lines of the file;
// confirm against the lines missing from this listing.
84 for (; i < sFile.size(); ++i){
85 if (
' ' == sFile[i] &&
' ' == sFile[1 + i]){
86 for (;
'\n' != sFile[i] && i < sFile.size(); ++i);
// Second scan: each iteration has oRecord parse itself from the buffer at
// offset i; what happens when load() returns false is not visible here.
91 for (; i < sFile.size(); ++i){
93 if (!oRecord.load(sFile, i)){
// Store the parsed record under its synset_offset.
96 oRecords.insert(std::make_pair(oRecord.synset_offset, oRecord));
// Index file type. Derives from file and loads itself through
// file::load<record> (see the return at the end of this block).
102 struct index_file : file{
// Shared-ownership handle type for index_file instances.
104 using pointer = std::shared_ptr<index_file>;
// char-backed category code; a record's pos is set from the first character
// of the pos token while parsing (see below).
107 enum SyntacticCategory :
char{
// Record containers: a sequence, and a map keyed by a uint32_t.
113 using vector = std::vector<record>;
114 using map = std::map<uint32_t, record>;
// Pointer symbols collected for this record during parsing.
115 std::vector<std::string> ptr_symbol;
118 SyntacticCategory pos;
119 uint32_t synset_offset, synset_cnt, sense_cnt, tagsense_cnt;
// --- record parsing (the function header is not visible in this listing) ---
122 std::stringstream oSS;
// Locate the end of the current line, starting the search at offset i.
// NOTE(review): if no '\n' is found, x is npos and &sz[x] below is out of
// range; confirm the input always ends with a newline.
123 auto x = sz.find(
'\n', i);
// sLine is the text of the current line, [i, x).
124 xtd::string spos, ssynset_offset, p_cnt, ssynset_cnt, ssense_cnt, stagsense_cnt, sLine(&sz[i], &sz[x]);
// Leading fields, in order: lemma, pos, synset_cnt, p_cnt.
126 oSS >> lemma >> spos >> ssynset_cnt >> p_cnt;
127 pos =
static_cast<SyntacticCategory
>(spos[0]);
128 synset_cnt = atoi(ssynset_cnt.c_str());
// Loop p_cnt times, appending one entry to ptr_symbol per iteration (the
// read into sTemp is not visible in this listing).
129 for (
auto t = atoi(p_cnt.c_str()); t; --t){
132 ptr_symbol.push_back(sTemp);
// Trailing fields: sense_cnt, tagsense_cnt, then a single synset_offset.
// NOTE(review): only one synset_offset is read here regardless of
// synset_cnt; confirm that is intended.
134 oSS >> ssense_cnt >> stagsense_cnt >> ssynset_offset;
135 sense_cnt = atoi(ssense_cnt.c_str());
136 tagsense_cnt = atoi(stagsense_cnt.c_str());
137 synset_offset = atoi(ssynset_offset.c_str());
// Intended to turn '_' into ' ' in the lemma.
// NOTE(review): the result of replace() is discarded; if xtd::string::replace
// returns a new string instead of mutating in place this is a no-op; confirm.
138 lemma.replace({
'_' },
' ');
// Delegate file parsing to the shared loader in the base class.
147 return file::load<record>(oPath, records);
// Data file type. Derives from file and loads itself through
// file::load<record> (see the return at the end of this block).
153 struct data_file : file{
// Shared-ownership handle type for data_file instances.
154 using pointer = std::shared_ptr<data_file>;
// char-backed synset type code; a record's ss_type is set from the first
// character of the third token while parsing (see below).
155 enum SynsetType :
char{
// Record containers: a sequence, and a map keyed by a uint32_t.
164 using vector = std::vector<record>;
165 using map = std::map<uint32_t, record>;
// Sequence of word_index entries (filled by the w_cnt loop below).
168 using vector = std::vector<word_index>;
// char-backed category code used by ptr entries.
174 enum SyntacticCategory :
char{
180 SyntacticCategory pos;
182 uint32_t synset_offset;
// Sequence of ptr entries (filled by the p_cnt loop below).
183 using vector = std::vector<ptr>;
// Builds a ptr from its four tokens; the offset token is converted with atoi.
185 : pointer_symbol(spointer_symbol), synset_offset(atoi(ssynset_offset.c_str())), pos(spos), source_target(ssource_target){}
// --- record parsing (the function header is not visible in this listing) ---
// Advance iEnd to the end of the current line (or the end of the buffer).
190 for (;
'\n' != sFile[iEnd] && iEnd < sFile.size(); ++iEnd);
// Leading tokens, in order: synset_offset, lex_filenum, ss_type.
194 synset_offset = atoi(oItems[x++].c_str());
195 lex_filenum = atoi(oItems[x++].c_str());
196 ss_type =
static_cast<SynsetType
>(oItems[x++][0]);
// w_cnt entries follow, each made of two consecutive tokens.
// NOTE(review): the results of replace() are discarded; if
// xtd::string::replace returns a new string instead of mutating in place
// these calls are no-ops; confirm.
198 for (
auto t = atoi(w_cnt.c_str()); t; --t){
199 auto p1 = oItems[x++];
200 auto p2 = oItems[x++];
201 p1.replace({
'_' },
' ');
202 p2.replace({
'_' },
' ');
// Fix: pass the second token (p2) as the second argument. The previous code
// passed p1 twice, silently dropping p2; the matching loop in
// verb_data_file::record uses (p1, p2).
203 words.emplace_back(p1, p2);
// p_cnt pointer entries follow, each made of four consecutive tokens; the
// third is reduced to its first character as a ptr::SyntacticCategory.
206 for (
auto t = atoi(p_cnt.c_str()); t; --t){
207 auto p1 = oItems[x++];
208 auto p2 = oItems[x++];
209 auto p3 =
static_cast<ptr::SyntacticCategory
>(oItems[x++][0]);
210 auto p4 = oItems[x++];
211 pointers.emplace_back(p1, p2, p3, p4);
// Advance i to the '|' separator on this line, or to iEnd if there is none.
213 for (;
'|' != sFile[i] && i < iEnd; ++i);
219 uint32_t synset_offset, lex_filenum;
222 word_index::vector words;
223 ptr::vector pointers;
// Delegate file parsing to the shared loader in the base class.
228 return file::load<record>(oPath, records);
// Verb data file type. Extends data_file; its record adds a list of
// generic_frame entries and loading goes through file::load<record>
// (see the return at the end of this block).
237 struct verb_data_file : data_file{
// Shared-ownership handle type for verb_data_file instances.
239 using pointer = std::shared_ptr<verb_data_file>;
// Verb record: inherits the fields of data_file::record.
241 struct record : data_file::record{
243 struct generic_frame{
244 using vector = std::vector<generic_frame>;
// --- record parsing (the function header is not visible in this listing) ---
// Advance iEnd to the end of the current line (or the end of the buffer).
251 for (;
'\n' != sFile[iEnd] && iEnd < sFile.size(); ++iEnd);
// Leading tokens, in order: synset_offset, lex_filenum, ss_type.
255 synset_offset = atoi(oItems[x++].c_str());
256 lex_filenum = atoi(oItems[x++].c_str());
257 ss_type =
static_cast<SynsetType
>(oItems[x++][0]);
// w_cnt entries follow, each made of two consecutive tokens.
// NOTE(review): the results of replace() are discarded; if
// xtd::string::replace returns a new string instead of mutating in place
// these calls are no-ops; confirm.
259 for (
auto t = atoi(w_cnt.c_str()); t; --t){
260 auto p1 = oItems[x++];
261 auto p2 = oItems[x++];
262 p1.replace({
'_' },
' ');
263 p2.replace({
'_' },
' ');
264 words.emplace_back(p1, p2);
// p_cnt pointer entries follow, each made of four consecutive tokens; the
// third is reduced to its first character as a ptr::SyntacticCategory.
267 for (
auto t = atoi(p_cnt.c_str()); t; --t){
268 auto p1 = oItems[x++];
269 auto p2 = oItems[x++];
270 auto p3 =
static_cast<ptr::SyntacticCategory
>(oItems[x++][0]);
271 auto p4 = oItems[x++];
272 pointers.emplace_back(p1, p2, p3, p4);
// f_cnt frame entries follow, each made of three consecutive tokens.
275 for (
auto t = atoi(f_cnt.c_str()); t; --t){
276 auto p1 = oItems[x++];
277 auto p2 = oItems[x++];
278 auto p3 = oItems[x++];
279 generic_frames.emplace_back(p1, p2, p3);
// Advance i to the '|' separator on this line, or to iEnd if there is none.
281 for (;
'|' != sFile[i] && i < iEnd; ++i);
288 generic_frame::vector generic_frames;
// Delegate file parsing to the shared loader in the base class.
293 return file::load<record>(oPath, records);
300 data_file::pointer _data_adj;
301 data_file::pointer _data_adv;
302 data_file::pointer _data_noun;
303 verb_data_file::pointer _data_verb;
304 index_file::pointer _index_adj;
305 index_file::pointer _index_adv;
306 index_file::pointer _index_noun;
307 index_file::pointer _index_verb;
std::vector< xstring< _ChT > > split(const std::initializer_list< _ChT > &delimiters, bool trimEmpty=false) const
Splits the string by the specified delimiters into its constituent elements.
host, target and build configurations and settings Various components are purpose built for specific ...
handle necessary filesystem and path functionality until C++17 is finalized