XTL  0.1
eXtended Template Library
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
moby.hpp
Go to the documentation of this file.
1 
5 #pragma once
6 
7 #include <xtd/xtd.hpp>
8 #include <fstream>
9 #include <xtd/nlp/nlp.hpp>
10 #include <xtd/filesystem.hpp>
11 
12 namespace xtd {
13  namespace nlp {
14  namespace moby {
15 
16  class database {
17 
18  class record {
19  public:
20  using vector = std::vector<record>;
21  using map = std::map<std::string, record>;
22  xtd::string _word;
23  part_of_speech _pos;
24  record(const std::string& sWord) : _word(sWord), _pos(part_of_speech::unknown) {}
25  };
26 
27  record::map records;
28 
29  database() {
30 
31  xtd::filesystem::path oPath(XTD_ASSETS_DIR "/mpos/mobyposi.i");
32  std::ifstream in(oPath.string());
33  in.exceptions(std::ios::badbit | std::ios::failbit);
34  xtd::string sFile((std::istreambuf_iterator<char>(in)), (std::istreambuf_iterator<char>()));
35  for (auto sBegin = sFile.begin(); sBegin < sFile.end(); ++sBegin) {
36  for (; ('\r' == *sBegin || '\n' == *sBegin) && sBegin < sFile.end(); ++sBegin);
37  auto sEnd = sBegin;
38  for (; (char)0xd7 != *sEnd && sEnd < sFile.end(); ++sEnd);
39  record r(std::string(sBegin, sEnd));
40  records.insert(std::make_pair(r._word, r));
41  for (++sEnd; '\r' != *sEnd && '\n' != *sEnd && sEnd < sFile.end(); ++sEnd) {
42  part_of_speech iPOS = part_of_speech::unknown;
43  switch (*sEnd) {
44  case 'N':
45  iPOS = part_of_speech::noun; break;
46  case 'p':
47  iPOS = part_of_speech::noun_plural; break;
48  case 'h':
49  iPOS = part_of_speech::noun_phrase; break;
50  case 'V':
51  iPOS = part_of_speech::verb_participle; break;
52  case 't':
53  iPOS = part_of_speech::verb_transitive; break;
54  case 'i':
55  iPOS = part_of_speech::verb_intransitive; break;
56  case 'A':
57  iPOS = part_of_speech::adj; break;
58  case 'v':
59  iPOS = part_of_speech::adv; break;
60  case 'C':
61  iPOS = part_of_speech::conjunction; break;
62  case 'P':
63  iPOS = part_of_speech::preposition; break;
64  case '!':
65  iPOS = part_of_speech::interjection; break;
66  case 'r':
67  iPOS = part_of_speech::pronoun; break;
68  case 'D':
69  iPOS = part_of_speech::definite_article; break;
70  case 'I':
71  iPOS = part_of_speech::indefinite_article; break;
72  case 'o':
73  iPOS = part_of_speech::nominative; break;
74  }
75  r._pos = static_cast<part_of_speech>(static_cast<uint64_t>(iPOS) | static_cast<uint64_t>(r._pos));
76  }
77  sBegin = sEnd;
78  }
79  }
80  public:
81  static database& get() {
82  static database _database;
83  return _database;
84  }
85 
86  nlp::part_of_speech get_pos(const xtd::string& sWord) const {
87  auto oItem = records.find(sWord);
88  if (records.end() == oItem) return part_of_speech::unknown;
89  return oItem->second._pos;
90  }
91 
92  };
93  }
94  }
95 }
Host, target and build configurations and settings. Various components are purpose-built for specific ...
natural language processing
handle necessary filesystem and path functionality until C++17 is finalized