| #include <algorithm> |
| #include <functional> |
| #include <utility> |
| |
| #include "tail.h" |
| |
| namespace marisa { |
| |
| Tail::Tail() : buf_() {} |
| |
| void Tail::build(const Vector<String> &keys, |
| Vector<UInt32> *offsets, int mode) { |
| switch (mode) { |
| case MARISA_BINARY_TAIL: { |
| build_binary_tail(keys, offsets); |
| return; |
| } |
| case MARISA_TEXT_TAIL: { |
| if (!build_text_tail(keys, offsets)) { |
| build_binary_tail(keys, offsets); |
| } |
| return; |
| } |
| default: { |
| MARISA_THROW(MARISA_PARAM_ERROR); |
| } |
| } |
| } |
| |
| void Tail::mmap(Mapper *mapper, const char *filename, |
| long offset, int whence) { |
| if (mapper == NULL) { |
| MARISA_THROW(MARISA_PARAM_ERROR); |
| } |
| Mapper temp_mapper; |
| temp_mapper.open(filename, offset, whence); |
| map(temp_mapper); |
| temp_mapper.swap(mapper); |
| } |
| |
| void Tail::map(const void *ptr, std::size_t size) { |
| Mapper mapper(ptr, size); |
| map(mapper); |
| } |
| |
| void Tail::map(Mapper &mapper) { |
| Tail temp; |
| temp.buf_.map(mapper); |
| temp.swap(this); |
| } |
| |
| void Tail::load(const char *filename, long offset, int whence) { |
| Reader reader; |
| reader.open(filename, offset, whence); |
| read(reader); |
| } |
| |
| void Tail::fread(::FILE *file) { |
| Reader reader(file); |
| read(reader); |
| } |
| |
| void Tail::read(int fd) { |
| Reader reader(fd); |
| read(reader); |
| } |
| |
| void Tail::read(std::istream &stream) { |
| Reader reader(&stream); |
| read(reader); |
| } |
| |
| void Tail::read(Reader &reader) { |
| Tail temp; |
| temp.buf_.read(reader); |
| temp.swap(this); |
| } |
| |
| void Tail::save(const char *filename, bool trunc_flag, |
| long offset, int whence) const { |
| Writer writer; |
| writer.open(filename, trunc_flag, offset, whence); |
| write(writer); |
| } |
| |
| void Tail::fwrite(::FILE *file) const { |
| Writer writer(file); |
| write(writer); |
| } |
| |
| void Tail::write(int fd) const { |
| Writer writer(fd); |
| write(writer); |
| } |
| |
| void Tail::write(std::ostream &stream) const { |
| Writer writer(&stream); |
| write(writer); |
| } |
| |
| void Tail::write(Writer &writer) const { |
| buf_.write(writer); |
| } |
| |
| void Tail::clear() { |
| Tail().swap(this); |
| } |
| |
| void Tail::swap(Tail *rhs) { |
| buf_.swap(&rhs->buf_); |
| } |
| |
| void Tail::build_binary_tail(const Vector<String> &keys, |
| Vector<UInt32> *offsets) { |
| if (keys.empty()) { |
| build_empty_tail(offsets); |
| return; |
| } |
| |
| Vector<UInt8> buf; |
| buf.push_back('\0'); |
| |
| Vector<UInt32> temp_offsets; |
| temp_offsets.resize(keys.size() + 1); |
| |
| for (std::size_t i = 0; i < keys.size(); ++i) { |
| temp_offsets[i] = (UInt32)buf.size(); |
| for (std::size_t j = 0; j < keys[i].length(); ++j) { |
| buf.push_back(keys[i][j]); |
| } |
| } |
| temp_offsets.back() = (UInt32)buf.size(); |
| buf.shrink(); |
| |
| if (offsets != NULL) { |
| temp_offsets.swap(offsets); |
| } |
| buf_.swap(&buf); |
| } |
| |
| bool Tail::build_text_tail(const Vector<String> &keys, |
| Vector<UInt32> *offsets) { |
| if (keys.empty()) { |
| build_empty_tail(offsets); |
| return true; |
| } |
| |
| typedef std::pair<RString, UInt32> KeyIdPair; |
| Vector<KeyIdPair> pairs; |
| pairs.resize(keys.size()); |
| for (std::size_t i = 0; i < keys.size(); ++i) { |
| for (std::size_t j = 0; j < keys[i].length(); ++j) { |
| if (keys[i][j] == '\0') { |
| return false; |
| } |
| } |
| pairs[i].first = RString(keys[i]); |
| pairs[i].second = (UInt32)i; |
| } |
| std::sort(pairs.begin(), pairs.end(), std::greater<KeyIdPair>()); |
| |
| Vector<UInt8> buf; |
| buf.push_back('T'); |
| |
| Vector<UInt32> temp_offsets; |
| temp_offsets.resize(pairs.size(), 1); |
| |
| const KeyIdPair dummy_key; |
| const KeyIdPair *last = &dummy_key; |
| for (std::size_t i = 0; i < pairs.size(); ++i) { |
| const KeyIdPair &cur = pairs[i]; |
| std::size_t match = 0; |
| while ((match < cur.first.length()) && (match < last->first.length()) && |
| last->first[match] == cur.first[match]) { |
| ++match; |
| } |
| if ((match == cur.first.length()) && (last->first.length() != 0)) { |
| temp_offsets[cur.second] = (UInt32)(temp_offsets[last->second] |
| + (last->first.length() - match)); |
| } else { |
| temp_offsets[cur.second] = (UInt32)buf.size(); |
| for (std::size_t j = 1; j <= cur.first.length(); ++j) { |
| buf.push_back(cur.first[cur.first.length() - j]); |
| } |
| buf.push_back('\0'); |
| } |
| last = &cur; |
| } |
| buf.shrink(); |
| |
| if (offsets != NULL) { |
| temp_offsets.swap(offsets); |
| } |
| buf_.swap(&buf); |
| return true; |
| } |
| |
| void Tail::build_empty_tail(Vector<UInt32> *offsets) { |
| buf_.clear(); |
| if (offsets != NULL) { |
| offsets->clear(); |
| } |
| } |
| |
| } // namespace marisa |