blob: eb8b11e04727214d8a7a11d8f2ab81a2f4899e1f [file] [log] [blame]
#include <algorithm>
#include <functional>
#include <utility>
#include "tail.h"
namespace marisa {
Tail::Tail() : buf_() {}
void Tail::build(const Vector<String> &keys,
Vector<UInt32> *offsets, int mode) {
switch (mode) {
case MARISA_BINARY_TAIL: {
build_binary_tail(keys, offsets);
return;
}
case MARISA_TEXT_TAIL: {
if (!build_text_tail(keys, offsets)) {
build_binary_tail(keys, offsets);
}
return;
}
default: {
MARISA_THROW(MARISA_PARAM_ERROR);
}
}
}
void Tail::mmap(Mapper *mapper, const char *filename,
long offset, int whence) {
if (mapper == NULL) {
MARISA_THROW(MARISA_PARAM_ERROR);
}
Mapper temp_mapper;
temp_mapper.open(filename, offset, whence);
map(temp_mapper);
temp_mapper.swap(mapper);
}
void Tail::map(const void *ptr, std::size_t size) {
Mapper mapper(ptr, size);
map(mapper);
}
void Tail::map(Mapper &mapper) {
Tail temp;
temp.buf_.map(mapper);
temp.swap(this);
}
void Tail::load(const char *filename, long offset, int whence) {
Reader reader;
reader.open(filename, offset, whence);
read(reader);
}
void Tail::fread(::FILE *file) {
Reader reader(file);
read(reader);
}
void Tail::read(int fd) {
Reader reader(fd);
read(reader);
}
void Tail::read(std::istream &stream) {
Reader reader(&stream);
read(reader);
}
void Tail::read(Reader &reader) {
Tail temp;
temp.buf_.read(reader);
temp.swap(this);
}
void Tail::save(const char *filename, bool trunc_flag,
long offset, int whence) const {
Writer writer;
writer.open(filename, trunc_flag, offset, whence);
write(writer);
}
void Tail::fwrite(::FILE *file) const {
Writer writer(file);
write(writer);
}
void Tail::write(int fd) const {
Writer writer(fd);
write(writer);
}
void Tail::write(std::ostream &stream) const {
Writer writer(&stream);
write(writer);
}
void Tail::write(Writer &writer) const {
buf_.write(writer);
}
void Tail::clear() {
Tail().swap(this);
}
void Tail::swap(Tail *rhs) {
buf_.swap(&rhs->buf_);
}
void Tail::build_binary_tail(const Vector<String> &keys,
Vector<UInt32> *offsets) {
if (keys.empty()) {
build_empty_tail(offsets);
return;
}
Vector<UInt8> buf;
buf.push_back('\0');
Vector<UInt32> temp_offsets;
temp_offsets.resize(keys.size() + 1);
for (std::size_t i = 0; i < keys.size(); ++i) {
temp_offsets[i] = (UInt32)buf.size();
for (std::size_t j = 0; j < keys[i].length(); ++j) {
buf.push_back(keys[i][j]);
}
}
temp_offsets.back() = (UInt32)buf.size();
buf.shrink();
if (offsets != NULL) {
temp_offsets.swap(offsets);
}
buf_.swap(&buf);
}
bool Tail::build_text_tail(const Vector<String> &keys,
Vector<UInt32> *offsets) {
if (keys.empty()) {
build_empty_tail(offsets);
return true;
}
typedef std::pair<RString, UInt32> KeyIdPair;
Vector<KeyIdPair> pairs;
pairs.resize(keys.size());
for (std::size_t i = 0; i < keys.size(); ++i) {
for (std::size_t j = 0; j < keys[i].length(); ++j) {
if (keys[i][j] == '\0') {
return false;
}
}
pairs[i].first = RString(keys[i]);
pairs[i].second = (UInt32)i;
}
std::sort(pairs.begin(), pairs.end(), std::greater<KeyIdPair>());
Vector<UInt8> buf;
buf.push_back('T');
Vector<UInt32> temp_offsets;
temp_offsets.resize(pairs.size(), 1);
const KeyIdPair dummy_key;
const KeyIdPair *last = &dummy_key;
for (std::size_t i = 0; i < pairs.size(); ++i) {
const KeyIdPair &cur = pairs[i];
std::size_t match = 0;
while ((match < cur.first.length()) && (match < last->first.length()) &&
last->first[match] == cur.first[match]) {
++match;
}
if ((match == cur.first.length()) && (last->first.length() != 0)) {
temp_offsets[cur.second] = (UInt32)(temp_offsets[last->second]
+ (last->first.length() - match));
} else {
temp_offsets[cur.second] = (UInt32)buf.size();
for (std::size_t j = 1; j <= cur.first.length(); ++j) {
buf.push_back(cur.first[cur.first.length() - j]);
}
buf.push_back('\0');
}
last = &cur;
}
buf.shrink();
if (offsets != NULL) {
temp_offsets.swap(offsets);
}
buf_.swap(&buf);
return true;
}
void Tail::build_empty_tail(Vector<UInt32> *offsets) {
buf_.clear();
if (offsets != NULL) {
offsets->clear();
}
}
} // namespace marisa