| // Copyright 2008 Google Inc. |
| // Author: Lincoln Smith |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include <config.h> |
| #include "vcdiffengine.h" |
| #include <string.h> // memset, strlen |
| #include <algorithm> |
| #include <string> |
| #include <vector> |
| #include "addrcache.h" |
| #include "blockhash.h" |
| #include "encodetable.h" |
| #include "google/output_string.h" |
| #include "rolling_hash.h" |
| #include "testing.h" |
| #include "varint_bigendian.h" |
| #include "vcdiff_defs.h" |
| |
| namespace open_vcdiff { |
| |
| namespace { |
| |
| class VCDiffEngineTestBase : public testing::Test { |
| protected: |
| typedef std::string string; |
| |
| // Some common definitions and helper functions used in the various tests |
| // for VCDiffEngine. |
| static const int kBlockSize = VCDiffEngine::kMinimumMatchSize; |
| |
| VCDiffEngineTestBase() : interleaved_(false), |
| diff_output_string_(&diff_), |
| verify_position_(0), |
| saved_total_size_position_(0), |
| saved_delta_encoding_position_(0), |
| saved_section_sizes_position_(0), |
| data_bytes_(0), |
| instruction_bytes_(0), |
| address_bytes_(0) { } |
| |
| virtual ~VCDiffEngineTestBase() { } |
| |
| virtual void TearDown() { |
| VerifyMatchCounts(); |
| } |
| |
| // Copy string_without_spaces into newly allocated result buffer, |
| // but pad its contents with space characters so that every character |
| // in string_without_spaces corresponds to (block_size - 1) |
| // spaces in the result, followed by that character. |
| // For example: |
| // If string_without_spaces begins "The only thing"... and block_size is 4, |
| // then 3 space characters will be inserted |
| // between each letter in the result, as follows: |
| // " T h e o n l y t h i n g"... |
| // This makes testing simpler, because finding a block_size-byte match |
| // between the dictionary and target only depends on the |
| // trailing letter in each block. |
| // If no_initial_padding is true, then the first letter will not have |
| // spaces added in front of it. |
| static void MakeEachLetterABlock(const char* string_without_spaces, |
| const char** result, |
| int block_size, |
| bool no_initial_padding) { |
| const size_t length_without_spaces = strlen(string_without_spaces); |
| char* padded_text = new char[(block_size * length_without_spaces) + 1]; |
| memset(padded_text, ' ', block_size * length_without_spaces); |
| char* padded_text_ptr = padded_text; |
| if (!no_initial_padding) { |
| padded_text_ptr += block_size - 1; |
| } |
| for (size_t i = 0; i < length_without_spaces; ++i) { |
| *padded_text_ptr = string_without_spaces[i]; |
| padded_text_ptr += block_size; |
| } |
| *(padded_text_ptr - block_size + 1) = '\0'; |
| *result = padded_text; |
| } |
| |
| // These functions iterate through the decoded output and expect |
| // simple elements: bytes or variable-length integers. |
| void ExpectByte(char byte) { |
| EXPECT_GT(diff_.size(), verify_position_); |
| EXPECT_EQ(byte, diff_[verify_position_]); |
| ++verify_position_; |
| } |
| |
| size_t ExpectVarint(int32_t expected_value) { |
| EXPECT_GT(diff_.size(), verify_position_); |
| const char* const original_position = &diff_[verify_position_]; |
| const char* new_position = original_position; |
| const size_t expected_length = VarintBE<int32_t>::Length(expected_value); |
| int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), |
| &new_position); |
| EXPECT_LE(0, parsed_value); |
| size_t parsed_length = new_position - original_position; |
| EXPECT_EQ(expected_value, parsed_value); |
| EXPECT_EQ(expected_length, parsed_length); |
| verify_position_ += parsed_length; |
| return parsed_length; |
| } |
| |
| size_t ExpectSize(size_t size) { |
| return ExpectVarint(static_cast<int32_t>(size)); |
| } |
| |
| size_t ExpectStringLength(const char* s) { |
| return ExpectSize(strlen(s)); |
| } |
| |
| void SkipVarint() { |
| EXPECT_GT(diff_.size(), verify_position_); |
| const char* const original_position = &diff_[verify_position_]; |
| const char* new_position = original_position; |
| VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position); |
| size_t parsed_length = new_position - original_position; |
| verify_position_ += parsed_length; |
| } |
| |
| void ExpectDataByte(char byte) { |
| ExpectByte(byte); |
| if (interleaved_) { |
| ++instruction_bytes_; |
| } else { |
| ++data_bytes_; |
| } |
| } |
| |
| void ExpectDataString(const char *expected_string) { |
| const char* ptr = expected_string; |
| while (*ptr) { |
| ExpectDataByte(*ptr); |
| ++ptr; |
| } |
| } |
| |
| void ExpectDataStringWithBlockSpacing(const char *expected_string, |
| bool trailing_spaces) { |
| const char* ptr = expected_string; |
| while (*ptr) { |
| for (int i = 0; i < (kBlockSize - 1); ++i) { |
| ExpectDataByte(' '); |
| } |
| ExpectDataByte(*ptr); |
| ++ptr; |
| } |
| if (trailing_spaces) { |
| for (int i = 0; i < (kBlockSize - 1); ++i) { |
| ExpectDataByte(' '); |
| } |
| } |
| } |
| |
| void ExpectInstructionByte(char byte) { |
| ExpectByte(byte); |
| ++instruction_bytes_; |
| } |
| |
| void ExpectInstructionVarint(int32_t value) { |
| instruction_bytes_ += ExpectVarint(value); |
| } |
| |
| void ExpectAddressByte(char byte) { |
| ExpectByte(byte); |
| if (interleaved_) { |
| ++instruction_bytes_; |
| } else { |
| ++address_bytes_; |
| } |
| } |
| |
| void ExpectAddressVarint(int32_t value) { |
| if (interleaved_) { |
| instruction_bytes_ += ExpectVarint(value); |
| } else { |
| address_bytes_ += ExpectVarint(value); |
| } |
| } |
| |
| // The following functions leverage the fact that the encoder uses |
| // the default code table and cache sizes. They are able to search for |
| // instructions of a particular size. The logic for mapping from |
| // instruction type, mode, and size to opcode value is very different here |
| // from the logic used in encodetable.{h,cc}, so hopefully |
| // this version will help validate that the other is correct. |
| // This version uses conditional statements, while encodetable.h |
| // looks up values in a mapping table. |
| void ExpectAddress(int32_t address, int copy_mode) { |
| if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) { |
| ExpectAddressVarint(address); |
| } else { |
| ExpectAddressByte(address); |
| } |
| } |
| |
| void ExpectAddInstruction(int size) { |
| if (size <= 18) { |
| ExpectInstructionByte(0x01 + size); |
| } else { |
| ExpectInstructionByte(0x01); |
| ExpectInstructionVarint(size); |
| } |
| } |
| |
| void ExpectCopyInstruction(int size, int mode) { |
| if ((size >= 4) && (size <= 16)) { |
| ExpectInstructionByte(0x10 + (0x10 * mode) + size); |
| } else { |
| ExpectInstructionByte(0x13 + (0x10 * mode)); |
| ExpectInstructionVarint(size); |
| } |
| ExpectMatch(size); |
| } |
| |
| bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) { |
| if (!default_cache_.IsSameMode(copy_mode) && |
| (add_size <= 4) && |
| (copy_size >= 4) && |
| (copy_size <= 6)) { |
| ExpectInstructionByte(0x9C + |
| (0x0C * copy_mode) + |
| (0x03 * add_size) + |
| copy_size); |
| ExpectMatch(copy_size); |
| return true; |
| } else if (default_cache_.IsSameMode(copy_mode) && |
| (add_size <= 4) && |
| (copy_size == 4)) { |
| ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size); |
| ExpectMatch(copy_size); |
| return true; |
| } else { |
| ExpectAddInstruction(add_size); |
| return false; |
| } |
| } |
| |
| void ExpectAddInstructionForStringLength(const char* s) { |
| ExpectAddInstruction(static_cast<int>(strlen(s))); |
| } |
| |
| // Call this function before beginning to iterate through the diff string |
| // using the Expect... functions. |
| // text must be NULL-terminated. |
| void VerifyHeaderForDictionaryAndTargetText(const char* dictionary, |
| const char* target_text) { |
| ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary) |
| ExpectStringLength(dictionary); |
| ExpectByte(0x00); // Source segment position: start of dictionary |
| saved_total_size_position_ = verify_position_; |
| SkipVarint(); // Length of the delta encoding |
| saved_delta_encoding_position_ = verify_position_; |
| ExpectStringLength(target_text); |
| ExpectByte(0x00); // Delta_indicator (no compression) |
| saved_section_sizes_position_ = verify_position_; |
| SkipVarint(); // length of data for ADDs and RUNs |
| SkipVarint(); // length of instructions section |
| SkipVarint(); // length of addresses for COPYs |
| } |
| |
| // Call this function before beginning to iterating through the entire |
| // diff string using the Expect... functions. It makes sure that the |
| // size totals in the window header match the number of bytes that |
| // were parsed. |
| void VerifySizes() { |
| EXPECT_EQ(verify_position_, diff_.size()); |
| const size_t delta_encoding_size = verify_position_ - |
| saved_delta_encoding_position_; |
| verify_position_ = saved_total_size_position_; |
| ExpectSize(delta_encoding_size); |
| verify_position_ = saved_section_sizes_position_; |
| ExpectSize(data_bytes_); |
| ExpectSize(instruction_bytes_); |
| ExpectSize(address_bytes_); |
| } |
| |
| void ExpectMatch(size_t match_size) { |
| if (match_size >= expected_match_counts_.size()) { |
| // Be generous to avoid resizing again |
| expected_match_counts_.resize(match_size * 2, 0); |
| } |
| ++expected_match_counts_[match_size]; |
| } |
| |
| void VerifyMatchCounts() { |
| EXPECT_TRUE(std::equal(expected_match_counts_.begin(), |
| expected_match_counts_.end(), |
| actual_match_counts_.begin())); |
| } |
| |
| bool interleaved_; |
| string diff_; |
| OutputString<string> diff_output_string_; |
| size_t verify_position_; |
| size_t saved_total_size_position_; |
| size_t saved_delta_encoding_position_; |
| size_t saved_section_sizes_position_; |
| size_t data_bytes_; |
| size_t instruction_bytes_; |
| size_t address_bytes_; |
| VCDiffAddressCache default_cache_; // Used for finding mode values |
| std::vector<int> expected_match_counts_; |
| std::vector<int> actual_match_counts_; |
| }; |
| |
| class VCDiffEngineTest : public VCDiffEngineTestBase { |
| protected: |
| VCDiffEngineTest() : |
| engine_(dictionary_, strlen(dictionary_)) { |
| EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); |
| } |
| |
| virtual ~VCDiffEngineTest() { } |
| |
| |
| static void SetUpTestCase() { |
| MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_, |
| kBlockSize, false); |
| MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false); |
| } |
| |
| static void TearDownTestCase() { |
| delete[] dictionary_; |
| delete[] target_; |
| } |
| |
| void EncodeNothing(bool interleaved, bool target_matching) { |
| interleaved_ = interleaved; |
| VCDiffCodeTableWriter coder(interleaved); |
| coder.Init(engine_.dictionary_size()); |
| engine_.Encode("", 0, target_matching, &diff_output_string_, &coder); |
| EXPECT_TRUE(diff_.empty()); |
| actual_match_counts_ = coder.match_counts(); |
| } |
| |
| // text must be NULL-terminated |
| void EncodeText(const char* text, bool interleaved, bool target_matching) { |
| interleaved_ = interleaved; |
| VCDiffCodeTableWriter coder(interleaved); |
| coder.Init(engine_.dictionary_size()); |
| engine_.Encode(text, |
| strlen(text), |
| target_matching, |
| &diff_output_string_, |
| &coder); |
| actual_match_counts_ = coder.match_counts(); |
| } |
| |
| void Encode(bool interleaved, bool target_matching) { |
| EncodeText(target_, interleaved, target_matching); |
| VerifyHeader(); |
| } |
| |
| void VerifyHeader() { |
| VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); |
| } |
| |
| static const char dictionary_without_spaces_[]; |
| static const char target_without_spaces_[]; |
| |
| static const char* dictionary_; |
| static const char* target_; |
| |
| const VCDiffEngine engine_; |
| }; |
| |
| #ifdef GTEST_HAS_DEATH_TEST |
| typedef VCDiffEngineTest VCDiffEngineDeathTest; |
| #endif // GTEST_HAS_DEATH_TEST |
| |
| const char VCDiffEngineTest::dictionary_without_spaces_[] = |
| "The only thing we have to fear is fear itself"; |
| |
| const char VCDiffEngineTest::target_without_spaces_[] = |
| "What we hear is fearsome"; |
| |
| const char* VCDiffEngineTest::dictionary_ = NULL; |
| const char* VCDiffEngineTest::target_ = NULL; |
| |
| #ifdef GTEST_HAS_DEATH_TEST |
| TEST_F(VCDiffEngineDeathTest, InitCalledTwice) { |
| EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()), |
| "twice"); |
| } |
| #endif // GTEST_HAS_DEATH_TEST |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeNothing) { |
| EncodeNothing(/* interleaved = */ false, /* target matching = */ false); |
| } |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) { |
| EncodeNothing(/* interleaved = */ true, /* target matching = */ false); |
| } |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) { |
| EncodeNothing(/* interleaved = */ false, /* target matching = */ true); |
| } |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) { |
| EncodeNothing(/* interleaved = */ true, /* target matching = */ true); |
| } |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) { |
| const char* small_text = " "; |
| EncodeText(small_text, |
| /* interleaved = */ false, |
| /* target matching = */ false); |
| VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); |
| // Data for ADDs |
| ExpectDataString(small_text); |
| // Instructions and sizes |
| ExpectAddInstructionForStringLength(small_text); |
| } |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) { |
| const char* small_text = " "; |
| EncodeText(small_text, |
| /* interleaved = */ true, |
| /* target matching = */ false); |
| VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); |
| // Interleaved section |
| ExpectAddInstructionForStringLength(small_text); |
| ExpectDataString(small_text); |
| } |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeSampleText) { |
| Encode(/* interleaved = */ false, /* target matching = */ false); |
| // Data for ADDs |
| ExpectDataStringWithBlockSpacing("W", false); |
| ExpectDataByte('t'); |
| ExpectDataByte('s'); |
| ExpectDataByte('m'); |
| // Instructions and sizes |
| if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, |
| VCD_SELF_MODE)) { |
| ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); |
| } |
| ExpectAddInstruction(1); |
| ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); |
| ExpectCopyInstruction(11 * kBlockSize, |
| default_cache_.FirstNearMode()); |
| if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { |
| ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); |
| } |
| if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { |
| ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); |
| } |
| // Addresses for COPY |
| ExpectAddressVarint(18 * kBlockSize); // "ha" |
| ExpectAddressVarint(14 * kBlockSize); // " we h" |
| ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" |
| ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" |
| ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" |
| VerifySizes(); |
| } |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) { |
| Encode(/* interleaved = */ true, /* target matching = */ false); |
| // Interleaved section |
| if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, |
| VCD_SELF_MODE)) { |
| ExpectDataStringWithBlockSpacing("W", false); |
| ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); |
| } else { |
| ExpectDataStringWithBlockSpacing("W", false); |
| } |
| ExpectAddressVarint(18 * kBlockSize); // "ha" |
| ExpectAddInstruction(1); |
| ExpectDataByte('t'); |
| ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); |
| ExpectAddressVarint(14 * kBlockSize); // " we h" |
| ExpectCopyInstruction(11 * kBlockSize, |
| default_cache_.FirstNearMode()); |
| ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" |
| if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { |
| ExpectDataByte('s'); |
| ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); |
| } else { |
| ExpectDataByte('s'); |
| } |
| ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" |
| if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { |
| ExpectDataByte('m'); |
| ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); |
| } else { |
| ExpectDataByte('m'); |
| } |
| ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" |
| VerifySizes(); |
| } |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) { |
| Encode(/* interleaved = */ false, /* target matching = */ true); |
| // Data for ADDs |
| ExpectDataStringWithBlockSpacing("W", false); |
| ExpectDataByte('t'); |
| ExpectDataByte('s'); |
| ExpectDataByte('m'); |
| // Instructions and sizes |
| if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, |
| VCD_SELF_MODE)) { |
| ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); |
| } |
| ExpectAddInstruction(1); |
| ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); |
| ExpectCopyInstruction(11 * kBlockSize, |
| default_cache_.FirstNearMode()); |
| if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { |
| ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); |
| } |
| if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { |
| ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); |
| } |
| // Addresses for COPY |
| ExpectAddressVarint(18 * kBlockSize); // "ha" |
| ExpectAddressVarint(14 * kBlockSize); // " we h" |
| ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" |
| ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" |
| ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" |
| VerifySizes(); |
| } |
| |
| TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) { |
| Encode(/* interleaved = */ true, /* target matching = */ false); |
| // Interleaved section |
| if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, |
| VCD_SELF_MODE)) { |
| ExpectDataStringWithBlockSpacing("W", false); |
| ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); |
| } else { |
| ExpectDataStringWithBlockSpacing("W", false); |
| } |
| ExpectAddressVarint(18 * kBlockSize); // "ha" |
| ExpectAddInstruction(1); |
| ExpectDataByte('t'); |
| ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); |
| ExpectAddressVarint(14 * kBlockSize); // " we h" |
| ExpectCopyInstruction(11 * kBlockSize, |
| default_cache_.FirstNearMode()); |
| ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" |
| if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { |
| ExpectDataByte('s'); |
| ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); |
| } else { |
| ExpectDataByte('s'); |
| } |
| ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" |
| if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { |
| ExpectDataByte('m'); |
| ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); |
| } else { |
| ExpectDataByte('m'); |
| } |
| ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" |
| VerifySizes(); |
| } |
| |
| // This test case takes a dictionary containing several instances of the string |
| // "weasel", and a target string which is identical to the dictionary |
| // except that all instances of "weasel" have been replaced with the string |
| // "moon-pie". It tests that COPY instructions are generated for all |
| // boilerplate text (that is, the text between the "moon-pie" instances in |
| // the target) and, if target matching is enabled, that each instance of |
| // "moon-pie" (except the first one) is encoded using a COPY instruction |
| // rather than an ADD. |
| class WeaselsToMoonpiesTest : public VCDiffEngineTestBase { |
| protected: |
| // kCompressibleTestBlockSize: |
| // The size of the block to create for each letter in the |
| // dictionary and search string for the "compressible text" test. |
| // See MakeEachLetterABlock, below. |
| // If we use kCompressibleTestBlockSize = kBlockSize, then the |
| // encoder will find one match per unique letter in the HTML text. |
| // There are too many examples of "<" in the text for the encoder |
| // to iterate through them all, and some matches are not found. |
| // If we use kCompressibleTextBlockSize = 1, then the boilerplate |
| // text between "weasel" strings in the dictionary and "moon-pie" |
| // strings in the target may not be long enough to be found by |
| // the encoder's block-hash algorithm. A good value, that will give |
| // reproducible results across all block sizes, will be somewhere |
| // in between these extremes. |
| static const int kCompressibleTestBlockSize = kBlockSize / 4; |
| static const int kTrailingSpaces = kCompressibleTestBlockSize - 1; |
| |
| WeaselsToMoonpiesTest() : |
| engine_(dictionary_, strlen(dictionary_)), |
| match_index_(0), |
| search_dictionary_(dictionary_, strlen(dictionary_)), |
| copied_moonpie_address_(0) { |
| EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); |
| weasel_positions_[0] = 0; |
| after_weasel_[0] = 0; |
| moonpie_positions_[0] = 0; |
| after_moonpie_[0] = 0; |
| } |
| |
| virtual ~WeaselsToMoonpiesTest() { } |
| |
| static void SetUpTestCase() { |
| MakeEachLetterABlock(dictionary_without_spaces_, |
| &dictionary_, |
| kCompressibleTestBlockSize, |
| false); |
| MakeEachLetterABlock(target_without_spaces_, |
| &target_, |
| kCompressibleTestBlockSize, |
| false); |
| MakeEachLetterABlock(weasel_text_without_spaces_, |
| &weasel_text_, |
| kCompressibleTestBlockSize, |
| true); |
| MakeEachLetterABlock(moonpie_text_without_spaces_, |
| &moonpie_text_, |
| kCompressibleTestBlockSize, |
| true); |
| } |
| |
| static void TearDownTestCase() { |
| delete[] dictionary_; |
| delete[] target_; |
| delete[] weasel_text_; |
| delete[] moonpie_text_; |
| } |
| |
| // text must be NULL-terminated |
| void EncodeText(const char* text, bool interleaved, bool target_matching) { |
| interleaved_ = interleaved; |
| VCDiffCodeTableWriter coder(interleaved); |
| coder.Init(engine_.dictionary_size()); |
| engine_.Encode(text, |
| strlen(text), |
| target_matching, |
| &diff_output_string_, |
| &coder); |
| actual_match_counts_ = coder.match_counts(); |
| } |
| |
| void Encode(bool interleaved, bool target_matching) { |
| EncodeText(target_, interleaved, target_matching); |
| VerifyHeader(); |
| } |
| |
| void VerifyHeader() { |
| VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); |
| } |
| |
| void ExpectCopyForSize(size_t size, int mode) { |
| ExpectCopyInstruction(static_cast<int>(size), mode); |
| } |
| |
| void ExpectAddForSize(size_t size) { |
| ExpectAddInstruction(static_cast<int>(size)); |
| } |
| |
| void ExpectAddressVarintForSize(size_t value) { |
| ExpectAddressVarint(static_cast<int32_t>(value)); |
| } |
| |
| void FindNextMoonpie(bool include_trailing_spaces) { |
| ++match_index_; |
| SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_, |
| AfterLastWeasel())); |
| if (CurrentWeaselPosition() == string::npos) { |
| SetCurrentMoonpiePosition(string::npos); |
| } else { |
| SetCurrentAfterWeaselPosition(CurrentWeaselPosition() |
| + strlen(weasel_text_) |
| + (include_trailing_spaces ? |
| kTrailingSpaces : 0)); |
| SetCurrentMoonpiePosition(AfterLastMoonpie() |
| + CurrentBoilerplateLength()); |
| SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition() |
| + strlen(moonpie_text_) |
| + (include_trailing_spaces ? |
| kTrailingSpaces : 0)); |
| } |
| } |
| bool NoMoreMoonpies() const { |
| return CurrentMoonpiePosition() == string::npos; |
| } |
| size_t CurrentWeaselPosition() const { |
| return weasel_positions_[match_index_]; |
| } |
| size_t LastWeaselPosition() const { |
| return weasel_positions_[match_index_ - 1]; |
| } |
| size_t CurrentMoonpiePosition() const { |
| return moonpie_positions_[match_index_]; |
| } |
| size_t LastMoonpiePosition() const { |
| return moonpie_positions_[match_index_ - 1]; |
| } |
| size_t AfterLastWeasel() const { |
| CHECK_GE(match_index_, 1); |
| return after_weasel_[match_index_ - 1]; |
| } |
| size_t AfterPreviousWeasel() const { |
| CHECK_GE(match_index_, 2); |
| return after_weasel_[match_index_ - 2]; |
| } |
| size_t AfterLastMoonpie() const { |
| CHECK_GE(match_index_, 1); |
| return after_moonpie_[match_index_ - 1]; |
| } |
| size_t AfterPreviousMoonpie() const { |
| CHECK_GE(match_index_, 2); |
| return after_moonpie_[match_index_ - 2]; |
| } |
| |
| void SetCurrentWeaselPosition(size_t value) { |
| weasel_positions_[match_index_] = value; |
| } |
| void SetCurrentAfterWeaselPosition(size_t value) { |
| after_weasel_[match_index_] = value; |
| } |
| void SetCurrentMoonpiePosition(size_t value) { |
| moonpie_positions_[match_index_] = value; |
| } |
| void SetCurrentAfterMoonpiePosition(size_t value) { |
| after_moonpie_[match_index_] = value; |
| } |
| |
| // Find the length of the text in between the "weasel" strings in the |
| // compressible dictionary, which is the same as the text between |
| // the "moon-pie" strings in the compressible target. |
| size_t CurrentBoilerplateLength() const { |
| CHECK_GE(match_index_, 1); |
| return CurrentWeaselPosition() - AfterLastWeasel(); |
| } |
| size_t DistanceFromLastWeasel() const { |
| CHECK_GE(match_index_, 1); |
| return CurrentWeaselPosition() - LastWeaselPosition(); |
| } |
| size_t DistanceFromLastMoonpie() const { |
| CHECK_GE(match_index_, 1); |
| return CurrentMoonpiePosition() - LastMoonpiePosition(); |
| } |
| size_t DistanceBetweenLastTwoWeasels() const { |
| CHECK_GE(match_index_, 2); |
| return AfterLastWeasel() - AfterPreviousWeasel(); |
| } |
| size_t DistanceBetweenLastTwoMoonpies() const { |
| CHECK_GE(match_index_, 2); |
| return AfterLastMoonpie() - AfterPreviousMoonpie(); |
| } |
| |
| int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const; |
| int UpdateCopyModeForMoonpie(int copy_mode) const; |
| int32_t FindMoonpieAddressForCopyMode(int copy_mode) const; |
| |
| void CopyBoilerplateAndAddMoonpie(int copy_mode); |
| void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode); |
| |
| static const char dictionary_without_spaces_[]; |
| static const char target_without_spaces_[]; |
| static const char weasel_text_without_spaces_[]; |
| static const char moonpie_text_without_spaces_[]; |
| |
| static const char* dictionary_; |
| static const char* target_; |
| static const char* weasel_text_; |
| static const char* moonpie_text_; |
| |
| const VCDiffEngine engine_; |
| size_t weasel_positions_[128]; |
| size_t after_weasel_[128]; |
| size_t moonpie_positions_[128]; |
| size_t after_moonpie_[128]; |
| int match_index_; |
| string search_dictionary_; |
| size_t copied_moonpie_address_; |
| }; |
| |
| // Care is taken in the formulation of the dictionary |
| // to ensure that the surrounding letters do not match; for example, |
| // there are not two instances of the string "weasels". Otherwise, |
| // the matching behavior would not be as predictable. |
| const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] = |
| "<html>\n" |
| "<head>\n" |
| "<meta content=\"text/html; charset=ISO-8859-1\"\n" |
| "http-equiv=\"content-type\">\n" |
| "<title>All about weasels</title>\n" |
| "</head>\n" |
| "<!-- You will notice that the word \"weasel\" may be replaced" |
| " with something else -->\n" |
| "<body>\n" |
| "<h1>All about the weasel: highly compressible HTML text</h1>" |
| "<ul>\n" |
| "<li>Don\'t look a gift weasel in its mouth.</li>\n" |
| "<li>This item makes sure the next occurrence is found.</li>\n" |
| "<li>Don\'t count your weasel, before it\'s hatched.</li>\n" |
| "</ul>\n" |
| "<br>\n" |
| "</body>\n" |
| "</html>\n"; |
| |
| const char WeaselsToMoonpiesTest::target_without_spaces_[] = |
| "<html>\n" |
| "<head>\n" |
| "<meta content=\"text/html; charset=ISO-8859-1\"\n" |
| "http-equiv=\"content-type\">\n" |
| "<title>All about moon-pies</title>\n" |
| "</head>\n" |
| "<!-- You will notice that the word \"moon-pie\" may be replaced" |
| " with something else -->\n" |
| "<body>\n" |
| "<h1>All about the moon-pie: highly compressible HTML text</h1>" |
| "<ul>\n" |
| "<li>Don\'t look a gift moon-pie in its mouth.</li>\n" |
| "<li>This item makes sure the next occurrence is found.</li>\n" |
| "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n" |
| "</ul>\n" |
| "<br>\n" |
| "</body>\n" |
| "</html>\n"; |
| |
| const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel"; |
| const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie"; |
| |
| const char* WeaselsToMoonpiesTest::dictionary_ = NULL; |
| const char* WeaselsToMoonpiesTest::target_ = NULL; |
| const char* WeaselsToMoonpiesTest::weasel_text_ = NULL; |
| const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL; |
| |
| int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode( |
| int copy_mode) const { |
| size_t copy_address = 0; |
| if (default_cache_.IsSelfMode(copy_mode)) { |
| copy_address = AfterLastWeasel(); |
| } else if (default_cache_.IsNearMode(copy_mode)) { |
| copy_address = DistanceBetweenLastTwoWeasels(); |
| } else if (default_cache_.IsSameMode(copy_mode)) { |
| copy_address = AfterLastWeasel() % 256; |
| } |
| return static_cast<int32_t>(copy_address); |
| } |
| |
| int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const { |
| if (copy_mode == default_cache_.FirstSameMode()) { |
| return default_cache_.FirstSameMode() |
| + static_cast<int>((copied_moonpie_address_ / 256) % 3); |
| } else { |
| return copy_mode; |
| } |
| } |
| |
| int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode( |
| int copy_mode) const { |
| size_t copy_address = 0; |
| if (default_cache_.IsHereMode(copy_mode)) { |
| copy_address = DistanceFromLastMoonpie(); |
| } else if (default_cache_.IsNearMode(copy_mode)) { |
| copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces; |
| } else if (default_cache_.IsSameMode(copy_mode)) { |
| copy_address = copied_moonpie_address_ % 256; |
| } |
| return static_cast<int32_t>(copy_address); |
| } |
| |
| // Expect one dictionary instance of "weasel" to be replaced with "moon-pie" |
| // in the encoding. |
| void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) { |
| EXPECT_FALSE(NoMoreMoonpies()); |
| ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); |
| ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); |
| ExpectAddInstructionForStringLength(moonpie_text_); |
| ExpectDataString(moonpie_text_); |
| } |
| |
| // Expect one dictionary instance of "weasel" to be replaced with "moon-pie" |
| // in the encoding. The "moon-pie" text will be copied from the previously |
| // encoded target. |
| void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie( |
| int copy_mode, |
| int moonpie_copy_mode) { |
| EXPECT_FALSE(NoMoreMoonpies()); |
| ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); |
| ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); |
| moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode); |
| ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode); |
| ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode), |
| moonpie_copy_mode); |
| copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition(); |
| } |
| |
| TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) { |
| Encode(/* interleaved = */ true, /* target matching = */ false); |
| FindNextMoonpie(false); |
| // Expect all five "weasel"s to be replaced with "moon-pie"s |
| CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); |
| FindNextMoonpie(false); |
| CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE); |
| FindNextMoonpie(false); |
| CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1); |
| FindNextMoonpie(false); |
| CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2); |
| FindNextMoonpie(false); |
| CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3); |
| FindNextMoonpie(false); |
| EXPECT_TRUE(NoMoreMoonpies()); |
| ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), |
| default_cache_.FirstNearMode()); |
| ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); |
| VerifySizes(); |
| } |
| |
| TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) { |
| Encode(/* interleaved = */ true, /* target matching = */ true); |
| // Expect all five "weasel"s to be replaced with "moon-pie"s. |
| // Every "moon-pie" after the first one should be copied from the |
| // previously encoded target text. |
| FindNextMoonpie(false); |
| CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); |
| FindNextMoonpie(true); |
| CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE); |
| FindNextMoonpie(true); |
| CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, |
| default_cache_.FirstSameMode()); |
| FindNextMoonpie(true); |
| CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3, |
| VCD_HERE_MODE); |
| FindNextMoonpie(true); |
| CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, |
| default_cache_.FirstSameMode()); |
| FindNextMoonpie(true); |
| EXPECT_TRUE(NoMoreMoonpies()); |
| ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), |
| default_cache_.FirstNearMode() + 3); |
| ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); |
| VerifySizes(); |
| } |
| |
| } // anonymous namespace |
| } // namespace open-vcdiff |