| // Copyright 2008 Google Inc. |
| // Author: Lincoln Smith |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef OPEN_VCDIFF_ENCODETABLE_H_ |
| #define OPEN_VCDIFF_ENCODETABLE_H_ |
| |
| #include <config.h> |
| #include <stddef.h> // size_t |
| #include <stdint.h> // int32_t |
| #include <string> |
| #include <vector> |
| #include "addrcache.h" |
| #include "checksum.h" |
| #include "codetable.h" |
| #include "codetablewriter_interface.h" |
| |
| namespace open_vcdiff { |
| |
// Forward declarations. Within this header both types are referred to only
// through pointers, so their complete definitions are not needed here.
class VCDiffInstructionMap;
class OutputStringInterface;
| |
| // The method calls after construction *must* conform |
| // to the following pattern: |
| // {{Add|Copy|Run}* [AddChecksum] Output}* |
| // |
| // When Output has been called in this sequence, a complete target window |
| // (as defined in RFC 3284 section 4.3) will have been appended to |
| // out (unless no calls to Add, Run, or Copy were made, in which |
| // case Output will do nothing.) The output will not be available for use |
| // until after each call to Output(). |
| // |
| // NOT threadsafe. |
| // |
| class VCDiffCodeTableWriter : public CodeTableWriterInterface { |
| public: |
| // This constructor uses the default code table. |
| // If interleaved is true, the encoder writes each delta file window |
| // by interleaving instructions and sizes with their corresponding |
| // addresses and data, rather than placing these elements into three |
| // separate sections. This facilitates providing partially |
| // decoded results when only a portion of a delta file window |
| // is received (e.g. when HTTP over TCP is used as the |
| // transmission protocol.) The interleaved format is |
| // not consistent with the VCDIFF draft standard. |
| // |
| explicit VCDiffCodeTableWriter(bool interleaved); |
| |
| // Uses a non-standard code table and non-standard cache sizes. The caller |
| // must guarantee that code_table_data remains allocated for the lifetime of |
| // the VCDiffCodeTableWriter object. Note that this is different from how |
| // VCDiffCodeTableReader::UseCodeTable works. It is assumed that a given |
| // encoder will use either the default code table or a statically-defined |
| // non-standard code table, whereas the decoder must have the ability to read |
| // an arbitrary non-standard code table from a delta file and discard it once |
| // the file has been decoded. |
| // |
| VCDiffCodeTableWriter(bool interleaved, |
| int near_cache_size, |
| int same_cache_size, |
| const VCDiffCodeTableData& code_table_data, |
| unsigned char max_mode); |
| |
| virtual ~VCDiffCodeTableWriter(); |
| |
| // Initializes the constructed object for use. |
| // This method must be called after a VCDiffCodeTableWriter is constructed |
| // and before any of its other methods can be called. It will return |
| // false if there was an error initializing the object, or true if it |
| // was successful. After the object has been initialized and used, |
| // Init() can be called again to restore the initial state of the object. |
| // |
| bool Init(size_t dictionary_size); |
| |
| virtual size_t target_length() const { return target_length_; } |
| |
| // Encode an ADD opcode with the "size" bytes starting at data |
| virtual void Add(const char* data, size_t size); |
| |
| // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes. |
| virtual void Copy(int32_t offset, size_t size); |
| |
| // Encode a RUN opcode for "size" copies of the value "byte". |
| virtual void Run(size_t size, unsigned char byte); |
| |
| void AddChecksum(VCDChecksum checksum) { |
| add_checksum_ = true; |
| checksum_ = checksum; |
| } |
| |
| // Finishes encoding and appends the encoded delta window to the output |
| // string. The output string is not null-terminated and may contain embedded |
| // '\0' characters. |
| virtual void Output(OutputStringInterface* out); |
| |
| const std::vector<int>& match_counts() const { return match_counts_; } |
| |
| private: |
| typedef std::string string; |
| |
| // This is an estimate of the longest match size the encoder expects to find. |
| // It is used to determine the initial size of the vector match_counts_. |
| // If it is too large, then some space will be wasted on vector elements |
| // that are not used. If it is too small, then some time will be wasted |
| // expanding match_counts_ to accommodate larger match sizes. |
| static const size_t kMaxMatchSize = 2000; |
| |
| // The maximum value for the mode of a COPY instruction. |
| const unsigned char max_mode_; |
| |
| // If interleaved is true, sets data_for_add_and_run_ and |
| // addresses_for_copy_ to point at instructions_and_sizes_, |
| // so that instructions, sizes, addresses and data will be |
| // combined into a single interleaved stream. |
| // If interleaved is false, sets data_for_add_and_run_ and |
| // addresses_for_copy_ to point at their corresponding |
| // separate_... strings, so that the three sections will |
| // be generated separately from one another. |
| // |
| void InitSectionPointers(bool interleaved); |
| |
| // Determines the best opcode to encode an instruction, and appends |
| // or substitutes that opcode and its size into the |
| // instructions_and_sizes_ string. |
| // |
| void EncodeInstruction(VCDiffInstructionType inst, |
| size_t size, |
| unsigned char mode); |
| |
| void EncodeInstruction(VCDiffInstructionType inst, size_t size) { |
| return EncodeInstruction(inst, size, 0); |
| } |
| |
| // Calculates the number of bytes needed to store the given size value as a |
| // variable-length integer (VarintBE). |
| static size_t CalculateLengthOfSizeAsVarint(size_t size); |
| |
| // Appends the size value to the string as a variable-length integer. |
| static void AppendSizeToString(size_t size, string* out); |
| |
| // Appends the size value to the output string as a variable-length integer. |
| static void AppendSizeToOutputString(size_t size, OutputStringInterface* out); |
| |
| // Calculates the "Length of the delta encoding" field for the delta window |
| // header, based on the sizes of the sections and of the other header |
| // elements. |
| size_t CalculateLengthOfTheDeltaEncoding() const; |
| |
| // None of the following 'string' objects are null-terminated. |
| |
| // A series of instruction opcodes, each of which may be followed |
| // by one or two Varint values representing the size parameters |
| // of the first and second instruction in the opcode. |
| string instructions_and_sizes_; |
| |
| // A series of data arguments (byte values) used for ADD and RUN |
| // instructions. Depending on whether interleaved output is used |
| // for streaming or not, the pointer may point to |
| // separate_data_for_add_and_run_ or to instructions_and_sizes_. |
| string *data_for_add_and_run_; |
| string separate_data_for_add_and_run_; |
| |
| // A series of Varint addresses used for COPY instructions. |
| // For the SAME mode, a byte value is stored instead of a Varint. |
| // Depending on whether interleaved output is used |
| // for streaming or not, the pointer may point to |
| // separate_addresses_for_copy_ or to instructions_and_sizes_. |
| string *addresses_for_copy_; |
| string separate_addresses_for_copy_; |
| |
| VCDiffAddressCache address_cache_; |
| |
| size_t dictionary_size_; |
| |
| // The number of bytes of target data that has been encoded so far. |
| // Each time Add(), Copy(), or Run() is called, this will be incremented. |
| // The target length is used to compute HERE mode addresses |
| // for COPY instructions, and is also written into the header |
| // of the delta window when Output() is called. |
| // |
| size_t target_length_; |
| |
| const VCDiffCodeTableData* code_table_data_; |
| |
| // The instruction map facilitates finding an opcode quickly given an |
| // instruction inst, size, and mode. This is an alternate representation |
| // of the same information that is found in code_table_data_. |
| // |
| const VCDiffInstructionMap* instruction_map_; |
| |
| // The zero-based index within instructions_and_sizes_ of the byte |
| // that contains the last single-instruction opcode generated by |
| // EncodeInstruction(). (See that function for exhaustive details.) |
| // It is necessary to use an index rather than a pointer for this value |
| // because instructions_and_sizes_ may be resized, which would invalidate |
| // any pointers into its data buffer. The value -1 is reserved to mean that |
| // either no opcodes have been generated yet, or else the last opcode |
| // generated was a double-instruction opcode. |
| // |
| int last_opcode_index_; |
| |
| // If true, an Adler32 checksum of the target window data will be written as |
| // a variable-length integer, just after the size of the addresses section. |
| // |
| bool add_checksum_; |
| |
| // The checksum to be written to the current target window, |
| // if add_checksum_ is true. |
| // This will not be calculated based on the individual calls to Add(), Run(), |
| // and Copy(), which would be unnecessarily expensive. Instead, the code |
| // that uses the VCDiffCodeTableWriter object is expected to calculate |
| // the checksum all at once and to call AddChecksum() with that value. |
| // Must be called sometime before calling Output(), though it can be called |
| // either before or after the calls to Add(), Run(), and Copy(). |
| // |
| VCDChecksum checksum_; |
| |
| // The value of match_counts_[n] is equal to the number of matches |
| // of length n (that is, COPY instructions of size n) found so far. |
| std::vector<int> match_counts_; |
| |
| // Making these private avoids implicit copy constructor & assignment operator |
| VCDiffCodeTableWriter(const VCDiffCodeTableWriter&); // NOLINT |
| void operator=(const VCDiffCodeTableWriter&); |
| }; |
| |
| }; // namespace open_vcdiff |
| |
| #endif // OPEN_VCDIFF_ENCODETABLE_H_ |