blob: 016c6c7148afe5037555c0d726e4967e4b7a3e7e [file] [log] [blame]
// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef OPEN_VCDIFF_ENCODETABLE_H_
#define OPEN_VCDIFF_ENCODETABLE_H_
#include <config.h>
#include <stddef.h> // size_t
#include <stdint.h> // int32_t
#include <string>
#include <vector>
#include "addrcache.h"
#include "checksum.h"
#include "codetable.h"
#include "codetablewriter_interface.h"
namespace open_vcdiff {
class OutputStringInterface;
class VCDiffInstructionMap;
// The method calls after construction *must* conform
// to the following pattern:
// {{Add|Copy|Run}* [AddChecksum] Output}*
//
// When Output has been called in this sequence, a complete target window
// (as defined in RFC 3284 section 4.3) will have been appended to
// out (unless no calls to Add, Run, or Copy were made, in which
// case Output will do nothing.) The output will not be available for use
// until after each call to Output().
//
// NOT threadsafe.
//
class VCDiffCodeTableWriter : public CodeTableWriterInterface {
public:
// This constructor uses the default code table.
// If interleaved is true, the encoder writes each delta file window
// by interleaving instructions and sizes with their corresponding
// addresses and data, rather than placing these elements into three
// separate sections. This facilitates providing partially
// decoded results when only a portion of a delta file window
// is received (e.g. when HTTP over TCP is used as the
// transmission protocol.) The interleaved format is
// not consistent with the VCDIFF draft standard.
//
explicit VCDiffCodeTableWriter(bool interleaved);
// Uses a non-standard code table and non-standard cache sizes. The caller
// must guarantee that code_table_data remains allocated for the lifetime of
// the VCDiffCodeTableWriter object. Note that this is different from how
// VCDiffCodeTableReader::UseCodeTable works. It is assumed that a given
// encoder will use either the default code table or a statically-defined
// non-standard code table, whereas the decoder must have the ability to read
// an arbitrary non-standard code table from a delta file and discard it once
// the file has been decoded.
//
VCDiffCodeTableWriter(bool interleaved,
int near_cache_size,
int same_cache_size,
const VCDiffCodeTableData& code_table_data,
unsigned char max_mode);
virtual ~VCDiffCodeTableWriter();
// Initializes the constructed object for use.
// This method must be called after a VCDiffCodeTableWriter is constructed
// and before any of its other methods can be called. It will return
// false if there was an error initializing the object, or true if it
// was successful. After the object has been initialized and used,
// Init() can be called again to restore the initial state of the object.
//
bool Init(size_t dictionary_size);
virtual size_t target_length() const { return target_length_; }
// Encode an ADD opcode with the "size" bytes starting at data
virtual void Add(const char* data, size_t size);
// Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes.
virtual void Copy(int32_t offset, size_t size);
// Encode a RUN opcode for "size" copies of the value "byte".
virtual void Run(size_t size, unsigned char byte);
void AddChecksum(VCDChecksum checksum) {
add_checksum_ = true;
checksum_ = checksum;
}
// Finishes encoding and appends the encoded delta window to the output
// string. The output string is not null-terminated and may contain embedded
// '\0' characters.
virtual void Output(OutputStringInterface* out);
const std::vector<int>& match_counts() const { return match_counts_; }
private:
typedef std::string string;
// This is an estimate of the longest match size the encoder expects to find.
// It is used to determine the initial size of the vector match_counts_.
// If it is too large, then some space will be wasted on vector elements
// that are not used. If it is too small, then some time will be wasted
// expanding match_counts_ to accommodate larger match sizes.
static const size_t kMaxMatchSize = 2000;
// The maximum value for the mode of a COPY instruction.
const unsigned char max_mode_;
// If interleaved is true, sets data_for_add_and_run_ and
// addresses_for_copy_ to point at instructions_and_sizes_,
// so that instructions, sizes, addresses and data will be
// combined into a single interleaved stream.
// If interleaved is false, sets data_for_add_and_run_ and
// addresses_for_copy_ to point at their corresponding
// separate_... strings, so that the three sections will
// be generated separately from one another.
//
void InitSectionPointers(bool interleaved);
// Determines the best opcode to encode an instruction, and appends
// or substitutes that opcode and its size into the
// instructions_and_sizes_ string.
//
void EncodeInstruction(VCDiffInstructionType inst,
size_t size,
unsigned char mode);
void EncodeInstruction(VCDiffInstructionType inst, size_t size) {
return EncodeInstruction(inst, size, 0);
}
// Calculates the number of bytes needed to store the given size value as a
// variable-length integer (VarintBE).
static size_t CalculateLengthOfSizeAsVarint(size_t size);
// Appends the size value to the string as a variable-length integer.
static void AppendSizeToString(size_t size, string* out);
// Appends the size value to the output string as a variable-length integer.
static void AppendSizeToOutputString(size_t size, OutputStringInterface* out);
// Calculates the "Length of the delta encoding" field for the delta window
// header, based on the sizes of the sections and of the other header
// elements.
size_t CalculateLengthOfTheDeltaEncoding() const;
// None of the following 'string' objects are null-terminated.
// A series of instruction opcodes, each of which may be followed
// by one or two Varint values representing the size parameters
// of the first and second instruction in the opcode.
string instructions_and_sizes_;
// A series of data arguments (byte values) used for ADD and RUN
// instructions. Depending on whether interleaved output is used
// for streaming or not, the pointer may point to
// separate_data_for_add_and_run_ or to instructions_and_sizes_.
string *data_for_add_and_run_;
string separate_data_for_add_and_run_;
// A series of Varint addresses used for COPY instructions.
// For the SAME mode, a byte value is stored instead of a Varint.
// Depending on whether interleaved output is used
// for streaming or not, the pointer may point to
// separate_addresses_for_copy_ or to instructions_and_sizes_.
string *addresses_for_copy_;
string separate_addresses_for_copy_;
VCDiffAddressCache address_cache_;
size_t dictionary_size_;
// The number of bytes of target data that has been encoded so far.
// Each time Add(), Copy(), or Run() is called, this will be incremented.
// The target length is used to compute HERE mode addresses
// for COPY instructions, and is also written into the header
// of the delta window when Output() is called.
//
size_t target_length_;
const VCDiffCodeTableData* code_table_data_;
// The instruction map facilitates finding an opcode quickly given an
// instruction inst, size, and mode. This is an alternate representation
// of the same information that is found in code_table_data_.
//
const VCDiffInstructionMap* instruction_map_;
// The zero-based index within instructions_and_sizes_ of the byte
// that contains the last single-instruction opcode generated by
// EncodeInstruction(). (See that function for exhaustive details.)
// It is necessary to use an index rather than a pointer for this value
// because instructions_and_sizes_ may be resized, which would invalidate
// any pointers into its data buffer. The value -1 is reserved to mean that
// either no opcodes have been generated yet, or else the last opcode
// generated was a double-instruction opcode.
//
int last_opcode_index_;
// If true, an Adler32 checksum of the target window data will be written as
// a variable-length integer, just after the size of the addresses section.
//
bool add_checksum_;
// The checksum to be written to the current target window,
// if add_checksum_ is true.
// This will not be calculated based on the individual calls to Add(), Run(),
// and Copy(), which would be unnecessarily expensive. Instead, the code
// that uses the VCDiffCodeTableWriter object is expected to calculate
// the checksum all at once and to call AddChecksum() with that value.
// Must be called sometime before calling Output(), though it can be called
// either before or after the calls to Add(), Run(), and Copy().
//
VCDChecksum checksum_;
// The value of match_counts_[n] is equal to the number of matches
// of length n (that is, COPY instructions of size n) found so far.
std::vector<int> match_counts_;
// Making these private avoids implicit copy constructor & assignment operator
VCDiffCodeTableWriter(const VCDiffCodeTableWriter&); // NOLINT
void operator=(const VCDiffCodeTableWriter&);
};
}; // namespace open_vcdiff
#endif // OPEN_VCDIFF_ENCODETABLE_H_