blob: c69fc28df52864f081df4eb1aa07c8e1751f7be8 [file] [log] [blame]
// Copyright 2006 Google Inc.
// Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef OPEN_VCDIFF_VCDIFFENGINE_H_
#define OPEN_VCDIFF_VCDIFFENGINE_H_
#include <config.h>
#include <stddef.h> // size_t
#include <stdint.h> // uint32_t
namespace open_vcdiff {
class BlockHash;
class OutputStringInterface;
class CodeTableWriterInterface;
// The VCDiffEngine class is used to find the optimal encoding (in terms of COPY
// and ADD instructions) for a given dictionary and target window. To write the
// instructions for this encoding, it calls the Copy() and Add() methods of the
// code table writer object which is passed as an argument to Encode().
class VCDiffEngine {
public:
// The minimum size of a string match that is worth putting into a COPY
// instruction. Since this value is more than twice the block size, the
// encoder will always discover a match of this size, no matter whether it is
// aligned on block boundaries in the dictionary text.
static const size_t kMinimumMatchSize = 32;
VCDiffEngine(const char* dictionary, size_t dictionary_size);
~VCDiffEngine();
// Initializes the object before use.
// This method must be called after constructing a VCDiffEngine object,
// and before any other method may be called. It should not be called
// twice on the same object.
// Returns true if initialization succeeded, or false if an error occurred,
// in which case no other method except the destructor may then be used
// on the object.
// The Init() method is the only one allowed to treat hashed_dictionary_
// as non-const.
bool Init();
size_t dictionary_size() const { return dictionary_size_; }
// Main worker function. Finds the best matches between the dictionary
// (source) and target data, and uses the coder to write a
// delta file window into *diff.
// Because it is a const function, many threads
// can call Encode() at once for the same VCDiffEngine object.
// All thread-specific data will be stored in the coder and diff arguments.
// The coder object must have been fully initialized (by calling its Init()
// method, if any) before calling this function.
//
// look_for_target_matches determines whether to look for matches
// within the previously encoded target data, or just within the source
// (dictionary) data. Please see vcencoder.h for a full explanation
// of this parameter.
void Encode(const char* target_data,
size_t target_size,
bool look_for_target_matches,
OutputStringInterface* diff,
CodeTableWriterInterface* coder) const;
private:
static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) {
return size >= kMinimumMatchSize;
}
// The following two functions use templates to produce two different
// versions of the code depending on the value of the option
// look_for_target_matches. This approach saves a test-and-branch instruction
// within the inner loop of EncodeCopyForBestMatch.
template<bool look_for_target_matches>
void EncodeInternal(const char* target_data,
size_t target_size,
OutputStringInterface* diff,
CodeTableWriterInterface* coder) const;
// If look_for_target_matches is true, then target_hash must point to a valid
// BlockHash object, and cannot be NULL. If look_for_target_matches is
// false, then the value of target_hash is ignored.
template<bool look_for_target_matches>
size_t EncodeCopyForBestMatch(uint32_t hash_value,
const char* target_candidate_start,
const char* unencoded_target_start,
size_t unencoded_target_size,
const BlockHash* target_hash,
CodeTableWriterInterface* coder) const;
void AddUnmatchedRemainder(const char* unencoded_target_start,
size_t unencoded_target_size,
CodeTableWriterInterface* coder) const;
void FinishEncoding(size_t target_size,
OutputStringInterface* diff,
CodeTableWriterInterface* coder) const;
const char* dictionary_; // A copy of the dictionary contents
const size_t dictionary_size_;
// A hash that contains one element for every kBlockSize bytes of dictionary_.
// This can be reused to encode many different target strings using the
// same dictionary, without the need to compute the hash values each time.
const BlockHash* hashed_dictionary_;
// Making these private avoids implicit copy constructor & assignment operator
VCDiffEngine(const VCDiffEngine&);
void operator=(const VCDiffEngine&);
};
} // namespace open_vcdiff
#endif // OPEN_VCDIFF_VCDIFFENGINE_H_