| // Copyright 2007 Google Inc. |
| // Author: Lincoln Smith |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Classes to implement an Encoder for the format described in |
| // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. |
| // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html |
| // |
| // The RFC describes the possibility of using a secondary compressor |
| // to further reduce the size of each section of the VCDIFF output. |
| // That feature is not supported in this implementation of the encoder |
| // and decoder. |
| // No secondary compressor types have been publicly registered with |
| // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids |
| // in the more than five years since the registry was created, so there |
| // is no standard set of compressor IDs which would be generated by other |
| // encoders or accepted by other decoders. |
| |
| #include <config.h> |
| #include "google/vcencoder.h" |
| #include <vector> |
| #include "checksum.h" |
| #include "encodetable.h" |
| #include "logging.h" |
| #include "google/output_string.h" |
| #include "vcdiffengine.h" |
| |
| namespace open_vcdiff { |
| |
| HashedDictionary::HashedDictionary(const char* dictionary_contents, |
| size_t dictionary_size) |
| : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { } |
| |
| HashedDictionary::~HashedDictionary() { delete engine_; } |
| |
| bool HashedDictionary::Init() { |
| return const_cast<VCDiffEngine*>(engine_)->Init(); |
| } |
| |
| class VCDiffStreamingEncoderImpl { |
| public: |
| VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary, |
| VCDiffFormatExtensionFlags format_extensions, |
| bool look_for_target_matches); |
| |
| // These functions are identical to their counterparts |
| // in VCDiffStreamingEncoder. |
| bool StartEncoding(OutputStringInterface* out); |
| |
| bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out); |
| |
| bool FinishEncoding(OutputStringInterface* out); |
| |
| const std::vector<int>& match_counts() const { |
| return coder_.match_counts(); |
| } |
| |
| private: |
| // Write the header (as defined in section 4.1 of the RFC) to *output. |
| // This includes information that can be gathered |
| // before the first chunk of input is available. |
| void WriteHeader(OutputStringInterface* output) const; |
| |
| const VCDiffEngine* engine_; |
| |
| // This implementation of the encoder uses the default |
| // code table. A VCDiffCodeTableWriter could also be constructed |
| // using a custom code table. |
| VCDiffCodeTableWriter coder_; |
| |
| const VCDiffFormatExtensionFlags format_extensions_; |
| |
| // Determines whether to look for matches within the previously encoded |
| // target data, or just within the source (dictionary) data. Please see |
| // vcencoder.h for a full explanation of this parameter. |
| const bool look_for_target_matches_; |
| |
| // This state variable is used to ensure that StartEncoding(), EncodeChunk(), |
| // and FinishEncoding() are called in the correct order. It will be true |
| // if StartEncoding() has been called, followed by zero or more calls to |
| // EncodeChunk(), but FinishEncoding() has not yet been called. It will |
| // be false initially, and also after FinishEncoding() has been called. |
| bool encode_chunk_allowed_; |
| |
| // Making these private avoids implicit copy constructor & assignment operator |
| VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&); // NOLINT |
| void operator=(const VCDiffStreamingEncoderImpl&); |
| }; |
| |
| inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl( |
| const HashedDictionary* dictionary, |
| VCDiffFormatExtensionFlags format_extensions, |
| bool look_for_target_matches) |
| : engine_(dictionary->engine()), |
| coder_((format_extensions & VCD_FORMAT_INTERLEAVED) != 0), |
| format_extensions_(format_extensions), |
| look_for_target_matches_(look_for_target_matches), |
| encode_chunk_allowed_(false) { } |
| |
| inline void VCDiffStreamingEncoderImpl::WriteHeader( |
| OutputStringInterface* output) const { |
| DeltaFileHeader header_data = { |
| 0xD6, // Header1: "V" | 0x80 |
| 0xC3, // Header2: "C" | 0x80 |
| 0xC4, // Header3: "D" | 0x80 |
| 0x00, // Header4: Draft standard format |
| 0x00 }; // Hdr_Indicator: |
| // No compression, no custom code table |
| if (format_extensions_ != VCD_STANDARD_FORMAT) { |
| header_data.header4 = 'S'; // Header4: VCDIFF/SDCH, extensions used |
| } |
| output->append(reinterpret_cast<const char*>(&header_data), |
| sizeof(header_data)); |
| // If custom cache table sizes or a custom code table were used |
| // for encoding, here is where they would be appended to *output. |
| // This implementation of the encoder does not use those features, |
| // although the decoder can understand and interpret them. |
| } |
| |
| inline bool VCDiffStreamingEncoderImpl::StartEncoding( |
| OutputStringInterface* out) { |
| if (!coder_.Init(engine_->dictionary_size())) { |
| LOG(DFATAL) << "Internal error: " |
| "Initialization of code table writer failed" << LOG_ENDL; |
| return false; |
| } |
| WriteHeader(out); |
| encode_chunk_allowed_ = true; |
| return true; |
| } |
| |
| inline bool VCDiffStreamingEncoderImpl::EncodeChunk( |
| const char* data, |
| size_t len, |
| OutputStringInterface* out) { |
| if (!encode_chunk_allowed_) { |
| LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL; |
| return false; |
| } |
| if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) { |
| coder_.AddChecksum(ComputeAdler32(data, len)); |
| } |
| engine_->Encode(data, len, look_for_target_matches_, out, &coder_); |
| return true; |
| } |
| |
| inline bool VCDiffStreamingEncoderImpl::FinishEncoding( |
| OutputStringInterface* /*out*/) { |
| if (!encode_chunk_allowed_) { |
| LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL; |
| return false; |
| } |
| encode_chunk_allowed_ = false; |
| // There should not be any need to output more data |
| // since EncodeChunk() encodes a complete target window |
| // and there is no end-of-delta-file marker. |
| return true; |
| } |
| |
| VCDiffStreamingEncoder::VCDiffStreamingEncoder( |
| const HashedDictionary* dictionary, |
| VCDiffFormatExtensionFlags format_extensions, |
| bool look_for_target_matches) |
| : impl_(new VCDiffStreamingEncoderImpl(dictionary, |
| format_extensions, |
| look_for_target_matches)) { } |
| |
| VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; } |
| |
| bool VCDiffStreamingEncoder::StartEncodingToInterface( |
| OutputStringInterface* out) { |
| return impl_->StartEncoding(out); |
| } |
| |
| bool VCDiffStreamingEncoder::EncodeChunkToInterface( |
| const char* data, |
| size_t len, |
| OutputStringInterface* out) { |
| return impl_->EncodeChunk(data, len, out); |
| } |
| |
| bool VCDiffStreamingEncoder::FinishEncodingToInterface( |
| OutputStringInterface* out) { |
| return impl_->FinishEncoding(out); |
| } |
| |
| void VCDiffStreamingEncoder::GetMatchCounts( |
| std::vector<int>* match_counts) const { |
| if (!match_counts) { |
| LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL; |
| return; |
| } |
| *match_counts = impl_->match_counts(); |
| } |
| |
| bool VCDiffEncoder::EncodeToInterface(const char* target_data, |
| size_t target_len, |
| OutputStringInterface* out) { |
| out->clear(); |
| if (!encoder_) { |
| if (!dictionary_.Init()) { |
| LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL; |
| return false; |
| } |
| encoder_ = new VCDiffStreamingEncoder(&dictionary_, |
| flags_, |
| look_for_target_matches_); |
| } |
| if (!encoder_->StartEncodingToInterface(out)) { |
| return false; |
| } |
| if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) { |
| return false; |
| } |
| return encoder_->FinishEncodingToInterface(out); |
| } |
| |
| } // namespace open_vcdiff |