// sdch/open-vcdiff/src/vcencoder.cc - platform/external/chromium - Git at Google

 // Copyright 2007 Google Inc.
 // Author: Lincoln Smith
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 // Classes to implement an Encoder for the format described in
 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
 //
 // The RFC describes the possibility of using a secondary compressor
 // to further reduce the size of each section of the VCDIFF output.
 // That feature is not supported in this implementation of the encoder
 // and decoder.
 // No secondary compressor types have been publicly registered with
 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
 // in the more than five years since the registry was created, so there
 // is no standard set of compressor IDs which would be generated by other
 // encoders or accepted by other decoders.

 #include <config.h>
 #include "google/vcencoder.h"
 #include <vector>
 #include "checksum.h"
 #include "encodetable.h"
 #include "logging.h"
 #include "google/output_string.h"
 #include "vcdiffengine.h"

 namespace open_vcdiff {

 // Allocates a VCDiffEngine for the given dictionary bytes.  The engine is
 // released again in the destructor.
 HashedDictionary::HashedDictionary(
     const char* dictionary_contents,
     size_t dictionary_size)
     : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) {
 }

 // Frees the engine that the constructor allocated.
 HashedDictionary::~HashedDictionary() {
   delete engine_;
 }

 bool HashedDictionary::Init() {
   return const_cast<VCDiffEngine*>(engine_)->Init();
 }

 // Implementation object behind VCDiffStreamingEncoder.  It owns the code
 // table writer, remembers the format flags, and enforces the
 // StartEncoding() / EncodeChunk() / FinishEncoding() call order.
 class VCDiffStreamingEncoderImpl {
  public:
   VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
                              VCDiffFormatExtensionFlags format_extensions,
                              bool look_for_target_matches);

   // The three operations below mirror the same-named operations of
   // VCDiffStreamingEncoder, which simply forwards to them.
   bool StartEncoding(OutputStringInterface* out);
   bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
   bool FinishEncoding(OutputStringInterface* out);

   // Read-only access to the match counts kept by the code table writer.
   const std::vector<int>& match_counts() const {
     return coder_.match_counts();
   }

  private:
   // Declared private so that no implicit copy constructor or assignment
   // operator is generated for this class.
   VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
   void operator=(const VCDiffStreamingEncoderImpl&);

   // Appends the delta file header (RFC 3284, section 4.1) to *output.
   // Everything in it is known before any input chunk has been seen.
   void WriteHeader(OutputStringInterface* output) const;

   // Encoding engine obtained from the HashedDictionary passed to the
   // constructor.
   const VCDiffEngine* engine_;

   // Code table writer.  This encoder always uses the default code table,
   // although a VCDiffCodeTableWriter could also be built around a custom one.
   VCDiffCodeTableWriter coder_;

   // Format extension flags requested by the caller.
   const VCDiffFormatExtensionFlags format_extensions_;

   // Whether matches are searched for in previously encoded target data as
   // well as in the source (dictionary) data.  vcencoder.h explains this
   // parameter in full.
   const bool look_for_target_matches_;

   // Call-order guard.  False initially; set to true by a successful
   // StartEncoding(); stays true across any number of EncodeChunk() calls;
   // reset to false by FinishEncoding().
   bool encode_chunk_allowed_;
 };

 // Captures the dictionary's engine and the caller's options.  The code table
 // writer is told whether the interleaved format extension was requested.
 // Encoding is not permitted until StartEncoding() has been called.
 inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
     const HashedDictionary* dictionary,
     VCDiffFormatExtensionFlags format_extensions,
     bool look_for_target_matches)
     : engine_(dictionary->engine()),
       coder_(0 != (format_extensions & VCD_FORMAT_INTERLEAVED)),
       format_extensions_(format_extensions),
       look_for_target_matches_(look_for_target_matches),
       encode_chunk_allowed_(false) {
 }

 // Appends the fixed-size delta file header to *output.
 //
 // The first three bytes are the letters "V", "C", "D", each OR-ed with 0x80.
 // The fourth byte is 0x00 for the draft standard format and 'S'
 // (VCDIFF/SDCH) when any format extension is in use.  The final byte is
 // Hdr_Indicator, left at zero: no compression and no custom code table.
 inline void VCDiffStreamingEncoderImpl::WriteHeader(
     OutputStringInterface* output) const {
   const bool uses_extensions = (format_extensions_ != VCD_STANDARD_FORMAT);
   DeltaFileHeader header_data = {
     0xD6,    // Header1
     0xC3,    // Header2
     0xC4,    // Header3
     0x00,    // Header4
     0x00 };  // Hdr_Indicator
   if (uses_extensions) {
     header_data.header4 = 'S';
   }
   const char* const header_bytes =
       reinterpret_cast<const char*>(&header_data);
   output->append(header_bytes, sizeof(header_data));
   // Custom cache table sizes or a custom code table would be appended to
   // *output at this point.  This encoder never produces them, although the
   // decoder is able to interpret them.
 }

 // Initializes the code table writer with the dictionary size, writes the
 // file header to *out and opens the encoder for EncodeChunk() calls.
 // Returns false (after logging) if the writer cannot be initialized; in that
 // case nothing is written and the encoder stays closed.
 inline bool VCDiffStreamingEncoderImpl::StartEncoding(
     OutputStringInterface* out) {
   const bool coder_ready = coder_.Init(engine_->dictionary_size());
   if (coder_ready) {
     WriteHeader(out);
     encode_chunk_allowed_ = true;
     return true;
   }
   LOG(DFATAL) << "Internal error: "
                  "Initialization of code table writer failed" << LOG_ENDL;
   return false;
 }

 // Encodes one chunk of target data into *out.
 //
 // Returns false (after logging) when the encoder has not been opened with
 // StartEncoding().  Otherwise, if the checksum extension is enabled, an
 // Adler32 checksum of the chunk is handed to the code table writer before
 // the engine encodes the chunk, and true is returned.
 inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
     const char* data,
     size_t len,
     OutputStringInterface* out) {
   if (encode_chunk_allowed_) {
     const bool wants_checksum =
         (format_extensions_ & VCD_FORMAT_CHECKSUM) != 0;
     if (wants_checksum) {
       coder_.AddChecksum(ComputeAdler32(data, len));
     }
     engine_->Encode(data, len, look_for_target_matches_, out, &coder_);
     return true;
   }
   LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL;
   return false;
 }

 // Closes the encoder so that EncodeChunk() is rejected until the next
 // StartEncoding().  Returns false (after logging) if the encoder was not
 // open.  The output argument is unused: each EncodeChunk() call emits a
 // complete target window and the format has no end-of-delta-file marker.
 inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
     OutputStringInterface* /*out*/) {
   const bool was_open = encode_chunk_allowed_;
   if (was_open) {
     encode_chunk_allowed_ = false;
   } else {
     LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL;
   }
   return was_open;
 }

 // Creates the implementation object, passing all three arguments straight
 // through.  The destructor deletes it.
 VCDiffStreamingEncoder::VCDiffStreamingEncoder(
     const HashedDictionary* dictionary,
     VCDiffFormatExtensionFlags format_extensions,
     bool look_for_target_matches)
     : impl_(new VCDiffStreamingEncoderImpl(
           dictionary, format_extensions, look_for_target_matches)) {
 }

 // Destroys the implementation object created by the constructor.
 VCDiffStreamingEncoder::~VCDiffStreamingEncoder() {
   delete impl_;
 }

 // Forwards to the implementation's StartEncoding() and returns its result.
 bool VCDiffStreamingEncoder::StartEncodingToInterface(
     OutputStringInterface* out) {
   const bool started = impl_->StartEncoding(out);
   return started;
 }

 // Forwards to the implementation's EncodeChunk() and returns its result.
 bool VCDiffStreamingEncoder::EncodeChunkToInterface(
     const char* data, size_t len, OutputStringInterface* out) {
   const bool encoded = impl_->EncodeChunk(data, len, out);
   return encoded;
 }

 // Forwards to the implementation's FinishEncoding() and returns its result.
 bool VCDiffStreamingEncoder::FinishEncodingToInterface(
     OutputStringInterface* out) {
   const bool finished = impl_->FinishEncoding(out);
   return finished;
 }

 // Copies the implementation's match counts into *match_counts.
 // A null argument is logged and otherwise ignored.
 void VCDiffStreamingEncoder::GetMatchCounts(
     std::vector<int>* match_counts) const {
   if (match_counts) {
     *match_counts = impl_->match_counts();
     return;
   }
   LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL;
 }

 bool VCDiffEncoder::EncodeToInterface(const char* target_data,
                                       size_t target_len,
                                       OutputStringInterface* out) {
   out->clear();
   if (!encoder_) {
     if (!dictionary_.Init()) {
       LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL;
       return false;
     }
     encoder_ = new VCDiffStreamingEncoder(&dictionary_,
                                           flags_,
                                           look_for_target_matches_);
   }
   if (!encoder_->StartEncodingToInterface(out)) {
     return false;
   }
   if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
     return false;
   }
   return encoder_->FinishEncodingToInterface(out);
 }

 }  // namespace open_vcdiff
	// Copyright 2007 Google Inc.
	// Author: Lincoln Smith
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	//
	// Classes to implement an Encoder for the format described in
	// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
	// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
	//
	// The RFC describes the possibility of using a secondary compressor
	// to further reduce the size of each section of the VCDIFF output.
	// That feature is not supported in this implementation of the encoder
	// and decoder.
	// No secondary compressor types have been publicly registered with
	// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
	// in the more than five years since the registry was created, so there
	// is no standard set of compressor IDs which would be generated by other
	// encoders or accepted by other decoders.

	#include <config.h>
	#include "google/vcencoder.h"
	#include <vector>
	#include "checksum.h"
	#include "encodetable.h"
	#include "logging.h"
	#include "google/output_string.h"
	#include "vcdiffengine.h"

	namespace open_vcdiff {

	// Allocates a VCDiffEngine for the given dictionary bytes.  The engine is
	// released again in the destructor.
	HashedDictionary::HashedDictionary(
	    const char* dictionary_contents,
	    size_t dictionary_size)
	    : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) {
	}

	// Frees the engine that the constructor allocated.
	HashedDictionary::~HashedDictionary() {
	  delete engine_;
	}

	bool HashedDictionary::Init() {
	return const_cast<VCDiffEngine*>(engine_)->Init();
	}

	// Implementation object behind VCDiffStreamingEncoder.  It owns the code
	// table writer, remembers the format flags, and enforces the
	// StartEncoding() / EncodeChunk() / FinishEncoding() call order.
	class VCDiffStreamingEncoderImpl {
	 public:
	  VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
	                             VCDiffFormatExtensionFlags format_extensions,
	                             bool look_for_target_matches);

	  // The three operations below mirror the same-named operations of
	  // VCDiffStreamingEncoder, which simply forwards to them.
	  bool StartEncoding(OutputStringInterface* out);
	  bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
	  bool FinishEncoding(OutputStringInterface* out);

	  // Read-only access to the match counts kept by the code table writer.
	  const std::vector<int>& match_counts() const {
	    return coder_.match_counts();
	  }

	 private:
	  // Declared private so that no implicit copy constructor or assignment
	  // operator is generated for this class.
	  VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
	  void operator=(const VCDiffStreamingEncoderImpl&);

	  // Appends the delta file header (RFC 3284, section 4.1) to *output.
	  // Everything in it is known before any input chunk has been seen.
	  void WriteHeader(OutputStringInterface* output) const;

	  // Encoding engine obtained from the HashedDictionary passed to the
	  // constructor.
	  const VCDiffEngine* engine_;

	  // Code table writer.  This encoder always uses the default code table,
	  // although a VCDiffCodeTableWriter could also be built around a custom one.
	  VCDiffCodeTableWriter coder_;

	  // Format extension flags requested by the caller.
	  const VCDiffFormatExtensionFlags format_extensions_;

	  // Whether matches are searched for in previously encoded target data as
	  // well as in the source (dictionary) data.  vcencoder.h explains this
	  // parameter in full.
	  const bool look_for_target_matches_;

	  // Call-order guard.  False initially; set to true by a successful
	  // StartEncoding(); stays true across any number of EncodeChunk() calls;
	  // reset to false by FinishEncoding().
	  bool encode_chunk_allowed_;
	};

	// Captures the dictionary's engine and the caller's options.  The code table
	// writer is told whether the interleaved format extension was requested.
	// Encoding is not permitted until StartEncoding() has been called.
	inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
	    const HashedDictionary* dictionary,
	    VCDiffFormatExtensionFlags format_extensions,
	    bool look_for_target_matches)
	    : engine_(dictionary->engine()),
	      coder_(0 != (format_extensions & VCD_FORMAT_INTERLEAVED)),
	      format_extensions_(format_extensions),
	      look_for_target_matches_(look_for_target_matches),
	      encode_chunk_allowed_(false) {
	}

	// Appends the fixed-size delta file header to *output.
	//
	// The first three bytes are the letters "V", "C", "D", each OR-ed with 0x80.
	// The fourth byte is 0x00 for the draft standard format and 'S'
	// (VCDIFF/SDCH) when any format extension is in use.  The final byte is
	// Hdr_Indicator, left at zero: no compression and no custom code table.
	inline void VCDiffStreamingEncoderImpl::WriteHeader(
	    OutputStringInterface* output) const {
	  const bool uses_extensions = (format_extensions_ != VCD_STANDARD_FORMAT);
	  DeltaFileHeader header_data = {
	    0xD6,    // Header1: "V" | 0x80
	    0xC3,    // Header2: "C" | 0x80
	    0xC4,    // Header3: "D" | 0x80
	    0x00,    // Header4
	    0x00 };  // Hdr_Indicator
	  if (uses_extensions) {
	    header_data.header4 = 'S';
	  }
	  const char* const header_bytes =
	      reinterpret_cast<const char*>(&header_data);
	  output->append(header_bytes, sizeof(header_data));
	  // Custom cache table sizes or a custom code table would be appended to
	  // *output at this point.  This encoder never produces them, although the
	  // decoder is able to interpret them.
	}

	// Initializes the code table writer with the dictionary size, writes the
	// file header to *out and opens the encoder for EncodeChunk() calls.
	// Returns false (after logging) if the writer cannot be initialized; in that
	// case nothing is written and the encoder stays closed.
	inline bool VCDiffStreamingEncoderImpl::StartEncoding(
	    OutputStringInterface* out) {
	  const bool coder_ready = coder_.Init(engine_->dictionary_size());
	  if (coder_ready) {
	    WriteHeader(out);
	    encode_chunk_allowed_ = true;
	    return true;
	  }
	  LOG(DFATAL) << "Internal error: "
	                 "Initialization of code table writer failed" << LOG_ENDL;
	  return false;
	}

	// Encodes one chunk of target data into *out.
	//
	// Returns false (after logging) when the encoder has not been opened with
	// StartEncoding().  Otherwise, if the checksum extension is enabled, an
	// Adler32 checksum of the chunk is handed to the code table writer before
	// the engine encodes the chunk, and true is returned.
	inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
	    const char* data,
	    size_t len,
	    OutputStringInterface* out) {
	  if (encode_chunk_allowed_) {
	    const bool wants_checksum =
	        (format_extensions_ & VCD_FORMAT_CHECKSUM) != 0;
	    if (wants_checksum) {
	      coder_.AddChecksum(ComputeAdler32(data, len));
	    }
	    engine_->Encode(data, len, look_for_target_matches_, out, &coder_);
	    return true;
	  }
	  LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL;
	  return false;
	}

	// Closes the encoder so that EncodeChunk() is rejected until the next
	// StartEncoding().  Returns false (after logging) if the encoder was not
	// open, true otherwise.
	//
	// The output argument is intentionally unnamed and unused: each
	// EncodeChunk() call emits a complete target window and the format has no
	// end-of-delta-file marker, so there is nothing left to write here.
	inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
	    OutputStringInterface* /*out*/) {
	  if (!encode_chunk_allowed_) {
	    LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL;
	    return false;
	  }
	  encode_chunk_allowed_ = false;
	  return true;
	}

	// Creates the implementation object, passing all three arguments straight
	// through.  The destructor deletes it.
	VCDiffStreamingEncoder::VCDiffStreamingEncoder(
	    const HashedDictionary* dictionary,
	    VCDiffFormatExtensionFlags format_extensions,
	    bool look_for_target_matches)
	    : impl_(new VCDiffStreamingEncoderImpl(
	          dictionary, format_extensions, look_for_target_matches)) {
	}

	// Destroys the implementation object created by the constructor.
	VCDiffStreamingEncoder::~VCDiffStreamingEncoder() {
	  delete impl_;
	}

	// Forwards to the implementation's StartEncoding() and returns its result.
	bool VCDiffStreamingEncoder::StartEncodingToInterface(
	    OutputStringInterface* out) {
	  const bool started = impl_->StartEncoding(out);
	  return started;
	}

	// Forwards to the implementation's EncodeChunk() and returns its result.
	bool VCDiffStreamingEncoder::EncodeChunkToInterface(
	    const char* data, size_t len, OutputStringInterface* out) {
	  const bool encoded = impl_->EncodeChunk(data, len, out);
	  return encoded;
	}

	// Forwards to the implementation's FinishEncoding() and returns its result.
	bool VCDiffStreamingEncoder::FinishEncodingToInterface(
	    OutputStringInterface* out) {
	  const bool finished = impl_->FinishEncoding(out);
	  return finished;
	}

	// Copies the implementation's match counts into *match_counts.
	// A null argument is logged and otherwise ignored.
	void VCDiffStreamingEncoder::GetMatchCounts(
	    std::vector<int>* match_counts) const {
	  if (match_counts) {
	    *match_counts = impl_->match_counts();
	    return;
	  }
	  LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL;
	}

	bool VCDiffEncoder::EncodeToInterface(const char* target_data,
	size_t target_len,
	OutputStringInterface* out) {
	out->clear();
	if (!encoder_) {
	if (!dictionary_.Init()) {
	LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL;
	return false;
	}
	encoder_ = new VCDiffStreamingEncoder(&dictionary_,
	flags_,
	look_for_target_matches_);
	}
	if (!encoder_->StartEncodingToInterface(out)) {
	return false;
	}
	if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
	return false;
	}
	return encoder_->FinishEncodingToInterface(out);
	}

	} // namespace open_vcdiff