Upgrade to the latest .webm project code.

Change-Id: I33907e0c9ded667e54d31e2f9226c77501731c6c
diff --git a/AUTHORS b/AUTHORS
index 9686ac1..6708d5a 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,4 +1,34 @@
-# Names should be added to this file like so:
-# Name or Organization <email address>
+# This file is automatically generated from the git commit history
+# by tools/gen_authors.sh.
 
+Adrian Grange <agrange@google.com>
+Alex Converse <alex.converse@gmail.com>
+Andres Mejia <mcitadel@gmail.com>
+Fabio Pedretti <fabio.ped@libero.it>
+Frank Galligan <fgalligan@google.com>
+Fredrik Söderquist <fs@opera.com>
+Fritz Koenig <frkoenig@google.com>
+Giuseppe Scrivano <gscrivano@gnu.org>
+Guillermo Ballester Valor <gbvalor@gmail.com>
+James Zern <jzern@google.com>
+Jan Kratochvil <jan.kratochvil@redhat.com>
+Jeff Muizelaar <jmuizelaar@mozilla.com>
+Jim Bankoski <jimbankoski@google.com>
+Johann Koenig <johannkoenig@google.com>
+John Koleszar <jkoleszar@google.com>
+Justin Clift <justin@salasaga.org>
+Justin Lebar <justin.lebar@gmail.com>
+Luca Barbato <lu_zero@gentoo.org>
+Makoto Kato <makoto.kt@gmail.com>
+Michael Kohler <michaelkohler@live.com>
+Paul Wilkins <paulwilkins@google.com>
+Pavol Rusnak <stick@gk2.sk>
+Philip Jägenstedt <philipj@opera.com>
+Scott LaVarnway <slavarnway@google.com>
+Timothy B. Terriberry <tterribe@xiph.org>
+Tom Finegan <tomfinegan@google.com>
+Yaowu Xu <yaowu@google.com>
+Yunqing Wang <yunqingwang@google.com>
 Google Inc.
+The Mozilla Foundation
+The Xiph.Org Foundation
diff --git a/Android.mk b/Android.mk
index 035e831..8af6e5e 100644
--- a/Android.mk
+++ b/Android.mk
@@ -2,18 +2,15 @@
 include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES = \
-    vpx_codec/src/vpx_codec.c \
-    vpx_codec/src/vpx_decoder.c \
-    vpx_codec/src/vpx_decoder_compat.c \
-    vpx_codec/src/vpx_image.c \
-    vpx_codec/src/vpx_encoder.c \
+    vpx/src/vpx_codec.c \
+    vpx/src/vpx_decoder.c \
+    vpx/src/vpx_image.c \
     vpx_mem/vpx_mem.c \
     vpx_scale/generic/vpxscale.c \
     vpx_scale/generic/yv12config.c \
     vpx_scale/generic/yv12extend.c \
     vpx_scale/generic/scalesystemdependant.c \
     vpx_scale/generic/gen_scalers.c \
-    vp8/common/segmentation_common.c \
     vp8/common/alloccommon.c \
     vp8/common/blockd.c \
     vp8/common/debugmodes.c \
@@ -47,13 +44,14 @@
     vp8/decoder/dboolhuff.c \
     vp8/decoder/decodemv.c \
     vp8/decoder/decodframe.c \
-    vp8/decoder/demode.c \
     vp8/decoder/dequantize.c \
     vp8/decoder/detokenize.c \
     vp8/decoder/generic/dsystemdependent.c \
     vp8/decoder/onyxd_if.c \
     vp8/decoder/threading.c \
+    vp8/vp8_dx_iface.c \
     vpx_config.c \
+    vp8/decoder/arm/neon/idct_blk_neon.c \
 
 LOCAL_CFLAGS := \
     -DHAVE_CONFIG_H=vpx_config.h
@@ -102,9 +100,10 @@
     vp8/common/arm/neon/sixtappredict4x4_neon.s \
     vp8/common/arm/neon/sixtappredict8x4_neon.s \
     vp8/common/arm/neon/sixtappredict8x8_neon.s \
-    vp8/decoder/arm/neon/dequantdcidct_neon.s \
-    vp8/decoder/arm/neon/dequantidct_neon.s \
-    vp8/decoder/arm/neon/dequantizeb_neon.s
+    vp8/common/arm/neon/dc_only_idct_add_neon.s \
+    vp8/decoder/arm/neon/dequantizeb_neon.s \
+    vp8/decoder/arm/neon/dequant_dc_idct_neon.s \
+    vp8/decoder/arm/neon/dequant_idct_neon.s \
 
 # All the assembly sources must be converted from ADS to GAS compatible format
 VPX_GEN := $(addprefix $(intermediates)/, $(ASM_FILES))
diff --git a/CHANGELOG b/CHANGELOG
index d6c8ce8..2b28037 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,68 @@
+2010-09-02 v0.9.2
+  - Enhancements:
+      Disable frame dropping by default
+      Improved multithreaded performance
+      Improved Force Key Frame Behaviour
+      Increased rate control buffer level precision
+      Fix bug in 1st pass motion compensation
+      ivfenc: correct fixed kf interval, --disable-kf
+  - Speed:
+      Changed above and left context data layout
+      Rework idct calling structure.
+      Removed unnecessary MB_MODE_INFO copies
+      x86: SSSE3 sixtap prediction
+      Reworked IDCT to include reconstruction (add) step
+      Swap alt/gold/new/last frame buffer ptrs instead of copying.
+      Improve SSE2 loopfilter functions
+      Change bitreader to use a larger window.
+      Avoid loopfilter reinitialization when possible
+  - Quality:
+      Normalize quantizer's zero bin and rounding factors
+      Add trellis quantization.
+      Make the quantizer exact.
+      Updates to ARNR filtering algorithm
+      Fix breakout thresh computation for golden & AltRef frames
+      Redo the forward 4x4 dct
+      Improve the accuracy of forward walsh-hadamard transform
+      Further adjustment of RD behaviour with Q and Zbin.
+  - Build System:
+      Allow linking of libs built with MinGW to MSVC
+      Fix target auto-detection on mingw32
+      Allow --cpu= to work for x86.
+      configure: pass original arguments through to make dist
+      Fix builds without runtime CPU detection
+      msvs: fix install of codec sources
+      msvs: Change devenv.com command line for better msys support
+      msvs: Add vs9 targets.
+      Add x86_64-linux-icc target
+  - Bugs:
+      Potential crashes on older MinGW builds
+      Fix two-pass framrate for Y4M input.
+      Fixed simple loop filter, other crashes on ARM v6
+      arm: fix missing dependency with --enable-shared
+      configure: support directories containing .o
+      Replace pinsrw (SSE) with MMX instructions
+      apple: include proper mach primatives
+      Fixed rate control bug with long key frame interval.
+      Fix DSO link errors on x86-64 when not using a version script
+      Fixed buffer selection for UV in AltRef filtering
+
+
+2010-06-17 v0.9.1
+  - Enhancements:
+      * ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O
+      * Speed optimizations
+  - Bugfixes:
+      * Rate control
+      * Prevent out-of-bounds accesses on invalid data
+  - Build system updates:
+      * Detect toolchain to be used automatically for native builds
+      * Support building shared libraries
+      * Better autotools emulation (--prefix, --libdir, DESTDIR)
+  - Updated LICENSE
+      * http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html
+
+
 2010-05-18 v0.9.0
   - Initial open source release. Welcome to WebM and VP8!
 
diff --git a/CleanSpec.mk b/CleanSpec.mk
deleted file mode 100644
index b84e1b6..0000000
--- a/CleanSpec.mk
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright (C) 2007 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# If you don't need to do a full clean build but would like to touch
-# a file or delete some intermediate files, add a clean step to the end
-# of the list.  These steps will only be run once, if they haven't been
-# run before.
-#
-# E.g.:
-#     $(call add-clean-step, touch -c external/sqlite/sqlite3.h)
-#     $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libz_intermediates)
-#
-# Always use "touch -c" and "rm -f" or "rm -rf" to gracefully deal with
-# files that are missing or have been moved.
-#
-# Use $(PRODUCT_OUT) to get to the "out/target/product/blah/" directory.
-# Use $(OUT_DIR) to refer to the "out" directory.
-#
-# If you need to re-do something that's already mentioned, just copy
-# the command and add it to the bottom of the list.  E.g., if a change
-# that you made last week required touching a file and a change you
-# made today requires touching the same file, just copy the old
-# touch step and add it to the end of the list.
-#
-# ************************************************
-# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
-# ************************************************
-
-# For example:
-#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/APPS/AndroidTests_intermediates)
-#$(call add-clean-step, rm -rf $(OUT_DIR)/target/common/obj/JAVA_LIBRARIES/core_intermediates)
-#$(call add-clean-step, find $(OUT_DIR) -type f -name "IGTalkSession*" -print0 | xargs -0 rm -f)
-#$(call add-clean-step, rm -rf $(PRODUCT_OUT)/data/*)
-
-# ************************************************
-# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
-# ************************************************
diff --git a/LICENSE b/LICENSE
index 6b0e867..7a6f995 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,22 +1,20 @@
-Copyright (c) 2010, Google, Inc.
-
-All rights reserved.
+Copyright (c) 2010, Google Inc. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
+modification, are permitted provided that the following conditions are
+met:
 
-- Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer.
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
 
-- Redistributions in binary form must reproduce the above
-  copyright notice, this list of conditions and the following
-  disclaimer in the documentation and/or other materials provided
-  with the distribution.
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
 
-- Neither the name of Google nor the names of its contributors may
-  be used to endorse or promote products derived from this software
-  without specific prior written permission.
+  * Neither the name of Google nor the names of its contributors may
+    be used to endorse or promote products derived from this software
+    without specific prior written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@@ -30,19 +28,3 @@
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-Subject to the terms and conditions of the above License, Google
-hereby grants to You a perpetual, worldwide, non-exclusive,
-no-charge, royalty-free, irrevocable (except as stated in this
-section) patent license to make, have made, use, offer to sell, sell,
-import, and otherwise transfer this implementation of VP8, where such
-license applies only to those patent claims, both currently owned by
-Google and acquired in the future, licensable by Google that are
-necessarily infringed by this implementation of VP8. If You or your
-agent or exclusive licensee institute or order or agree to the
-institution of patent litigation against any entity (including a
-cross-claim or counterclaim in a lawsuit) alleging that this
-implementation of VP8 or any code incorporated within this
-implementation of VP8 constitutes direct or contributory patent
-infringement, or inducement of patent infringement, then any rights
-granted to You under this License for this implementation of VP8
-shall terminate as of the date such litigation is filed.
diff --git a/PATENTS b/PATENTS
new file mode 100644
index 0000000..4414d83
--- /dev/null
+++ b/PATENTS
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the WebM Project.
+
+Google hereby grants to you a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer, and otherwise run, modify and propagate the contents of this
+implementation of VP8, where such license applies only to those patent
+claims, both currently owned by Google and acquired in the future,
+licensable by Google that are necessarily infringed by this
+implementation of VP8. This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation. If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of VP8 or any code incorporated within this
+implementation of VP8 constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of VP8
+shall terminate as of the date such litigation is filed.
diff --git a/README b/README
index cfaf4cc..f0625d3 100644
--- a/README
+++ b/README
@@ -9,18 +9,18 @@
   the application.
 
   1. Prerequisites
-  
+
     * All x86 targets require the Yasm[1] assembler be installed.
     * All Windows builds require that Cygwin[2] be installed.
     * Building the documentation requires PHP[3] and Doxygen[4]. If you do not
       have these packages, you must pass --disable-install-docs to the
       configure script.
-    
+
     [1]: http://www.tortall.net/projects/yasm
     [2]: http://www.cygwin.com
     [3]: http://php.net
     [4]: http://www.doxygen.org
-    
+
   2. Out-of-tree builds
   Out of tree builds are a supported method of building the application. For
   an out of tree build, the source tree is kept separate from the object
diff --git a/args.c b/args.c
index f2ad697..5365e91 100644
--- a/args.c
+++ b/args.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/args.h b/args.h
index c063f53..4fafcf8 100644
--- a/args.h
+++ b/args.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/build/.gitattributes b/build/.gitattributes
new file mode 100644
index 0000000..03db79b
--- /dev/null
+++ b/build/.gitattributes
@@ -0,0 +1,2 @@
+*-vs8/*.rules -crlf
+*-msvs/*.rules -crlf
diff --git a/build/arm-wince-vs8/.gitattributes b/build/arm-wince-vs8/.gitattributes
deleted file mode 100644
index be1eeb9..0000000
--- a/build/arm-wince-vs8/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-*.rules -crlf
diff --git a/build/arm-wince-vs8/obj_int_extract.bat b/build/arm-wince-vs8/obj_int_extract.bat
index e58bdd6..a361fc3 100644
--- a/build/arm-wince-vs8/obj_int_extract.bat
+++ b/build/arm-wince-vs8/obj_int_extract.bat
@@ -1,12 +1,13 @@
 @echo off
-REM   Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-REM 
-REM   Use of this source code is governed by a BSD-style license and patent
-REM   grant that can be found in the LICENSE file in the root of the source
-REM   tree. All contributing project authors may be found in the AUTHORS
-REM   file in the root of the source tree.
+REM   Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+REM
+REM   Use of this source code is governed by a BSD-style license
+REM   that can be found in the LICENSE file in the root of the source
+REM   tree. An additional intellectual property rights grant can be found
+REM   in the file PATENTS.  All contributing project authors may
+REM   be found in the AUTHORS file in the root of the source tree.
 echo on
 
 
-cl /I ".\\" /I "..\vp6_decoder_sdk" /I "..\vp6_decoder_sdk\vpx_ports" /D "NDEBUG" /D "_WIN32_WCE=0x420" /D "UNDER_CE" /D "WIN32_PLATFORM_PSPC" /D "WINCE" /D "_LIB" /D "ARM" /D "_ARM_" /D "_UNICODE" /D "UNICODE" /D "HAVE_CONFIG_H" /FD /EHsc /MT /GS- /fp:fast /GR- /Fo"Pocket_PC_2003__ARMV4_\%1/" /Fd"Pocket_PC_2003__ARMV4_\%1/vc80.pdb" /W3 /nologo /c /TC ..\vp6_decoder_sdk\vp6_decoder\algo\common\arm\dec_asm_offsets_arm.c

+cl /I ".\\" /I "..\vp6_decoder_sdk" /I "..\vp6_decoder_sdk\vpx_ports" /D "NDEBUG" /D "_WIN32_WCE=0x420" /D "UNDER_CE" /D "WIN32_PLATFORM_PSPC" /D "WINCE" /D "_LIB" /D "ARM" /D "_ARM_" /D "_UNICODE" /D "UNICODE" /FD /EHsc /MT /GS- /fp:fast /GR- /Fo"Pocket_PC_2003__ARMV4_\%1/" /Fd"Pocket_PC_2003__ARMV4_\%1/vc80.pdb" /W3 /nologo /c /TC ..\vp6_decoder_sdk\vp6_decoder\algo\common\arm\dec_asm_offsets_arm.c

 obj_int_extract.exe rvds "Pocket_PC_2003__ARMV4_\%1/dec_asm_offsets_arm.obj"

diff --git a/build/arm-wince-vs8/vpx_decoder.sln b/build/arm-wince-vs8/vpx.sln
similarity index 97%
rename from build/arm-wince-vs8/vpx_decoder.sln
rename to build/arm-wince-vs8/vpx.sln
index 2262057..3e49929 100644
--- a/build/arm-wince-vs8/vpx_decoder.sln
+++ b/build/arm-wince-vs8/vpx.sln
@@ -8,7 +8,7 @@
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_int_extract", "obj_int_extract.vcproj", "{E1360C65-D375-4335-8057-7ED99CC3F9B2}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vpx_decoder", "vpx_decoder.vcproj", "{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vpx", "vpx.vcproj", "{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}"
 	ProjectSection(ProjectDependencies) = postProject
 		{E1360C65-D375-4335-8057-7ED99CC3F9B2} = {E1360C65-D375-4335-8057-7ED99CC3F9B2}
 	EndProjectSection
diff --git a/build/make/Makefile b/build/make/Makefile
index 412629e..1ca747a 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -1,10 +1,11 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
@@ -18,6 +19,7 @@
         done
 all: .DEFAULT
 clean:: .DEFAULT
+install:: .DEFAULT
 
 
 # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
@@ -29,7 +31,7 @@
 md5sum := $(if $(filter %openssl,$(md5sum)),$(md5sum) dgst -md5,$(md5sum))
 
 TGT_CC:=$(word 3, $(subst -, ,$(TOOLCHAIN)))
-install:
+dist:
 	@for t in $(ALL_TARGETS); do \
 	     $(MAKE) --no-print-directory target=$$t $(MAKECMDGOALS) || exit $$?;\
         done
@@ -37,13 +39,8 @@
 	@if [ -d "$(DIST_DIR)/src" ]; then \
             mkdir -p "$(DIST_DIR)/build"; \
             cd "$(DIST_DIR)/build"; \
-            if [ $(TGT_CC) = "rvct" ] ; then \
-				echo "../src/configure --target=$(TOOLCHAIN) --libc=$(ALT_LIBC)"; \
-				../src/configure --target=$(TOOLCHAIN) --libc=$(ALT_LIBC); \
-			else \
-				echo "../src/configure --target=$(TOOLCHAIN)"; \
-				../src/configure --target=$(TOOLCHAIN); \
-			fi; \
+            echo "Rerunning configure $(CONFIGURE_ARGS)"; \
+            ../src/configure $(CONFIGURE_ARGS); \
             $(if $(filter vs%,$(TGT_CC)),make NO_LAUNCH_DEVENV=1;) \
         fi
 	@if [ -d "$(DIST_DIR)" ]; then \
@@ -55,7 +52,6 @@
         fi
 
 
-svnstat: ALL_TARGETS:=$(firstword $(ALL_TARGETS))
 endif
 
 ifneq ($(target),)
@@ -94,10 +90,10 @@
 	rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.s.o=.asm.s)
 	rm -f $(CLEAN-OBJS)
 
+.PHONY: dist
+dist:
 .PHONY: install
-install:
-.PHONY: install-helper
-install-helper:
+install::
 
 $(BUILD_PFX)%.c.d: %.c
 	$(if $(quiet),@echo "    [DEP] $@")
@@ -218,6 +214,20 @@
 	$(qexec)$$(AR) $$(ARFLAGS) $$@ $$?
 endef
 
+define so_template
+# Not using a pattern rule here because we don't want to generate empty
+# archives when they are listed as a dependency in files not responsible
+# for creating them.
+#
+# This needs further abstraction for dealing with non-GNU linkers.
+$(1):
+	$(if $(quiet),@echo "    [LD] $$@")
+	$(qexec)$$(LD) -shared $$(LDFLAGS) \
+            -Wl,--no-undefined -Wl,-soname,$$(SONAME) \
+            -Wl,--version-script,$$(SO_VERSION_SCRIPT) -o $$@ \
+            $$(filter %.o,$$?) $$(extralibs)
+endef
+
 define lipo_lib_template
 $(1): $(addsuffix /$(1),$(FAT_ARCHS))
 	$(if $(quiet),@echo "    [LIPO] $$@")
@@ -254,6 +264,9 @@
 	@touch $@
 
 INSTALL-DOCS=$(call cond_enabled,CONFIG_INSTALL_DOCS,INSTALL-DOCS)
+ifeq ($(MAKECMDGOALS),dist)
+INSTALL-DOCS+=$(call cond_enabled,CONFIG_INSTALL_DOCS,DIST-DOCS)
+endif
 .install-docs: .docs $(addprefix $(DIST_DIR)/,$(INSTALL-DOCS))
 	@touch $@
 
@@ -265,6 +278,9 @@
 	@touch $@
 
 INSTALL-BINS=$(call cond_enabled,CONFIG_INSTALL_BINS,INSTALL-BINS)
+ifeq ($(MAKECMDGOALS),dist)
+INSTALL-BINS+=$(call cond_enabled,CONFIG_INSTALL_BINS,DIST-BINS)
+endif
 .install-bins: .bins $(addprefix $(DIST_DIR)/,$(INSTALL-BINS))
 	@touch $@
 
@@ -275,8 +291,12 @@
 .libs: $(LIBS)
 	@touch $@
 $(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
+$(foreach lib,$(filter %so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
 
 INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
+ifeq ($(MAKECMDGOALS),dist)
+INSTALL-LIBS+=$(call cond_enabled,CONFIG_INSTALL_LIBS,DIST-LIBS)
+endif
 .install-libs: .libs $(addprefix $(DIST_DIR)/,$(INSTALL-LIBS))
 	@touch $@
 
@@ -289,6 +309,9 @@
 	@touch $@
 
 INSTALL-PROJECTS=$(call cond_enabled,CONFIG_INSTALL_PROJECTS,INSTALL-PROJECTS)
+ifeq ($(MAKECMDGOALS),dist)
+INSTALL-PROJECTS+=$(call cond_enabled,CONFIG_INSTALL_PROJECTS,DIST-PROJECTS)
+endif
 .install-projects: .projects $(addprefix $(DIST_DIR)/,$(INSTALL-PROJECTS))
 	@touch $@
 
@@ -296,26 +319,30 @@
 	rm -f .projects .install-projects $(PROJECTS)
 endif
 
-# If there are any source files to be installed, then include the build
+# If there are any source files to be distributed, then include the build
 # system too.
-ifneq ($(call enabled,INSTALL-SRCS),)
-    INSTALL-SRCS-yes            += configure
-    INSTALL-SRCS-yes            += build/make/configure.sh
-    INSTALL-SRCS-yes            += build/make/gen_asm_deps.sh
-    INSTALL-SRCS-yes            += build/make/Makefile
-    INSTALL-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_def.sh
-    INSTALL-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_proj.sh
-    INSTALL-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_sln.sh
-    INSTALL-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
+ifneq ($(call enabled,DIST-SRCS),)
+    DIST-SRCS-yes            += configure
+    DIST-SRCS-yes            += build/make/configure.sh
+    DIST-SRCS-yes            += build/make/gen_asm_deps.sh
+    DIST-SRCS-yes            += build/make/Makefile
+    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_def.sh
+    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_proj.sh
+    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_sln.sh
+    DIST-SRCS-$(CONFIG_MSVS)  += build/x86-msvs/yasm.rules
+    DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
     #
     # This isn't really ARCH_ARM dependent, it's dependant on whether we're
     # using assembly code or not (CONFIG_OPTIMIZATIONS maybe). Just use
     # this for now.
-    INSTALL-SRCS-$(ARCH_ARM)    += build/make/obj_int_extract.c
-    INSTALL-SRCS-$(ARCH_ARM)    += build/make/ads2gas.pl
-    INSTALL-SRCS-yes            += $(target:-$(TOOLCHAIN)=).mk
+    DIST-SRCS-$(ARCH_ARM)    += build/make/obj_int_extract.c
+    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas.pl
+    DIST-SRCS-yes            += $(target:-$(TOOLCHAIN)=).mk
 endif
 INSTALL-SRCS := $(call cond_enabled,CONFIG_INSTALL_SRCS,INSTALL-SRCS)
+ifeq ($(MAKECMDGOALS),dist)
+INSTALL-SRCS += $(call cond_enabled,CONFIG_INSTALL_SRCS,DIST-SRCS)
+endif
 .install-srcs: $(addprefix $(DIST_DIR)/src/,$(INSTALL-SRCS))
 	@touch $@
 
@@ -329,13 +356,5 @@
 BUILD_TARGETS += .docs .libs .bins
 INSTALL_TARGETS += .install-docs .install-srcs .install-libs .install-bins
 all-$(target): $(BUILD_TARGETS)
-install: $(INSTALL_TARGETS)
-
-#
-# Development helper targets
-#
-ifneq ($(SRC_PATH_BARE),)
-.PHONY: svnstat
-svnstat:
-	svn stat $(SRC_PATH_BARE)
-endif
+install:: $(INSTALL_TARGETS)
+dist: $(INSTALL_TARGETS)
diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl
index 6fcba84..3dff048 100755
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -1,11 +1,12 @@
 #!/usr/bin/perl
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl
index 569c3e7..5014c61 100755
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -1,11 +1,12 @@
 #!/usr/bin/env perl
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/armlink_adapter.sh b/build/make/armlink_adapter.sh
index dcaa82c..571e46e 100755
--- a/build/make/armlink_adapter.sh
+++ b/build/make/armlink_adapter.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/configure.sh b/build/make/configure.sh
index ed3a34f..e9f8a2b 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1,13 +1,13 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  configure.sh
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  This script is sourced by the main configure script and contains
+##  utility functions and other common bits that aren't strictly libvpx
+##  related.
 ##
-
+##  This build system is based in part on the FFmpeg configure script.
+##
 
 
 #
@@ -120,8 +120,8 @@
 
 show_targets() {
     while [ -n "$*" ]; do
-        if [ "${1%%-*}" == "${2%%-*}" ]; then
-            if [ "${2%%-*}" == "${3%%-*}" ]; then
+        if [ "${1%%-*}" = "${2%%-*}" ]; then
+            if [ "${2%%-*}" = "${3%%-*}" ]; then
                 printf "    %-24s %-24s %-24s\n" "$1" "$2" "$3"
                 shift; shift; shift
             else
@@ -348,7 +348,6 @@
 }
 
 write_common_target_config_mk() {
-    [ -n "$2" ] && local have_config_h="-DHAVE_CONFIG_H=\"${2##*/}\""
     local CC=${CC}
     enabled ccache && CC="ccache ${CC}"
 
@@ -367,7 +366,7 @@
 STRIP=${STRIP}
 NM=${NM}
 
-CFLAGS  = ${CFLAGS} ${have_config_h}
+CFLAGS  = ${CFLAGS}
 ARFLAGS = -rus\$(if \$(quiet),c,v)
 LDFLAGS = ${LDFLAGS}
 ASFLAGS = ${ASFLAGS}
@@ -379,7 +378,7 @@
 fmt_deps = sed -e 's;^__image.axf;\$(dir \$@)\$(notdir \$<).o \$@;' #hide
 EOF
     else cat >> $1 << EOF
-fmt_deps = sed -e 's;^\(.*\)\.o;\$(dir \$@)\1\$(suffix \$<).o \$@;' #hide
+fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\$(dir \$@)\1\$(suffix \$<).o \$@;'
 EOF
     fi
 
@@ -396,8 +395,6 @@
 write_common_target_config_h() {
     cat > ${TMP_H} << EOF
 /* This file automatically generated by configure. Do not edit! */
-#define INLINE      ${INLINE}
-#define FORCEINLINE ${FORCEINLINE:-${INLINE}}
 #define RESTRICT    ${RESTRICT}
 EOF
     print_config_h ARCH   "${TMP_H}" ${ARCH_LIST}
@@ -443,7 +440,13 @@
         disable builtin_libc
         alt_libc="${optval}"
         ;;
-        --libc)
+        --prefix=*)
+        prefix="${optval}"
+        ;;
+        --libdir=*)
+        libdir="${optval}"
+        ;;
+        --libc|--prefix|--libdir)
         die "Option ${opt} requires argument"
         ;;
         --help|-h) show_help
@@ -464,6 +467,18 @@
     done
 }
 
+
+post_process_common_cmdline() {
+    prefix="${prefix:-/usr/local}"
+    prefix="${prefix%/}"
+    libdir="${libdir:-${prefix}/lib}"
+    libdir="${libdir%/}"
+    if [ "${libdir#${prefix}}" = "${libdir}" ]; then
+        die "Libdir ${libdir} must be a subdirectory of ${prefix}"
+    fi
+}
+
+
 post_process_cmdline() {
     true;
 }
@@ -479,6 +494,43 @@
 }
 
 process_common_toolchain() {
+    if [ -z "$toolchain" ]; then
+        gcctarget="$(gcc -dumpmachine 2> /dev/null)"
+
+        # detect tgt_isa
+        case "$gcctarget" in
+            *x86_64*|*amd64*)
+                tgt_isa=x86_64
+                ;;
+            *i[3456]86*)
+                tgt_isa=x86
+                ;;
+        esac
+
+        # detect tgt_os
+        case "$gcctarget" in
+            *darwin8*)
+                tgt_isa=universal
+                tgt_os=darwin8
+                ;;
+            *darwin9*)
+                tgt_isa=universal
+                tgt_os=darwin9
+                ;;
+            *mingw32*|*cygwin*)
+                [ -z "$tgt_isa" ] && tgt_isa=x86
+                tgt_os=win32
+                ;;
+            *linux*|*bsd*)
+                tgt_os=linux
+                ;;
+        esac
+
+        if [ -n "$tgt_isa" ] && [ -n "$tgt_os" ]; then
+            toolchain=${tgt_isa}-${tgt_os}-gcc
+        fi
+    fi
+
     toolchain=${toolchain:-generic-gnu}
 
     is_in ${toolchain} ${all_platforms} || enabled force_toolchain \
@@ -730,6 +782,9 @@
         soft_enable ssse3
 
         case  ${tgt_os} in
+            win*)
+                enabled gcc && add_cflags -fno-common
+                ;;
             solaris*)
                 CC=${CC:-${CROSS}gcc}
                 LD=${LD:-${CROSS}gcc}
@@ -744,11 +799,23 @@
                 setup_gnu_toolchain
                 add_cflags -use-msasm -use-asm
                 add_ldflags -i-static
+                enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE3 -axSSE3
+                enabled x86_64 && AR=xiar
+                case ${tune_cpu} in
+                    atom*)
+                        tune_cflags="-x"
+                        tune_cpu="SSE3_ATOM"
+                    ;;
+                    *)
+                        tune_cflags="-march="
+                    ;;
+                esac
                 ;;
             gcc*)
                 add_cflags  -m${bits}
                 add_ldflags -m${bits}
                 link_with_cc=gcc
+                tune_cflags="-march="
             setup_gnu_toolchain
                 ;;
         esac
@@ -781,6 +848,7 @@
     ;;
     universal*|*-gcc|generic-gnu)
         link_with_cc=gcc
+        enable gcc
     setup_gnu_toolchain
     ;;
     esac
@@ -834,8 +902,8 @@
 
     # glibc needs these
     if enabled linux; then
-	add_cflags -D_LARGEFILE_SOURCE
-	add_cflags -D_FILE_OFFSET_BITS=64
+        add_cflags -D_LARGEFILE_SOURCE
+        add_cflags -D_FILE_OFFSET_BITS=64
     fi
 }
 
@@ -844,8 +912,8 @@
 }
 
 print_config_mk() {
-    prefix=$1
-    makefile=$2
+    local prefix=$1
+    local makefile=$2
     shift 2
     for cfg; do
         upname="`toupper $cfg`"
@@ -856,8 +924,8 @@
 }
 
 print_config_h() {
-    prefix=$1
-    header=$2
+    local prefix=$1
+    local header=$2
     shift 2
     for cfg; do
         upname="`toupper $cfg`"
@@ -888,6 +956,7 @@
     else
         echo "# ${self} $@" > ${logfile}
     fi
+    post_process_common_cmdline
     post_process_cmdline
     process_toolchain
     process_detect
diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh
index c1118e1..7c6c5d5 100755
--- a/build/make/gen_asm_deps.sh
+++ b/build/make/gen_asm_deps.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/gen_msvs_def.sh b/build/make/gen_msvs_def.sh
index 68b2406..4defcc2 100755
--- a/build/make/gen_msvs_def.sh
+++ b/build/make/gen_msvs_def.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index eb482a4..584477f 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -1,16 +1,18 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
 self=$0
 self_basename=${self##*/}
+self_dirname=$(dirname "$0")
 EOL=$'\n'
 
 show_help() {
@@ -204,7 +206,7 @@
     ;;
     --ver=*) vs_ver="$optval"
              case $optval in
-             [78])
+             [789])
              ;;
              *) die Unrecognized Visual Studio Version in $opt
              ;;
@@ -246,6 +248,8 @@
     ;;
     8) vs_ver_id="8.00"
     ;;
+    9) vs_ver_id="9.00"
+    ;;
 esac
 
 [ -n "$name" ] || die "Project name (--name) must be specified!"
@@ -291,8 +295,8 @@
     x86*)
         platforms[0]="Win32"
         # these are only used by vs7
-        asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} \$(InputPath)"
-        asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} \$(InputPath)"
+        asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} &quot;\$(InputPath)&quot;"
+        asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} &quot;\$(InputPath)&quot;"
     ;;
     arm*|iwmmx*)
         case "${name}" in
@@ -342,19 +346,19 @@
 
     open_tag  ToolFiles
     case "$target" in
-        x86*) $uses_asm && tag DefaultToolFile FileName="yasm.rules"
+        x86*) $uses_asm && tag ToolFile RelativePath="$self_dirname/../x86-msvs/yasm.rules"
         ;;
         arm*|iwmmx*)
-            if [ "$name" == "vpx_decoder" ];then
+            if [ "$name" == "vpx" ];then
             case "$target" in
                 armv5*)
-                    tag DefaultToolFile FileName="armasmv5.rules"
+                    tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv5.rules"
                 ;;
                 armv6*)
-                    tag DefaultToolFile FileName="armasmv6.rules"
+                    tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv6.rules"
                 ;;
                 iwmmxt*)
-                    tag DefaultToolFile FileName="armasmxscale.rules"
+                    tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmxscale.rules"
                 ;;
             esac
             fi
@@ -374,7 +378,7 @@
 
         if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
             case "$name" in
-                vpx_decoder) tag Tool \
+                vpx)         tag Tool \
                              Name="VCPreBuildEventTool" \
                              CommandLine="call obj_int_extract.bat \$(ConfigurationName)"
                              tag Tool \
@@ -385,7 +389,7 @@
                              ExecutionBucket="7" \
                              Optimization="0" \
                              AdditionalIncludeDirectories="$incs" \
-                             PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;DEBUG;_LIB;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;HAVE_CONFIG_H" \
+                             PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;DEBUG;_LIB;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \
                              MinimalRebuild="true" \
                              RuntimeLibrary="1" \
                              BufferSecurityCheck="false" \
@@ -404,7 +408,7 @@
                              ExecutionBucket="7" \
                              Optimization="0" \
                              AdditionalIncludeDirectories="$incs" \
-                             PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;DEBUG;_CONSOLE;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;HAVE_CONFIG_H" \
+                             PreprocessorDefinitions="_DEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;DEBUG;_CONSOLE;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \
                              MinimalRebuild="true" \
                              RuntimeLibrary="1" \
                              BufferSecurityCheck="false" \
@@ -435,7 +439,7 @@
                 Name="VCCLCompilerTool" \
                 Optimization="0" \
                 AdditionalIncludeDirectories="$incs" \
-                PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;$defines" \
+                PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
                 RuntimeLibrary="$debug_runtime" \
                 UsePrecompiledHeader="0" \
                 WarningLevel="3" \
@@ -508,7 +512,7 @@
 
         if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
             case "$name" in
-                vpx_decoder) tag DeploymentTool \
+                vpx)         tag DeploymentTool \
                              ForceDirty="-1" \
                              RegisterOutput="0"
                                 ;;
@@ -532,7 +536,7 @@
 
         if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
             case "$name" in
-                vpx_decoder) tag Tool \
+                vpx)         tag Tool \
                                      Name="VCPreBuildEventTool" \
                                      CommandLine="call obj_int_extract.bat \$(ConfigurationName)"
                              tag Tool \
@@ -544,7 +548,7 @@
                                              Optimization="2" \
                                              FavorSizeOrSpeed="1" \
                                              AdditionalIncludeDirectories="$incs" \
-                                             PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;_LIB;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;HAVE_CONFIG_H" \
+                                             PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;_LIB;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \
                                              RuntimeLibrary="0" \
                                              BufferSecurityCheck="false" \
                                              UsePrecompiledHeader="0" \
@@ -563,7 +567,7 @@
                              Optimization="2" \
                              FavorSizeOrSpeed="1" \
                              AdditionalIncludeDirectories="$incs" \
-                             PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;_CONSOLE;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;HAVE_CONFIG_H" \
+                             PreprocessorDefinitions="NDEBUG;_WIN32_WCE=\$(CEVER);UNDER_CE;\$(PLATFORMDEFINES);WINCE;_CONSOLE;\$(ARCHFAM);\$(_ARCHFAM_);_UNICODE;UNICODE;" \
                              RuntimeLibrary="0" \
                              BufferSecurityCheck="false" \
                              UsePrecompiledHeader="0" \
@@ -593,7 +597,7 @@
         x86*) tag       Tool \
                       Name="VCCLCompilerTool" \
                       AdditionalIncludeDirectories="$incs" \
-                      PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;$defines" \
+                      PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
                       RuntimeLibrary="$release_runtime" \
                       UsePrecompiledHeader="0" \
                       WarningLevel="3" \
@@ -670,7 +674,7 @@
 
         if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
             case "$name" in
-                vpx_decoder) tag DeploymentTool \
+                vpx)         tag DeploymentTool \
                              ForceDirty="-1" \
                              RegisterOutput="0"
                 ;;
diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh
index b670ec5..9cf0900 100755
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
@@ -24,7 +25,7 @@
 Options:
     --help                      Print this message
     --out=outfile               Redirect output to a file
-    --ver=version               Version (7,8) of visual studio to generate for
+    --ver=version               Version (7,8,9) of visual studio to generate for
     --target=isa-os-cc          Target specifier
 EOF
     exit 1
@@ -192,11 +193,11 @@
 ifneq (\$(found_devenv),)
   ifeq (\$(CONFIG_VS_VERSION),7)
 $nows_sln_config: $outfile
-${TAB}devenv.com $outfile /build "$config"
+${TAB}devenv.com $outfile -build "$config"
 
   else
 $nows_sln_config: $outfile
-${TAB}devenv.com $outfile /build "$sln_config"
+${TAB}devenv.com $outfile -build "$sln_config"
 
   endif
 else
@@ -223,7 +224,7 @@
     ;;
     --ver=*) vs_ver="$optval"
              case $optval in
-             [78])
+             [789])
              ;;
              *) die Unrecognized Visual Studio Version in $opt
              ;;
@@ -234,7 +235,7 @@
              7) sln_vers="8.00"
                 sln_vers_str="Visual Studio .NET 2003"
              ;;
-             8)
+             [89])
              ;;
              *) die "Unrecognized Visual Studio Version '$optval' in $opt"
              ;;
@@ -256,6 +257,9 @@
     8) sln_vers="9.00"
        sln_vers_str="Visual Studio 2005"
     ;;
+    9) sln_vers="10.00"
+       sln_vers_str="Visual Studio 2008"
+    ;;
 esac
 
 for f in "${file_list[@]}"; do
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index cef14e7..e01870f 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -16,7 +17,7 @@
 #if defined(_MSC_VER)
 #include <io.h>
 #include <share.h>
-#include "vpx_ports/vpx_integer.h"
+#include "vpx/vpx_integer.h"
 #else
 #include <stdint.h>
 #include <unistd.h>
diff --git a/build/make/version.sh b/build/make/version.sh
index 2bda701..3efb956 100755
--- a/build/make/version.sh
+++ b/build/make/version.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/x86-msvs/yasm.rules b/build/x86-msvs/yasm.rules
new file mode 100644
index 0000000..ee1fefb
--- /dev/null
+++ b/build/x86-msvs/yasm.rules
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="utf-8"?>

+<VisualStudioToolFile

+	Name="Yasm"

+	Version="8.00"

+	>

+  <Rules>

+    <CustomBuildRule

+			Name="YASM"

+			DisplayName="Yasm Assembler"

+			CommandLine="yasm -Xvc -f $(PlatformName) [AllOptions] [AdditionalOptions] [Inputs]"

+			Outputs="[$ObjectFileName]"

+			FileExtensions="*.asm"

+			ExecutionDescription="Assembling $(InputFileName)"

+			ShowOnlyRuleProperties="false"

+			>

+      <Properties>

+        <StringProperty

+					Name="Defines"

+					DisplayName="Definitions"

+					Category="Pre-Defined Symbols"

+					Description="Specify pre-defined symbols (&apos;symbol&apos; or &apos;symbol = value&apos;) "

+					Switch="-D [value]"

+					Delimited="true"

+					Inheritable="true"

+				/>

+        <StringProperty

+					Name="IncludePaths"

+					DisplayName="Include Paths"

+					Category="Configuration"

+					Description="Set the paths for any additional include files"

+					Switch="-I [value]"

+					Delimited="true"

+					Inheritable="true"

+				/>

+        <StringProperty

+					Name="UnDefines"

+					DisplayName="Remove Definitions"

+					Category="Pre-Defined Symbols"

+					Description="Remove pre-defined symbols "

+					Switch="-U [value]"

+					Delimited="true"

+					Inheritable="true"

+				/>

+        <StringProperty

+					Name="ObjectFileName"

+					DisplayName="Object File Name"

+					Category="Output"

+					Description="Select the output file name"

+					Switch="-o [value]"

+					DefaultValue="$(IntDir)\$(InputName).obj"

+				/>

+        <StringProperty

+					Name="ListFileName"

+					DisplayName="List File Name"

+					Category="Output"

+					Description="Select an output listing by setting its file name"

+					Switch="-l [value]"

+				/>

+        <StringProperty

+					Name="PreIncludeFile"

+					DisplayName="Pre Include File"

+					Category="Configuration"

+					Description="Select a pre-included file by setting its name"

+					Switch="-P [value]"

+				/>

+        <BooleanProperty

+					Name="Debug"

+					DisplayName="Debug Information"

+					Category="Output"

+					Description="Generate debugging information"

+					Switch="-g cv8"

+				/>

+        <EnumProperty

+					Name="PreProc"

+					DisplayName="Pre-Processor"

+					Category="Configuration"

+					Description="Select the pre-processor (&apos;nasm&apos; or &apos;raw&apos;)"

+					>

+          <Values>

+            <EnumValue

+							Value="0"

+							Switch="-rnasm"

+							DisplayName="Nasm "

+						/>

+            <EnumValue

+							Value="1"

+							Switch="-rraw"

+							DisplayName="Raw"

+						/>

+          </Values>

+        </EnumProperty>

+        <EnumProperty

+					Name="Parser"

+					DisplayName="Parser"

+					Category="Configuration"

+					Description="Select the parser for Intel (&apos;nasm&apos;) or AT&amp;T ( &apos;gas&apos;) syntax"

+					>

+          <Values>

+            <EnumValue

+							Value="0"

+							Switch="-pnasm"

+							DisplayName="Nasm"

+						/>

+            <EnumValue

+							Value="1"

+							Switch="-pgas"

+							DisplayName="Gas"

+						/>

+          </Values>

+        </EnumProperty>

+      </Properties>

+    </CustomBuildRule>

+  </Rules>

+</VisualStudioToolFile>

+

diff --git a/configure b/configure
index 3083343..5d6964e 100755
--- a/configure
+++ b/configure
@@ -1,13 +1,16 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  configure
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  This script is the front-end to the build system. It provides a similar
+##  interface to standard configure scripts with some extra bits for dealing
+##  with toolchains that differ from the standard POSIX interface and
+##  for extracting subsets of the source tree. In theory, reusable parts
+##  of this script were intended to live in build/make/configure.sh,
+##  but in practice, the line is pretty blurry.
 ##
-
+##  This build system is based in part on the FFmpeg configure script.
+##
 
 #source_path="`dirname \"$0\"`"
 source_path=${0%/*}
@@ -24,18 +27,18 @@
                                   supported by hardware [auto]
   ${toggle_codec_srcs}            in/exclude codec library source code
   ${toggle_debug_libs}            in/exclude debug version of libraries
-  ${toggle_eval_limit}            enable limited evaluation build
   ${toggle_md5}                   support for output of checksum data
   ${toggle_static_msvcrt}         use static MSVCRT (VS builds only)
   ${toggle_vp8}                   VP8 codec support
   ${toggle_psnr}                  output of PSNR data, if supported (encoders)
   ${toggle_mem_tracker}           track memory usage
-  ${toggle_eval_limit}            decoder limitted to 500 frames
   ${toggle_postproc}              postprocessing
   ${toggle_multithread}           multithreaded encoding and decoding.
   ${toggle_spatial_resampling}    spatial sampling (scaling) support
   ${toggle_realtime_only}         enable this option while building for real-time encoding
   ${toggle_runtime_cpu_detect}    runtime cpu detection
+  ${toggle_shared}                shared library support
+  ${toggle_arm_asm_detok}         assembly version of the detokenizer (ARM platforms only)
 
 Codecs:
   Codecs can be selectively enabled or disabled individually, or by family:
@@ -105,10 +108,13 @@
 all_platforms="${all_platforms} x86-win32-gcc"
 all_platforms="${all_platforms} x86-win32-vs7"
 all_platforms="${all_platforms} x86-win32-vs8"
+all_platforms="${all_platforms} x86-win32-vs9"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
+all_platforms="${all_platforms} x86_64-linux-icc"
 all_platforms="${all_platforms} x86_64-solaris-gcc"
 all_platforms="${all_platforms} x86_64-win64-vs8"
+all_platforms="${all_platforms} x86_64-win64-vs9"
 all_platforms="${all_platforms} universal-darwin8-gcc"
 all_platforms="${all_platforms} universal-darwin9-gcc"
 all_platforms="${all_platforms} generic-gnu"
@@ -135,11 +141,12 @@
     [ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable doxygen
 fi
 
-# install everything, by default
+# install everything except the sources, by default. sources will have
+# to be enabled when doing dist builds, since that's no longer a common
+# case.
 enabled doxygen && php -v >/dev/null 2>&1 && enable install_docs
 enable install_bins
 enable install_libs
-enable install_srcs
 
 enable optimizations
 enable fast_unaligned #allow unaligned accesses, if supported by hw
@@ -157,8 +164,8 @@
 [ -d ${source_path}/vp8 ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
 else
 # customer environment
-[ -f ${source_path}/../include/vp8cx.h ] && CODECS="${CODECS} vp8_encoder"
-[ -f ${source_path}/../include/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
+[ -f ${source_path}/../include/vpx/vp8cx.h ] && CODECS="${CODECS} vp8_encoder"
+[ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
 
 [ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt
 fi
@@ -224,11 +231,8 @@
 
     dequant_tokens
     dc_recon
-    new_tokens
-    eval_limit
     runtime_cpu_detect
     postproc
-    postproc_generic
     multithread
     psnr
     ${CODECS}
@@ -238,6 +242,8 @@
     static_msvcrt
     spatial_resampling
     realtime_only
+    shared
+    arm_asm_detok
 "
 CMDLINE_SELECT="
     extra_warnings
@@ -264,10 +270,7 @@
 
     dequant_tokens
     dc_recon
-    new_tokens
-    eval_limit
     postproc
-    postproc_generic
     multithread
     psnr
     ${CODECS}
@@ -276,6 +279,8 @@
     mem_tracker
     spatial_resampling
     realtime_only
+    shared
+    arm_asm_detok
 "
 
 process_cmdline() {
@@ -354,7 +359,6 @@
     enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
     ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
     ! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
-    enabled eval_limit && DIST_DIR="${DIST_DIR}-eval"
     ! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
     DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
     case "${tgt_os}" in
@@ -365,8 +369,27 @@
     if [ -f "${source_path}/build/make/version.sh" ]; then
         local ver=`"$source_path/build/make/version.sh" --bare $source_path`
         DIST_DIR="${DIST_DIR}-${ver}"
+        ver=${ver%%-*}
+        VERSION_PATCH=${ver##*.}
+        ver=${ver%.*}
+        VERSION_MINOR=${ver##*.}
+        ver=${ver#v}
+        VERSION_MAJOR=${ver%.*}
     fi
-    enabled child || echo "DIST_DIR?=${DIST_DIR}" >> config.mk
+    enabled child || cat <<EOF >> config.mk
+ifeq (\$(MAKECMDGOALS),dist)
+DIST_DIR?=${DIST_DIR}
+else
+DIST_DIR?=\$(DESTDIR)${prefix}
+endif
+LIBSUBDIR=${libdir##${prefix}/}
+
+VERSION_MAJOR=${VERSION_MAJOR}
+VERSION_MINOR=${VERSION_MINOR}
+VERSION_PATCH=${VERSION_PATCH}
+
+CONFIGURE_ARGS=${CONFIGURE_ARGS}
+EOF
     enabled child || echo "CONFIGURE_ARGS?=${CONFIGURE_ARGS}" >> config.mk
 
     #
@@ -385,6 +408,12 @@
 }
 
 process_detect() {
+    if enabled shared; then
+        # Can only build shared libs on a subset of platforms. Doing this check
+        # here rather than at option parse time because the target auto-detect
+        # magic happens after the command line has been parsed.
+        enabled linux || die "--enable-shared only supported on ELF for now"
+    fi
     if [ -z "$CC" ]; then
         echo "Bypassing toolchain for environment detection."
         enable external_build
@@ -419,7 +448,7 @@
     check_header pthread.h
     check_header sys/mman.h
 
-    check_header vpx_ports/vpx_integer.h -I${source_path} && enable vpx_ports
+    check_header vpx/vpx_integer.h -I${source_path} && enable vpx_ports
 }
 
 process_toolchain() {
@@ -485,7 +514,7 @@
     enabled gcc || soft_disable ccache
     if enabled mips; then
         enable dequant_tokens
-	enable dc_recon
+        enable dc_recon
     fi
 
     # Enable the postbuild target if building for visual studio.
@@ -494,8 +523,6 @@
              enable solution
              vs_version=${tgt_cc##vs}
              all_targets="${all_targets} solution"
-             INLINE=__inline
-             FORCEINLINE=__forceinline
         ;;
     esac
 
diff --git a/docs.mk b/docs.mk
index 3e7b5cd..28df9d2 100644
--- a/docs.mk
+++ b/docs.mk
@@ -1,10 +1,11 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
@@ -47,8 +48,8 @@
 	@doxygen $<
 DOCS-yes += docs/html/index.html
 
-INSTALL-DOCS-yes = $(wildcard docs/html/*)
-INSTALL-DOCS-$(CONFIG_CODEC_SRCS) += $(addprefix src/,$(CODEC_DOX))
-INSTALL-DOCS-$(CONFIG_CODEC_SRCS) += src/libs.doxy_template
-INSTALL-DOCS-yes                  += CHANGELOG
-INSTALL-DOCS-yes                  += README
+DIST-DOCS-yes = $(wildcard docs/html/*)
+DIST-DOCS-$(CONFIG_CODEC_SRCS) += $(addprefix src/,$(CODEC_DOX))
+DIST-DOCS-$(CONFIG_CODEC_SRCS) += src/libs.doxy_template
+DIST-DOCS-yes                  += CHANGELOG
+DIST-DOCS-yes                  += README
diff --git a/example_xma.c b/example_xma.c
index cc4e591..72eb470 100644
--- a/example_xma.c
+++ b/example_xma.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -15,10 +16,10 @@
 #include <string.h>
 #define VPX_CODEC_DISABLE_COMPAT 1
 #include "vpx_config.h"
-#include "vpx_decoder.h"
-#include "vpx_integer.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vpx_integer.h"
 #if CONFIG_VP8_DECODER
-#include "vp8dx.h"
+#include "vpx/vp8dx.h"
 #endif
 
 static char *exec_name;
diff --git a/examples.mk b/examples.mk
index 036a423..00ffc70 100644
--- a/examples.mk
+++ b/examples.mk
@@ -1,10 +1,11 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
@@ -14,18 +15,21 @@
 UTILS-$(CONFIG_DECODERS)    += ivfdec.c
 ivfdec.SRCS                 += md5_utils.c md5_utils.h
 ivfdec.SRCS                 += vpx_ports/vpx_timer.h
-ivfdec.SRCS                 += vpx_ports/vpx_integer.h
+ivfdec.SRCS                 += vpx/vpx_integer.h
 ivfdec.SRCS                 += args.c args.h vpx_ports/config.h
 ivfdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
 ivfdec.DESCRIPTION           = Full featured decoder
 UTILS-$(CONFIG_ENCODERS)    += ivfenc.c
-ivfenc.SRCS                 += args.c args.h vpx_ports/config.h
-ivfenc.SRCS                 += vpx_ports/mem_ops.h vpx_ports/mem_ops_aligned.h
+ivfenc.SRCS                 += args.c args.h y4minput.c y4minput.h
+ivfenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
+ivfenc.SRCS                 += vpx_ports/mem_ops_aligned.h
 ivfenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
 ivfenc.DESCRIPTION           = Full featured encoder
-UTILS-$(CONFIG_DECODERS)    += example_xma.c
-example_xma.GUID             = A955FC4A-73F1-44F7-135E-30D84D32F022
-example_xma.DESCRIPTION      = External Memory Allocation mode usage
+
+# XMA example disabled for now, not used in VP8
+#UTILS-$(CONFIG_DECODERS)    += example_xma.c
+#example_xma.GUID             = A955FC4A-73F1-44F7-135E-30D84D32F022
+#example_xma.DESCRIPTION      = External Memory Allocation mode usage
 
 GEN_EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c
 simple_decoder.GUID              = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
@@ -84,8 +88,6 @@
     INC_PATH := $(SRC_PATH_BARE)/../include
 else
     LIB_PATH-yes                     += $(if $(BUILD_PFX),$(BUILD_PFX),.)
-    INC_PATH-yes                     += $(SRC_PATH_BARE)/vpx_codec
-    INC_PATH-yes                     += $(SRC_PATH_BARE)/vpx_ports
     INC_PATH-$(CONFIG_VP8_DECODER)   += $(SRC_PATH_BARE)/vp8
     INC_PATH-$(CONFIG_VP8_ENCODER)   += $(SRC_PATH_BARE)/vp8
     LIB_PATH := $(call enabled,LIB_PATH)
@@ -99,6 +101,7 @@
 UTILS           = $(call enabled,UTILS)
 GEN_EXAMPLES    = $(call enabled,GEN_EXAMPLES)
 ALL_EXAMPLES    = $(UTILS) $(GEN_EXAMPLES)
+UTIL_SRCS       = $(foreach ex,$(UTILS),$($(ex:.c=).SRCS))
 ALL_SRCS        = $(foreach ex,$(ALL_EXAMPLES),$($(ex:.c=).SRCS))
 CODEC_EXTRA_LIBS=$(sort $(call enabled,CODEC_EXTRA_LIBS))
 
@@ -120,8 +123,10 @@
 # Create build/install dependencies for all examples. The common case
 # is handled here. The MSVS case is handled below.
 NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
-INSTALL-BINS-$(NOT_MSVS)   += $(addprefix bin/,$(ALL_EXAMPLES:.c=))
-INSTALL-SRCS-yes           += $(ALL_SRCS)
+DIST-BINS-$(NOT_MSVS)      += $(addprefix bin/,$(ALL_EXAMPLES:.c=))
+INSTALL-BINS-$(NOT_MSVS)   += $(addprefix bin/,$(UTILS:.c=))
+DIST-SRCS-yes              += $(ALL_SRCS)
+INSTALL-SRCS-yes           += $(UTIL_SRCS)
 OBJS-$(NOT_MSVS)           += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS)))
 BINS-$(NOT_MSVS)           += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=))
 
diff --git a/examples/decode_to_md5.txt b/examples/decode_to_md5.txt
index 0599b13..b3dd568 100644
--- a/examples/decode_to_md5.txt
+++ b/examples/decode_to_md5.txt
@@ -26,21 +26,21 @@
 values.
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_DX
 unsigned char  md5_sum[16];
-md5_ctx_t      md5;
+MD5Context     md5;
 int            i;
 
-md5_init(&md5);
+MD5Init(&md5);
 
 for(plane=0; plane < 3; plane++) {
     unsigned char *buf =img->planes[plane];
 
     for(y=0; y<img->d_h >> (plane?1:0); y++) {
-        md5_update(&md5, buf, img->d_w >> (plane?1:0));
+        MD5Update(&md5, buf, img->d_w >> (plane?1:0));
         buf += img->stride[plane];
     }
 }
 
-md5_finalize(&md5, md5_sum);
+MD5Final(md5_sum, &md5);
 for(i=0; i<16; i++)
     fprintf(outfile, "%02x",md5_sum[i]);
 fprintf(outfile, "  img-%dx%d-%04d.i420\n", img->d_w, img->d_h,
diff --git a/examples/decoder_tmpl.c b/examples/decoder_tmpl.c
index deea449..ba3ac98 100644
--- a/examples/decoder_tmpl.c
+++ b/examples/decoder_tmpl.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -16,11 +17,9 @@
 #include <stdarg.h>
 #include <string.h>
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_decoder.h"
-#if CONFIG_VP8_DECODER && !defined(interface)
-#include "vp8dx.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vp8dx.h"
 #define interface (&vpx_codec_vp8_dx_algo)
-#endif
 @EXTRA_INCLUDES
 
 
diff --git a/examples/decoder_tmpl.txt b/examples/decoder_tmpl.txt
index 3287d50..6da38c2 100644
--- a/examples/decoder_tmpl.txt
+++ b/examples/decoder_tmpl.txt
@@ -1,7 +1,7 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INCLUDES
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_decoder.h"
-#include "vp8dx.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vp8dx.h"
 #define interface (&vpx_codec_vp8_dx_algo)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DEC_INCLUDES
 
diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c
index 2df893a..fdfc3af 100644
--- a/examples/encoder_tmpl.c
+++ b/examples/encoder_tmpl.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -16,12 +17,10 @@
 #include <stdarg.h>
 #include <string.h>
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_encoder.h"
-#if CONFIG_VP8_ENCODER && !defined(interface)
-#include "vp8cx.h"
+#include "vpx/vpx_encoder.h"
+#include "vpx/vp8cx.h"
 #define interface (&vpx_codec_vp8_cx_algo)
 #define fourcc    0x30385056
-#endif
 @EXTRA_INCLUDES
 
 #define IVF_FILE_HDR_SZ  (32)
@@ -129,7 +128,7 @@
     height = strtol(argv[2], NULL, 0);
     if(width < 16 || width%2 || height <16 || height%2)
         die("Invalid resolution: %ldx%ld", width, height);
-    if(!vpx_img_alloc(&raw, IMG_FMT_YV12, width, height, 1))
+    if(!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 1))
         die("Faile to allocate image", width, height);
     if(!(outfile = fopen(argv[4], "wb")))
         die("Failed to open %s for writing", argv[4]);
diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt
index db1ea77..87055ca 100644
--- a/examples/encoder_tmpl.txt
+++ b/examples/encoder_tmpl.txt
@@ -1,7 +1,7 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_INCLUDES
 #define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_encoder.h"
-#include "vp8cx.h"
+#include "vpx/vpx_encoder.h"
+#include "vpx/vp8cx.h"
 #define interface (&vpx_codec_vp8_cx_algo)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_INCLUDES
 
diff --git a/examples/gen_example_code.sh b/examples/gen_example_code.sh
index f2e45c5..1133b49 100755
--- a/examples/gen_example_code.sh
+++ b/examples/gen_example_code.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/examples/gen_example_doxy.php b/examples/gen_example_doxy.php
index 08beade..701bbd3 100755
--- a/examples/gen_example_doxy.php
+++ b/examples/gen_example_doxy.php
@@ -1,11 +1,12 @@
 #!/usr/bin/env php
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/examples/gen_example_text.sh b/examples/gen_example_text.sh
index 0e1f796..9a8703d 100755
--- a/examples/gen_example_text.sh
+++ b/examples/gen_example_text.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig b/examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig
deleted file mode 100644
index b0ed7c9..0000000
--- a/examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig
+++ /dev/null
@@ -1,2909 +0,0 @@
-<?php
-#
-# Markdown Extra  -  A text-to-HTML conversion tool for web writers
-#
-# PHP Markdown & Extra
-# Copyright (c) 2004-2008 Michel Fortin  
-# <http://www.michelf.com/projects/php-markdown/>
-#
-# Original Markdown
-# Copyright (c) 2004-2006 John Gruber  
-# <http://daringfireball.net/projects/markdown/>
-#
-
-
-define( 'MARKDOWN_VERSION',  "1.0.1m" ); # Sat 21 Jun 2008
-define( 'MARKDOWNEXTRA_VERSION',  "1.2.3" ); # Wed 31 Dec 2008
-
-
-#
-# Global default settings:
-#
-
-# Change to ">" for HTML output
-@define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
-
-# Define the width of a tab for code blocks.
-@define( 'MARKDOWN_TAB_WIDTH',     4 );
-
-# Optional title attribute for footnote links and backlinks.
-@define( 'MARKDOWN_FN_LINK_TITLE',         "" );
-@define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );
-
-# Optional class attribute for footnote links and backlinks.
-@define( 'MARKDOWN_FN_LINK_CLASS',         "" );
-@define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );
-
-
-#
-# WordPress settings:
-#
-
-# Change to false to remove Markdown from posts and/or comments.
-@define( 'MARKDOWN_WP_POSTS',      true );
-@define( 'MARKDOWN_WP_COMMENTS',   true );
-
-
-
-### Standard Function Interface ###
-
-@define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
-
-function Markdown($text) {
-#
-# Initialize the parser and return the result of its transform method.
-#
-  # Setup static parser variable.
-  static $parser;
-  if (!isset($parser)) {
-    $parser_class = MARKDOWN_PARSER_CLASS;
-    $parser = new $parser_class;
-  }
-
-  # Transform text using parser.
-  return $parser->transform($text);
-}
-
-
-### WordPress Plugin Interface ###
-
-/*
-Plugin Name: Markdown Extra
-Plugin URI: http://www.michelf.com/projects/php-markdown/
-Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
-Version: 1.2.2
-Author: Michel Fortin
-Author URI: http://www.michelf.com/
-*/
-
-if (isset($wp_version)) {
-  # More details about how it works here:
-  # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
-  
-  # Post content and excerpts
-  # - Remove WordPress paragraph generator.
-  # - Run Markdown on excerpt, then remove all tags.
-  # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
-  if (MARKDOWN_WP_POSTS) {
-    remove_filter('the_content',     'wpautop');
-        remove_filter('the_content_rss', 'wpautop');
-    remove_filter('the_excerpt',     'wpautop');
-    add_filter('the_content',     'mdwp_MarkdownPost', 6);
-        add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
-    add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
-    add_filter('get_the_excerpt', 'trim', 7);
-    add_filter('the_excerpt',     'mdwp_add_p');
-    add_filter('the_excerpt_rss', 'mdwp_strip_p');
-    
-    remove_filter('content_save_pre',  'balanceTags', 50);
-    remove_filter('excerpt_save_pre',  'balanceTags', 50);
-    add_filter('the_content',     'balanceTags', 50);
-    add_filter('get_the_excerpt', 'balanceTags', 9);
-  }
-  
-  # Add a footnote id prefix to posts when inside a loop.
-  function mdwp_MarkdownPost($text) {
-    static $parser;
-    if (!$parser) {
-      $parser_class = MARKDOWN_PARSER_CLASS;
-      $parser = new $parser_class;
-    }
-    if (is_single() || is_page() || is_feed()) {
-      $parser->fn_id_prefix = "";
-    } else {
-      $parser->fn_id_prefix = get_the_ID() . ".";
-    }
-    return $parser->transform($text);
-  }
-  
-  # Comments
-  # - Remove WordPress paragraph generator.
-  # - Remove WordPress auto-link generator.
-  # - Scramble important tags before passing them to the kses filter.
-  # - Run Markdown on excerpt then remove paragraph tags.
-  if (MARKDOWN_WP_COMMENTS) {
-    remove_filter('comment_text', 'wpautop', 30);
-    remove_filter('comment_text', 'make_clickable');
-    add_filter('pre_comment_content', 'Markdown', 6);
-    add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
-    add_filter('pre_comment_content', 'mdwp_show_tags', 12);
-    add_filter('get_comment_text',    'Markdown', 6);
-    add_filter('get_comment_excerpt', 'Markdown', 6);
-    add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
-  
-    global $mdwp_hidden_tags, $mdwp_placeholders;
-    $mdwp_hidden_tags = explode(' ',
-      '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
-    $mdwp_placeholders = explode(' ', str_rot13(
-      'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
-      'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
-  }
-  
-  function mdwp_add_p($text) {
-    if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
-      $text = '<p>'.$text.'</p>';
-      $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
-    }
-    return $text;
-  }
-  
-  function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
-
-  function mdwp_hide_tags($text) {
-    global $mdwp_hidden_tags, $mdwp_placeholders;
-    return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
-  }
-  function mdwp_show_tags($text) {
-    global $mdwp_hidden_tags, $mdwp_placeholders;
-    return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
-  }
-}
-
-
-### bBlog Plugin Info ###
-
-function identify_modifier_markdown() {
-  return array(
-    'name' => 'markdown',
-    'type' => 'modifier',
-    'nicename' => 'PHP Markdown Extra',
-    'description' => 'A text-to-HTML conversion tool for web writers',
-    'authors' => 'Michel Fortin and John Gruber',
-    'licence' => 'GPL',
-    'version' => MARKDOWNEXTRA_VERSION,
-    'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
-    );
-}
-
-
-### Smarty Modifier Interface ###
-
-function smarty_modifier_markdown($text) {
-  return Markdown($text);
-}
-
-
-### Textile Compatibility Mode ###
-
-# Rename this file to "classTextile.php" and it can replace Textile everywhere.
-
-if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
-  # Try to include PHP SmartyPants. Should be in the same directory.
-  @include_once 'smartypants.php';
-  # Fake Textile class. It calls Markdown instead.
-  class Textile {
-    function TextileThis($text, $lite='', $encode='') {
-      if ($lite == '' && $encode == '')    $text = Markdown($text);
-      if (function_exists('SmartyPants'))  $text = SmartyPants($text);
-      return $text;
-    }
-    # Fake restricted version: restrictions are not supported for now.
-    function TextileRestricted($text, $lite='', $noimage='') {
-      return $this->TextileThis($text, $lite);
-    }
-    # Workaround to ensure compatibility with TextPattern 4.0.3.
-    function blockLite($text) { return $text; }
-  }
-}
-
-
-
-#
-# Markdown Parser Class
-#
-
-class Markdown_Parser {
-
-  # Regex to match balanced [brackets].
-  # Needed to insert a maximum bracked depth while converting to PHP.
-  var $nested_brackets_depth = 6;
-  var $nested_brackets_re;
-  
-  var $nested_url_parenthesis_depth = 4;
-  var $nested_url_parenthesis_re;
-
-  # Table of hash values for escaped characters:
-  var $escape_chars = '\`*_{}[]()>#+-.!';
-  var $escape_chars_re;
-
-  # Change to ">" for HTML output.
-  var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
-  var $tab_width = MARKDOWN_TAB_WIDTH;
-  
-  # Change to `true` to disallow markup or entities.
-  var $no_markup = false;
-  var $no_entities = true;
-  
-  # Predefined urls and titles for reference links and images.
-  var $predef_urls = array();
-  var $predef_titles = array();
-
-
-  function Markdown_Parser() {
-  #
-  # Constructor function. Initialize appropriate member variables.
-  #
-    $this->_initDetab();
-    $this->prepareItalicsAndBold();
-  
-    $this->nested_brackets_re = 
-      str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
-      str_repeat('\])*', $this->nested_brackets_depth);
-  
-    $this->nested_url_parenthesis_re = 
-      str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
-      str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
-    
-    $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
-    
-    # Sort document, block, and span gamut in ascendent priority order.
-    asort($this->document_gamut);
-    asort($this->block_gamut);
-    asort($this->span_gamut);
-  }
-
-
-  # Internal hashes used during transformation.
-  var $urls = array();
-  var $titles = array();
-  var $html_hashes = array();
-  
-  # Status flag to avoid invalid nesting.
-  var $in_anchor = false;
-  
-  
-  function setup() {
-  #
-  # Called before the transformation process starts to setup parser 
-  # states.
-  #
-    # Clear global hashes.
-    $this->urls = $this->predef_urls;
-    $this->titles = $this->predef_titles;
-    $this->html_hashes = array();
-    
-    $in_anchor = false;
-  }
-  
-  function teardown() {
-  #
-  # Called after the transformation process to clear any variable 
-  # which may be taking up memory unnecessarly.
-  #
-    $this->urls = array();
-    $this->titles = array();
-    $this->html_hashes = array();
-  }
-
-
-  function transform($text) {
-  #
-  # Main function. Performs some preprocessing on the input text
-  # and pass it through the document gamut.
-  #
-    $this->setup();
-  
-    # Remove UTF-8 BOM and marker character in input, if present.
-    $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
-
-    # Standardize line endings:
-    #   DOS to Unix and Mac to Unix
-    $text = preg_replace('{\r\n?}', "\n", $text);
-
-    # Make sure $text ends with a couple of newlines:
-    $text .= "\n\n";
-
-    # Convert all tabs to spaces.
-    $text = $this->detab($text);
-
-    # Turn block-level HTML blocks into hash entries
-    $text = $this->hashHTMLBlocks($text);
-
-    # Strip any lines consisting only of spaces and tabs.
-    # This makes subsequent regexen easier to write, because we can
-    # match consecutive blank lines with /\n+/ instead of something
-    # contorted like /[ ]*\n+/ .
-    $text = preg_replace('/^[ ]+$/m', '', $text);
-
-    # Run document gamut methods.
-    foreach ($this->document_gamut as $method => $priority) {
-      $text = $this->$method($text);
-    }
-    
-    $this->teardown();
-
-    return $text . "\n";
-  }
-  
-  var $document_gamut = array(
-    # Strip link definitions, store in hashes.
-    "stripLinkDefinitions" => 20,
-    
-    "runBasicBlockGamut"   => 30,
-    );
-
-
-  function stripLinkDefinitions($text) {
-  #
-  # Strips link definitions from text, stores the URLs and titles in
-  # hash references.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Link defs are in the form: ^[id]: url "optional title"
-    $text = preg_replace_callback('{
-              ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
-                [ ]*
-                \n?       # maybe *one* newline
-                [ ]*
-              <?(\S+?)>?      # url = $2
-                [ ]*
-                \n?       # maybe one newline
-                [ ]*
-              (?:
-                (?<=\s)     # lookbehind for whitespace
-                ["(]
-                (.*?)     # title = $3
-                [")]
-                [ ]*
-              )?  # title is optional
-              (?:\n+|\Z)
-      }xm',
-      array(&$this, '_stripLinkDefinitions_callback'),
-      $text);
-    return $text;
-  }
-  function _stripLinkDefinitions_callback($matches) {
-    $link_id = strtolower($matches[1]);
-    $this->urls[$link_id] = $matches[2];
-    $this->titles[$link_id] =& $matches[3];
-    return ''; # String that will replace the block
-  }
-
-
-  function hashHTMLBlocks($text) {
-    if ($this->no_markup)  return $text;
-
-    $less_than_tab = $this->tab_width - 1;
-
-    # Hashify HTML blocks:
-    # We only want to do this for block-level HTML tags, such as headers,
-    # lists, and tables. That's because we still want to wrap <p>s around
-    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
-    # phrase emphasis, and spans. The list of tags we're looking for is
-    # hard-coded:
-    #
-    # *  List "a" is made of tags which can be both inline or block-level.
-    #    These will be treated block-level when the start tag is alone on 
-    #    its line, otherwise they're not matched here and will be taken as 
-    #    inline later.
-    # *  List "b" is made of tags which are always block-level;
-    #
-    $block_tags_a_re = 'ins|del';
-    $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
-               'script|noscript|form|fieldset|iframe|math';
-
-    # Regular expression for the content of a block tag.
-    $nested_tags_level = 4;
-    $attr = '
-      (?>       # optional tag attributes
-        \s      # starts with whitespace
-        (?>
-        [^>"/]+   # text outside quotes
-        |
-        /+(?!>)   # slash not followed by ">"
-        |
-        "[^"]*"   # text inside double quotes (tolerate ">")
-        |
-        \'[^\']*\'  # text inside single quotes (tolerate ">")
-        )*
-      )?  
-      ';
-    $content =
-      str_repeat('
-        (?>
-          [^<]+     # content without tag
-        |
-          <\2     # nested opening tag
-          '.$attr.' # attributes
-          (?>
-            />
-          |
-            >', $nested_tags_level).  # end of opening tag
-            '.*?'.          # last level nested tag content
-      str_repeat('
-            </\2\s*>  # closing nested tag
-          )
-          |       
-          <(?!/\2\s*> # other tags with a different name
-          )
-        )*',
-        $nested_tags_level);
-    $content2 = str_replace('\2', '\3', $content);
-
-    # First, look for nested blocks, e.g.:
-    #   <div>
-    #     <div>
-    #     tags for inner block must be indented.
-    #     </div>
-    #   </div>
-    #
-    # The outermost tags must start at the left margin for this to match, and
-    # the inner nested divs must be indented.
-    # We need to do this before the next, more liberal match, because the next
-    # match will start at the first `<div>` and stop at the first `</div>`.
-    $text = preg_replace_callback('{(?>
-      (?>
-        (?<=\n\n)   # Starting after a blank line
-        |       # or
-        \A\n?     # the beginning of the doc
-      )
-      (           # save in $1
-
-        # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
-        # in between.
-          
-            [ ]{0,'.$less_than_tab.'}
-            <('.$block_tags_b_re.')# start tag = $2
-            '.$attr.'>      # attributes followed by > and \n
-            '.$content.'    # content, support nesting
-            </\2>       # the matching end tag
-            [ ]*        # trailing spaces/tabs
-            (?=\n+|\Z)  # followed by a newline or end of document
-
-      | # Special version for tags of group a.
-
-            [ ]{0,'.$less_than_tab.'}
-            <('.$block_tags_a_re.')# start tag = $3
-            '.$attr.'>[ ]*\n  # attributes followed by >
-            '.$content2.'   # content, support nesting
-            </\3>       # the matching end tag
-            [ ]*        # trailing spaces/tabs
-            (?=\n+|\Z)  # followed by a newline or end of document
-          
-      | # Special case just for <hr />. It was easier to make a special 
-        # case than to make the other regex more complicated.
-      
-            [ ]{0,'.$less_than_tab.'}
-            <(hr)       # start tag = $2
-            '.$attr.'     # attributes
-            /?>         # the matching end tag
-            [ ]*
-            (?=\n{2,}|\Z)   # followed by a blank line or end of document
-      
-      | # Special case for standalone HTML comments:
-      
-          [ ]{0,'.$less_than_tab.'}
-          (?s:
-            <!-- .*? -->
-          )
-          [ ]*
-          (?=\n{2,}|\Z)   # followed by a blank line or end of document
-      
-      | # PHP and ASP-style processor instructions (<? and <%)
-      
-          [ ]{0,'.$less_than_tab.'}
-          (?s:
-            <([?%])     # $2
-            .*?
-            \2>
-          )
-          [ ]*
-          (?=\n{2,}|\Z)   # followed by a blank line or end of document
-          
-      )
-      )}Sxmi',
-      array(&$this, '_hashHTMLBlocks_callback'),
-      $text);
-
-    return $text;
-  }
-  function _hashHTMLBlocks_callback($matches) {
-    $text = $matches[1];
-    $key  = $this->hashBlock($text);
-    return "\n\n$key\n\n";
-  }
-  
-  
-  function hashPart($text, $boundary = 'X') {
-  #
-  # Called whenever a tag must be hashed when a function insert an atomic 
-  # element in the text stream. Passing $text to through this function gives
-  # a unique text-token which will be reverted back when calling unhash.
-  #
-  # The $boundary argument specify what character should be used to surround
-  # the token. By convension, "B" is used for block elements that needs not
-  # to be wrapped into paragraph tags at the end, ":" is used for elements
-  # that are word separators and "X" is used in the general case.
-  #
-    # Swap back any tag hash found in $text so we do not have to `unhash`
-    # multiple times at the end.
-    $text = $this->unhash($text);
-    
-    # Then hash the block.
-    static $i = 0;
-    $key = "$boundary\x1A" . ++$i . $boundary;
-    $this->html_hashes[$key] = $text;
-    return $key; # String that will replace the tag.
-  }
-
-
-  function hashBlock($text) {
-  #
-  # Shortcut function for hashPart with block-level boundaries.
-  #
-    return $this->hashPart($text, 'B');
-  }
-
-
-  var $block_gamut = array(
-  #
-  # These are all the transformations that form block-level
-  # tags like paragraphs, headers, and list items.
-  #
-    "doHeaders"         => 10,
-    "doHorizontalRules" => 20,
-    
-    "doLists"           => 40,
-    "doCodeBlocks"      => 50,
-    "doBlockQuotes"     => 60,
-    );
-
-  function runBlockGamut($text) {
-  #
-  # Run block gamut tranformations.
-  #
-    # We need to escape raw HTML in Markdown source before doing anything 
-    # else. This need to be done for each block, and not only at the 
-    # begining in the Markdown function since hashed blocks can be part of
-    # list items and could have been indented. Indented blocks would have 
-    # been seen as a code block in a previous pass of hashHTMLBlocks.
-    $text = $this->hashHTMLBlocks($text);
-    
-    return $this->runBasicBlockGamut($text);
-  }
-  
-  function runBasicBlockGamut($text) {
-  #
-  # Run block gamut tranformations, without hashing HTML blocks. This is 
-  # useful when HTML blocks are known to be already hashed, like in the first
-  # whole-document pass.
-  #
-    foreach ($this->block_gamut as $method => $priority) {
-      $text = $this->$method($text);
-    }
-    
-    # Finally form paragraph and restore hashed blocks.
-    $text = $this->formParagraphs($text);
-
-    return $text;
-  }
-  
-  
-  function doHorizontalRules($text) {
-    # Do Horizontal Rules:
-    return preg_replace(
-      '{
-        ^[ ]{0,3} # Leading space
-        ([-*_])   # $1: First marker
-        (?>     # Repeated marker group
-          [ ]{0,2}  # Zero, one, or two spaces.
-          \1      # Marker character
-        ){2,}   # Group repeated at least twice
-        [ ]*    # Tailing spaces
-        $     # End of line.
-      }mx',
-      "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
-      $text);
-  }
-
-
-  var $span_gamut = array(
-  #
-  # These are all the transformations that occur *within* block-level
-  # tags like paragraphs, headers, and list items.
-  #
-    # Process character escapes, code spans, and inline HTML
-    # in one shot.
-    "parseSpan"           => -30,
-
-    # Process anchor and image tags. Images must come first,
-    # because ![foo][f] looks like an anchor.
-    "doImages"            =>  10,
-    "doAnchors"           =>  20,
-    
-    # Make links out of things like `<http://example.com/>`
-    # Must come after doAnchors, because you can use < and >
-    # delimiters in inline links like [this](<url>).
-    "doAutoLinks"         =>  30,
-    "encodeAmpsAndAngles" =>  40,
-
-    "doItalicsAndBold"    =>  50,
-    "doHardBreaks"        =>  60,
-    );
-
-  function runSpanGamut($text) {
-  #
-  # Run span gamut tranformations.
-  #
-    foreach ($this->span_gamut as $method => $priority) {
-      $text = $this->$method($text);
-    }
-
-    return $text;
-  }
-  
-  
-  function doHardBreaks($text) {
-    # Do hard breaks:
-    return preg_replace_callback('/ {2,}\n/', 
-      array(&$this, '_doHardBreaks_callback'), $text);
-  }
-  function _doHardBreaks_callback($matches) {
-    return $this->hashPart("<br$this->empty_element_suffix\n");
-  }
-
-
-  function doAnchors($text) {
-  #
-  # Turn Markdown link shortcuts into XHTML <a> tags.
-  #
-    if ($this->in_anchor) return $text;
-    $this->in_anchor = true;
-    
-    #
-    # First, handle reference-style links: [link text] [id]
-    #
-    $text = preg_replace_callback('{
-      (         # wrap whole match in $1
-        \[
-        ('.$this->nested_brackets_re.') # link text = $2
-        \]
-
-        [ ]?        # one optional space
-        (?:\n[ ]*)?   # one optional newline followed by spaces
-
-        \[
-        (.*?)   # id = $3
-        \]
-      )
-      }xs',
-      array(&$this, '_doAnchors_reference_callback'), $text);
-
-    #
-    # Next, inline-style links: [link text](url "optional title")
-    #
-    $text = preg_replace_callback('{
-      (       # wrap whole match in $1
-        \[
-        ('.$this->nested_brackets_re.') # link text = $2
-        \]
-        \(      # literal paren
-        [ ]*
-        (?:
-          <(\S*)> # href = $3
-        |
-          ('.$this->nested_url_parenthesis_re.')  # href = $4
-        )
-        [ ]*
-        (     # $5
-          ([\'"]) # quote char = $6
-          (.*?)   # Title = $7
-          \6    # matching quote
-          [ ]*  # ignore any spaces/tabs between closing quote and )
-        )?      # title is optional
-        \)
-      )
-      }xs',
-      array(&$this, '_DoAnchors_inline_callback'), $text);
-
-    #
-    # Last, handle reference-style shortcuts: [link text]
-    # These must come last in case you've also got [link test][1]
-    # or [link test](/foo)
-    #
-//    $text = preg_replace_callback('{
-//      (         # wrap whole match in $1
-//        \[
-//        ([^\[\]]+)    # link text = $2; can\'t contain [ or ]
-//        \]
-//      )
-//      }xs',
-//      array(&$this, '_doAnchors_reference_callback'), $text);
-
-    $this->in_anchor = false;
-    return $text;
-  }
-  function _doAnchors_reference_callback($matches) {
-    $whole_match =  $matches[1];
-    $link_text   =  $matches[2];
-    $link_id     =& $matches[3];
-
-    if ($link_id == "") {
-      # for shortcut links like [this][] or [this].
-      $link_id = $link_text;
-    }
-    
-    # lower-case and turn embedded newlines into spaces
-    $link_id = strtolower($link_id);
-    $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
-
-    if (isset($this->urls[$link_id])) {
-      $url = $this->urls[$link_id];
-      $url = $this->encodeAttribute($url);
-      
-      $result = "<a href=\"$url\"";
-      if ( isset( $this->titles[$link_id] ) ) {
-        $title = $this->titles[$link_id];
-        $title = $this->encodeAttribute($title);
-        $result .=  " title=\"$title\"";
-      }
-    
-      $link_text = $this->runSpanGamut($link_text);
-      $result .= ">$link_text</a>";
-      $result = $this->hashPart($result);
-    }
-    else {
-      $result = $whole_match;
-    }
-    return $result;
-  }
-  function _doAnchors_inline_callback($matches) {
-    $whole_match  =  $matches[1];
-    $link_text    =  $this->runSpanGamut($matches[2]);
-    $url      =  $matches[3] == '' ? $matches[4] : $matches[3];
-    $title      =& $matches[7];
-
-    $url = $this->encodeAttribute($url);
-
-    $result = "<a href=\"$url\"";
-    if (isset($title)) {
-      $title = $this->encodeAttribute($title);
-      $result .=  " title=\"$title\"";
-    }
-    
-    $link_text = $this->runSpanGamut($link_text);
-    $result .= ">$link_text</a>";
-
-    return $this->hashPart($result);
-  }
-
-
-  function doImages($text) {
-  #
-  # Turn Markdown image shortcuts into <img> tags.
-  #
-    #
-    # First, handle reference-style labeled images: ![alt text][id]
-    #
-    $text = preg_replace_callback('{
-      (       # wrap whole match in $1
-        !\[
-        ('.$this->nested_brackets_re.')   # alt text = $2
-        \]
-
-        [ ]?        # one optional space
-        (?:\n[ ]*)?   # one optional newline followed by spaces
-
-        \[
-        (.*?)   # id = $3
-        \]
-
-      )
-      }xs', 
-      array(&$this, '_doImages_reference_callback'), $text);
-
-    #
-    # Next, handle inline images:  ![alt text](url "optional title")
-    # Don't forget: encode * and _
-    #
-    $text = preg_replace_callback('{
-      (       # wrap whole match in $1
-        !\[
-        ('.$this->nested_brackets_re.')   # alt text = $2
-        \]
-        \s?     # One optional whitespace character
-        \(      # literal paren
-        [ ]*
-        (?:
-          <(\S*)> # src url = $3
-        |
-          ('.$this->nested_url_parenthesis_re.')  # src url = $4
-        )
-        [ ]*
-        (     # $5
-          ([\'"]) # quote char = $6
-          (.*?)   # title = $7
-          \6    # matching quote
-          [ ]*
-        )?      # title is optional
-        \)
-      )
-      }xs',
-      array(&$this, '_doImages_inline_callback'), $text);
-
-    return $text;
-  }
-  function _doImages_reference_callback($matches) {
-    $whole_match = $matches[1];
-    $alt_text    = $matches[2];
-    $link_id     = strtolower($matches[3]);
-
-    if ($link_id == "") {
-      $link_id = strtolower($alt_text); # for shortcut links like ![this][].
-    }
-
-    $alt_text = $this->encodeAttribute($alt_text);
-    if (isset($this->urls[$link_id])) {
-      $url = $this->encodeAttribute($this->urls[$link_id]);
-      $result = "<img src=\"$url\" alt=\"$alt_text\"";
-      if (isset($this->titles[$link_id])) {
-        $title = $this->titles[$link_id];
-        $title = $this->encodeAttribute($title);
-        $result .=  " title=\"$title\"";
-      }
-      $result .= $this->empty_element_suffix;
-      $result = $this->hashPart($result);
-    }
-    else {
-      # If there's no such link ID, leave intact:
-      $result = $whole_match;
-    }
-
-    return $result;
-  }
-  function _doImages_inline_callback($matches) {
-    $whole_match  = $matches[1];
-    $alt_text   = $matches[2];
-    $url      = $matches[3] == '' ? $matches[4] : $matches[3];
-    $title      =& $matches[7];
-
-    $alt_text = $this->encodeAttribute($alt_text);
-    $url = $this->encodeAttribute($url);
-    $result = "<img src=\"$url\" alt=\"$alt_text\"";
-    if (isset($title)) {
-      $title = $this->encodeAttribute($title);
-      $result .=  " title=\"$title\""; # $title already quoted
-    }
-    $result .= $this->empty_element_suffix;
-
-    return $this->hashPart($result);
-  }
-
-
-  function doHeaders($text) {
-    # Setext-style headers:
-    #   Header 1
-    #   ========
-    #  
-    #   Header 2
-    #   --------
-    #
-    $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
-      array(&$this, '_doHeaders_callback_setext'), $text);
-
-    # atx-style headers:
-    # # Header 1
-    # ## Header 2
-    # ## Header 2 with closing hashes ##
-    # ...
-    # ###### Header 6
-    #
-    $text = preg_replace_callback('{
-        ^(\#{1,6})  # $1 = string of #\'s
-        [ ]*
-        (.+?)   # $2 = Header text
-        [ ]*
-        \#*     # optional closing #\'s (not counted)
-        \n+
-      }xm',
-      array(&$this, '_doHeaders_callback_atx'), $text);
-
-    return $text;
-  }
-  function _doHeaders_callback_setext($matches) {
-    # Terrible hack to check we haven't found an empty list item.
-    if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
-      return $matches[0];
-    
-    $level = $matches[2]{0} == '=' ? 1 : 2;
-    $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
-    return "\n" . $this->hashBlock($block) . "\n\n";
-  }
-  function _doHeaders_callback_atx($matches) {
-    $level = strlen($matches[1]);
-    $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
-    return "\n" . $this->hashBlock($block) . "\n\n";
-  }
-
-
-  function doLists($text) {
-  #
-  # Form HTML ordered (numbered) and unordered (bulleted) lists.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Re-usable patterns to match list item bullets and number markers:
-    $marker_ul_re  = '[*+-]';
-    $marker_ol_re  = '\d+[.]';
-    $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
-
-    $markers_relist = array($marker_ul_re, $marker_ol_re);
-
-    foreach ($markers_relist as $marker_re) {
-      # Re-usable pattern to match any entirel ul or ol list:
-      $whole_list_re = '
-        (               # $1 = whole list
-          (               # $2
-          [ ]{0,'.$less_than_tab.'}
-          ('.$marker_re.')      # $3 = first list item marker
-          [ ]+
-          )
-          (?s:.+?)
-          (               # $4
-            \z
-          |
-            \n{2,}
-            (?=\S)
-            (?!           # Negative lookahead for another list item marker
-            [ ]*
-            '.$marker_re.'[ ]+
-            )
-          )
-        )
-      '; // mx
-      
-      # We use a different prefix before nested lists than top-level lists.
-      # See extended comment in _ProcessListItems().
-    
-      if ($this->list_level) {
-        $text = preg_replace_callback('{
-            ^
-            '.$whole_list_re.'
-          }mx',
-          array(&$this, '_doLists_callback'), $text);
-      }
-      else {
-        $text = preg_replace_callback('{
-            (?:(?<=\n)\n|\A\n?) # Must eat the newline
-            '.$whole_list_re.'
-          }mx',
-          array(&$this, '_doLists_callback'), $text);
-      }
-    }
-
-    return $text;
-  }
-  function _doLists_callback($matches) {
-    # Re-usable patterns to match list item bullets and number markers:
-    $marker_ul_re  = '[*+-]';
-    $marker_ol_re  = '\d+[.]';
-    $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
-    
-    $list = $matches[1];
-    $list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";
-    
-    $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
-    
-    $list .= "\n";
-    $result = $this->processListItems($list, $marker_any_re);
-    
-    $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
-    return "\n". $result ."\n\n";
-  }
-
-  var $list_level = 0;
-
-  function processListItems($list_str, $marker_any_re) {
-  #
-  # Process the contents of a single ordered or unordered list, splitting it
-  # into individual list items.
-  #
-    # The $this->list_level global keeps track of when we're inside a list.
-    # Each time we enter a list, we increment it; when we leave a list,
-    # we decrement. If it's zero, we're not in a list anymore.
-    #
-    # We do this because when we're not inside a list, we want to treat
-    # something like this:
-    #
-    #   I recommend upgrading to version
-    #   8. Oops, now this line is treated
-    #   as a sub-list.
-    #
-    # As a single paragraph, despite the fact that the second line starts
-    # with a digit-period-space sequence.
-    #
-    # Whereas when we're inside a list (or sub-list), that line will be
-    # treated as the start of a sub-list. What a kludge, huh? This is
-    # an aspect of Markdown's syntax that's hard to parse perfectly
-    # without resorting to mind-reading. Perhaps the solution is to
-    # change the syntax rules such that sub-lists must start with a
-    # starting cardinal number; e.g. "1." or "a.".
-    
-    $this->list_level++;
-
-    # trim trailing blank lines:
-    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
-
-    $list_str = preg_replace_callback('{
-      (\n)?             # leading line = $1
-      (^[ ]*)             # leading whitespace = $2
-      ('.$marker_any_re.'       # list marker and space = $3
-        (?:[ ]+|(?=\n)) # space only required if item is not empty
-      )
-      ((?s:.*?))            # list item text   = $4
-      (?:(\n+(?=\n))|\n)        # tailing blank line = $5
-      (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
-      }xm',
-      array(&$this, '_processListItems_callback'), $list_str);
-
-    $this->list_level--;
-    return $list_str;
-  }
-  function _processListItems_callback($matches) {
-    $item = $matches[4];
-    $leading_line =& $matches[1];
-    $leading_space =& $matches[2];
-    $marker_space = $matches[3];
-    $tailing_blank_line =& $matches[5];
-
-    if ($leading_line || $tailing_blank_line || 
-      preg_match('/\n{2,}/', $item))
-    {
-      # Replace marker with the appropriate whitespace indentation
-      $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
-      $item = $this->runBlockGamut($this->outdent($item)."\n");
-    }
-    else {
-      # Recursion for sub-lists:
-      $item = $this->doLists($this->outdent($item));
-      $item = preg_replace('/\n+$/', '', $item);
-      $item = $this->runSpanGamut($item);
-    }
-
-    return "<li>" . $item . "</li>\n";
-  }
-
-
-  function doCodeBlocks($text) {
-  #
-  # Process Markdown `<pre><code>` blocks.
-  #
-    $text = preg_replace_callback('{
-        (?:\n\n|\A\n?)
-        (             # $1 = the code block -- one or more lines, starting with a space/tab
-          (?>
-          [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
-          .*\n+
-          )+
-        )
-        ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
-      }xm',
-      array(&$this, '_doCodeBlocks_callback'), $text);
-
-    return $text;
-  }
-  function _doCodeBlocks_callback($matches) {
-    $codeblock = $matches[1];
-
-    $codeblock = $this->outdent($codeblock);
-    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
-
-    # trim leading newlines and trailing newlines
-    $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
-
-    $codeblock = "<pre><code>$codeblock\n</code></pre>";
-    return "\n\n".$this->hashBlock($codeblock)."\n\n";
-  }
-
-
-  function makeCodeSpan($code) {
-  #
-  # Create a code span markup for $code. Called from handleSpanToken.
-  #
-    $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
-    return $this->hashPart("<code>$code</code>");
-  }
-
-
-  var $em_relist = array(
-    ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
-    '*' => '(?<=\S)(?<!\*)\*(?!\*)',
-    '_' => '(?<=\S)(?<!_)_(?!_)',
-    );
-  var $strong_relist = array(
-    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
-    '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
-    '__' => '(?<=\S)(?<!_)__(?!_)',
-    );
-  var $em_strong_relist = array(
-    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
-    '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
-    '___' => '(?<=\S)(?<!_)___(?!_)',
-    );
-  var $em_strong_prepared_relist;
-  
-  function prepareItalicsAndBold() {
-  #
-  # Prepare regular expressions for seraching emphasis tokens in any
-  # context.
-  #
-    foreach ($this->em_relist as $em => $em_re) {
-      foreach ($this->strong_relist as $strong => $strong_re) {
-        # Construct list of allowed token expressions.
-        $token_relist = array();
-        if (isset($this->em_strong_relist["$em$strong"])) {
-          $token_relist[] = $this->em_strong_relist["$em$strong"];
-        }
-        $token_relist[] = $em_re;
-        $token_relist[] = $strong_re;
-        
-        # Construct master expression from list.
-        $token_re = '{('. implode('|', $token_relist) .')}';
-        $this->em_strong_prepared_relist["$em$strong"] = $token_re;
-      }
-    }
-  }
-  
-  function doItalicsAndBold($text) {
-    $token_stack = array('');
-    $text_stack = array('');
-    $em = '';
-    $strong = '';
-    $tree_char_em = false;
-    
-    while (1) {
-      #
-      # Get prepared regular expression for seraching emphasis tokens
-      # in current context.
-      #
-      $token_re = $this->em_strong_prepared_relist["$em$strong"];
-      
-      #
-      # Each loop iteration seach for the next emphasis token. 
-      # Each token is then passed to handleSpanToken.
-      #
-      $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
-      $text_stack[0] .= $parts[0];
-      $token =& $parts[1];
-      $text =& $parts[2];
-      
-      if (empty($token)) {
-        # Reached end of text span: empty stack without emitting.
-        # any more emphasis.
-        while ($token_stack[0]) {
-          $text_stack[1] .= array_shift($token_stack);
-          $text_stack[0] .= array_shift($text_stack);
-        }
-        break;
-      }
-      
-      $token_len = strlen($token);
-      if ($tree_char_em) {
-        # Reached closing marker while inside a three-char emphasis.
-        if ($token_len == 3) {
-          # Three-char closing marker, close em and strong.
-          array_shift($token_stack);
-          $span = array_shift($text_stack);
-          $span = $this->runSpanGamut($span);
-          $span = "<strong><em>$span</em></strong>";
-          $text_stack[0] .= $this->hashPart($span);
-          $em = '';
-          $strong = '';
-        } else {
-          # Other closing marker: close one em or strong and
-          # change current token state to match the other
-          $token_stack[0] = str_repeat($token{0}, 3-$token_len);
-          $tag = $token_len == 2 ? "strong" : "em";
-          $span = $text_stack[0];
-          $span = $this->runSpanGamut($span);
-          $span = "<$tag>$span</$tag>";
-          $text_stack[0] = $this->hashPart($span);
-          $$tag = ''; # $$tag stands for $em or $strong
-        }
-        $tree_char_em = false;
-      } else if ($token_len == 3) {
-        if ($em) {
-          # Reached closing marker for both em and strong.
-          # Closing strong marker:
-          for ($i = 0; $i < 2; ++$i) {
-            $shifted_token = array_shift($token_stack);
-            $tag = strlen($shifted_token) == 2 ? "strong" : "em";
-            $span = array_shift($text_stack);
-            $span = $this->runSpanGamut($span);
-            $span = "<$tag>$span</$tag>";
-            $text_stack[0] .= $this->hashPart($span);
-            $$tag = ''; # $$tag stands for $em or $strong
-          }
-        } else {
-          # Reached opening three-char emphasis marker. Push on token 
-          # stack; will be handled by the special condition above.
-          $em = $token{0};
-          $strong = "$em$em";
-          array_unshift($token_stack, $token);
-          array_unshift($text_stack, '');
-          $tree_char_em = true;
-        }
-      } else if ($token_len == 2) {
-        if ($strong) {
-          # Unwind any dangling emphasis marker:
-          if (strlen($token_stack[0]) == 1) {
-            $text_stack[1] .= array_shift($token_stack);
-            $text_stack[0] .= array_shift($text_stack);
-          }
-          # Closing strong marker:
-          array_shift($token_stack);
-          $span = array_shift($text_stack);
-          $span = $this->runSpanGamut($span);
-          $span = "<strong>$span</strong>";
-          $text_stack[0] .= $this->hashPart($span);
-          $strong = '';
-        } else {
-          array_unshift($token_stack, $token);
-          array_unshift($text_stack, '');
-          $strong = $token;
-        }
-      } else {
-        # Here $token_len == 1
-        if ($em) {
-          if (strlen($token_stack[0]) == 1) {
-            # Closing emphasis marker:
-            array_shift($token_stack);
-            $span = array_shift($text_stack);
-            $span = $this->runSpanGamut($span);
-            $span = "<em>$span</em>";
-            $text_stack[0] .= $this->hashPart($span);
-            $em = '';
-          } else {
-            $text_stack[0] .= $token;
-          }
-        } else {
-          array_unshift($token_stack, $token);
-          array_unshift($text_stack, '');
-          $em = $token;
-        }
-      }
-    }
-    return $text_stack[0];
-  }
-
-
-  function doBlockQuotes($text) {
-    $text = preg_replace_callback('/
-        (               # Wrap whole match in $1
-        (?>
-          ^[ ]*>[ ]?      # ">" at the start of a line
-          .+\n          # rest of the first line
-          (.+\n)*         # subsequent consecutive lines
-          \n*           # blanks
-        )+
-        )
-      /xm',
-      array(&$this, '_doBlockQuotes_callback'), $text);
-
-    return $text;
-  }
-  function _doBlockQuotes_callback($matches) {
-    $bq = $matches[1];
-    # trim one level of quoting - trim whitespace-only lines
-    $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
-    $bq = $this->runBlockGamut($bq);    # recurse
-
-    $bq = preg_replace('/^/m', "  ", $bq);
-    # These leading spaces cause problem with <pre> content, 
-    # so we need to fix that:
-    $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
-      array(&$this, '_DoBlockQuotes_callback2'), $bq);
-
-    return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
-  }
-  function _doBlockQuotes_callback2($matches) {
-    $pre = $matches[1];
-    $pre = preg_replace('/^  /m', '', $pre);
-    return $pre;
-  }
-
-
-  function formParagraphs($text) {
-  #
-  # Params:
-  #   $text - string to process with html <p> tags
-  #
-    # Strip leading and trailing lines:
-    $text = preg_replace('/\A\n+|\n+\z/', '', $text);
-
-    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
-
-    #
-    # Wrap <p> tags and unhashify HTML blocks
-    #
-    foreach ($grafs as $key => $value) {
-      if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
-        # Is a paragraph.
-        $value = $this->runSpanGamut($value);
-        $value = preg_replace('/^([ ]*)/', "<p>", $value);
-        $value .= "</p>";
-        $grafs[$key] = $this->unhash($value);
-      }
-      else {
-        # Is a block.
-        # Modify elements of @grafs in-place...
-        $graf = $value;
-        $block = $this->html_hashes[$graf];
-        $graf = $block;
-//        if (preg_match('{
-//          \A
-//          (             # $1 = <div> tag
-//            <div  \s+
-//            [^>]*
-//            \b
-//            markdown\s*=\s*  ([\'"])  # $2 = attr quote char
-//            1
-//            \2
-//            [^>]*
-//            >
-//          )
-//          (             # $3 = contents
-//          .*
-//          )
-//          (</div>)          # $4 = closing tag
-//          \z
-//          }xs', $block, $matches))
-//        {
-//          list(, $div_open, , $div_content, $div_close) = $matches;
-//
-//          # We can't call Markdown(), because that resets the hash;
-//          # that initialization code should be pulled into its own sub, though.
-//          $div_content = $this->hashHTMLBlocks($div_content);
-//          
-//          # Run document gamut methods on the content.
-//          foreach ($this->document_gamut as $method => $priority) {
-//            $div_content = $this->$method($div_content);
-//          }
-//
-//          $div_open = preg_replace(
-//            '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
-//
-//          $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
-//        }
-        $grafs[$key] = $graf;
-      }
-    }
-
-    return implode("\n\n", $grafs);
-  }
-
-
-  function encodeAttribute($text) {
-  #
-  # Encode text for a double-quoted HTML attribute. This function
-  # is *not* suitable for attributes enclosed in single quotes.
-  #
-    $text = $this->encodeAmpsAndAngles($text);
-    $text = str_replace('"', '&quot;', $text);
-    return $text;
-  }
-  
-  
-  function encodeAmpsAndAngles($text) {
-  #
-  # Smart processing for ampersands and angle brackets that need to 
-  # be encoded. Valid character entities are left alone unless the
-  # no-entities mode is set.
-  #
-    if ($this->no_entities) {
-      $text = str_replace('&', '&amp;', $text);
-    } else {
-      # Ampersand-encoding based entirely on Nat Irons's Amputator
-      # MT plugin: <http://bumppo.net/projects/amputator/>
-      $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
-                '&amp;', $text);;
-    }
-    # Encode remaining <'s
-    $text = str_replace('<', '&lt;', $text);
-
-    return $text;
-  }
-
-
-  function doAutoLinks($text) {
-    $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 
-      array(&$this, '_doAutoLinks_url_callback'), $text);
-
-    # Email addresses: <address@domain.foo>
-    $text = preg_replace_callback('{
-      <
-      (?:mailto:)?
-      (
-        [-.\w\x80-\xFF]+
-        \@
-        [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
-      )
-      >
-      }xi',
-      array(&$this, '_doAutoLinks_email_callback'), $text);
-
-    return $text;
-  }
-  function _doAutoLinks_url_callback($matches) {
-    $url = $this->encodeAttribute($matches[1]);
-    $link = "<a href=\"$url\">$url</a>";
-    return $this->hashPart($link);
-  }
-  function _doAutoLinks_email_callback($matches) {
-    $address = $matches[1];
-    $link = $this->encodeEmailAddress($address);
-    return $this->hashPart($link);
-  }
-
-
-  function encodeEmailAddress($addr) {
-  #
-  # Input: an email address, e.g. "foo@example.com"
-  #
-  # Output: the email address as a mailto link, with each character
-  #   of the address encoded as either a decimal or hex entity, in
-  #   the hopes of foiling most address harvesting spam bots. E.g.:
-  #
-  #   <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
-  #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
-  #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
-  #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
-  #
-  # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
-  #   With some optimizations by Milian Wolff.
-  #
-    $addr = "mailto:" . $addr;
-    $chars = preg_split('/(?<!^)(?!$)/', $addr);
-    $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
-    
-    foreach ($chars as $key => $char) {
-      $ord = ord($char);
-      # Ignore non-ascii chars.
-      if ($ord < 128) {
-        $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
-        # roughly 10% raw, 45% hex, 45% dec
-        # '@' *must* be encoded. I insist.
-        if ($r > 90 && $char != '@') /* do nothing */;
-        else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
-        else              $chars[$key] = '&#'.$ord.';';
-      }
-    }
-    
-    $addr = implode('', $chars);
-    $text = implode('', array_slice($chars, 7)); # text without `mailto:`
-    $addr = "<a href=\"$addr\">$text</a>";
-
-    return $addr;
-  }
-
-
-  function parseSpan($str) {
-  #
-  # Take the string $str and parse it into tokens, hashing embeded HTML,
-  # escaped characters and handling code spans.
-  #
-    $output = '';
-    
-    $span_re = '{
-        (
-          \\\\'.$this->escape_chars_re.'
-        |
-          (?<![`\\\\])
-          `+            # code span marker
-      '.( $this->no_markup ? '' : '
-        |
-          <!--    .*?     -->   # comment
-        |
-          <\?.*?\?> | <%.*?%>   # processing instruction
-        |
-          <[/!$]?[-a-zA-Z0-9:]+ # regular tags
-          (?>
-            \s
-            (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
-          )?
-          >
-      ').'
-        )
-        }xs';
-
-    while (1) {
-      #
-      # Each loop iteration seach for either the next tag, the next 
-      # openning code span marker, or the next escaped character. 
-      # Each token is then passed to handleSpanToken.
-      #
-      $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
-      
-      # Create token from text preceding tag.
-      if ($parts[0] != "") {
-        $output .= $parts[0];
-      }
-      
-      # Check if we reach the end.
-      if (isset($parts[1])) {
-        $output .= $this->handleSpanToken($parts[1], $parts[2]);
-        $str = $parts[2];
-      }
-      else {
-        break;
-      }
-    }
-    
-    return $output;
-  }
-  
-  
-  function handleSpanToken($token, &$str) {
-  #
-  # Handle $token provided by parseSpan by determining its nature and 
-  # returning the corresponding value that should replace it.
-  #
-    switch ($token{0}) {
-      case "\\":
-        return $this->hashPart("&#". ord($token{1}). ";");
-      case "`":
-        # Search for end marker in remaining text.
-        if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
-          $str, $matches))
-        {
-          $str = $matches[2];
-          $codespan = $this->makeCodeSpan($matches[1]);
-          return $this->hashPart($codespan);
-        }
-        return $token; // return as text since no ending marker found.
-      default:
-        return $this->hashPart($token);
-    }
-  }
-
-
-  function outdent($text) {
-  #
-  # Remove one level of line-leading tabs or spaces
-  #
-    return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
-  }
-
-
-  # String length function for detab. `_initDetab` will create a function to 
-  # hanlde UTF-8 if the default function does not exist.
-  var $utf8_strlen = 'mb_strlen';
-  
-  function detab($text) {
-  #
-  # Replace tabs with the appropriate amount of space.
-  #
-    # For each line we separate the line in blocks delemited by
-    # tab characters. Then we reconstruct every line by adding the 
-    # appropriate number of space between each blocks.
-    
-    $text = preg_replace_callback('/^.*\t.*$/m',
-      array(&$this, '_detab_callback'), $text);
-
-    return $text;
-  }
-  function _detab_callback($matches) {
-    $line = $matches[0];
-    $strlen = $this->utf8_strlen; # strlen function for UTF-8.
-    
-    # Split in blocks.
-    $blocks = explode("\t", $line);
-    # Add each blocks to the line.
-    $line = $blocks[0];
-    unset($blocks[0]); # Do not add first block twice.
-    foreach ($blocks as $block) {
-      # Calculate amount of space, insert spaces, insert block.
-      $amount = $this->tab_width - 
-        $strlen($line, 'UTF-8') % $this->tab_width;
-      $line .= str_repeat(" ", $amount) . $block;
-    }
-    return $line;
-  }
-  function _initDetab() {
-  #
-  # Check for the availability of the function in the `utf8_strlen` property
-  # (initially `mb_strlen`). If the function is not available, create a 
-  # function that will loosely count the number of UTF-8 characters with a
-  # regular expression.
-  #
-    if (function_exists($this->utf8_strlen)) return;
-    $this->utf8_strlen = create_function('$text', 'return preg_match_all(
-      "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
-      $text, $m);');
-  }
-
-
-  function unhash($text) {
-  #
-  # Swap back in all the tags hashed by _HashHTMLBlocks.
-  #
-    return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
-      array(&$this, '_unhash_callback'), $text);
-  }
-  function _unhash_callback($matches) {
-    return $this->html_hashes[$matches[0]];
-  }
-
-}
-
-
-#
-# Markdown Extra Parser Class
-#
-
-class MarkdownExtra_Parser extends Markdown_Parser {
-
-  # Prefix for footnote ids.
-  var $fn_id_prefix = "";
-  
-  # Optional title attribute for footnote links and backlinks.
-  var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
-  var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
-  
-  # Optional class attribute for footnote links and backlinks.
-  var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
-  var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
-  
-  # Predefined abbreviations.
-  var $predef_abbr = array();
-
-
-  function MarkdownExtra_Parser() {
-  #
-  # Constructor function. Initialize the parser object.
-  #
-    # Add extra escapable characters before parent constructor 
-    # initialize the table.
-    $this->escape_chars .= ':|';
-    
-    # Insert extra document, block, and span transformations. 
-    # Parent constructor will do the sorting.
-    $this->document_gamut += array(
-      "doFencedCodeBlocks" => 5,
-      "stripFootnotes"     => 15,
-      "stripAbbreviations" => 25,
-      "appendFootnotes"    => 50,
-      );
-    $this->block_gamut += array(
-      "doFencedCodeBlocks" => 5,
-      "doTables"           => 15,
-      "doDefLists"         => 45,
-      );
-    $this->span_gamut += array(
-      "doFootnotes"        => 5,
-      "doAbbreviations"    => 70,
-      );
-    
-    parent::Markdown_Parser();
-  }
-  
-  
-  # Extra variables used during extra transformations.
-  var $footnotes = array();
-  var $footnotes_ordered = array();
-  var $abbr_desciptions = array();
-  var $abbr_word_re = '';
-  
-  # Give the current footnote number.
-  var $footnote_counter = 1;
-  
-  
-  function setup() {
-  #
-  # Setting up Extra-specific variables.
-  #
-    parent::setup();
-    
-    $this->footnotes = array();
-    $this->footnotes_ordered = array();
-    $this->abbr_desciptions = array();
-    $this->abbr_word_re = '';
-    $this->footnote_counter = 1;
-    
-    foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
-      if ($this->abbr_word_re)
-        $this->abbr_word_re .= '|';
-      $this->abbr_word_re .= preg_quote($abbr_word);
-      $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
-    }
-  }
-  
-  function teardown() {
-  #
-  # Clearing Extra-specific variables.
-  #
-    $this->footnotes = array();
-    $this->footnotes_ordered = array();
-    $this->abbr_desciptions = array();
-    $this->abbr_word_re = '';
-    
-    parent::teardown();
-  }
-  
-  
-  ### HTML Block Parser ###
-  
-  # Tags that are always treated as block tags:
-  var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
-  
-  # Tags treated as block tags only if the opening tag is alone on it's line:
-  var $context_block_tags_re = 'script|noscript|math|ins|del';
-  
-  # Tags where markdown="1" default to span mode:
-  var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
-  
-  # Tags which must not have their contents modified, no matter where 
-  # they appear:
-  var $clean_tags_re = 'script|math';
-  
-  # Tags that do not need to be closed.
-  var $auto_close_tags_re = 'hr|img';
-  
-
-  function hashHTMLBlocks($text) {
-  #
-  # Hashify HTML Blocks and "clean tags".
-  #
-  # We only want to do this for block-level HTML tags, such as headers,
-  # lists, and tables. That's because we still want to wrap <p>s around
-  # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
-  # phrase emphasis, and spans. The list of tags we're looking for is
-  # hard-coded.
-  #
-  # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
-  # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
-  # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
-  #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
-  # These two functions are calling each other. It's recursive!
-  #
-    #
-    # Call the HTML-in-Markdown hasher.
-    #
-    list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
-    
-    return $text;
-  }
-  function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 
-                    $enclosing_tag_re = '', $span = false)
-  {
-  #
-  # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
-  #
-  # *   $indent is the number of space to be ignored when checking for code 
-  #     blocks. This is important because if we don't take the indent into 
-  #     account, something like this (which looks right) won't work as expected:
-  #
-  #     <div>
-  #         <div markdown="1">
-  #         Hello World.  <-- Is this a Markdown code block or text?
-  #         </div>  <-- Is this a Markdown code block or a real tag?
-  #     <div>
-  #
-  #     If you don't like this, just don't indent the tag on which
-  #     you apply the markdown="1" attribute.
-  #
-  # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
-  #     tag with that name. Nested tags supported.
-  #
-  # *   If $span is true, text inside must treated as span. So any double 
-  #     newline will be replaced by a single newline so that it does not create 
-  #     paragraphs.
-  #
-  # Returns an array of that form: ( processed text , remaining text )
-  #
-    if ($text === '') return array('', '');
-
-    # Regex to check for the presense of newlines around a block tag.
-    $newline_before_re = '/(?:^\n?|\n\n)*$/';
-    $newline_after_re = 
-      '{
-        ^           # Start of text following the tag.
-        (?>[ ]*<!--.*?-->)?   # Optional comment.
-        [ ]*\n          # Must be followed by newline.
-      }xs';
-    
-    # Regex to match any tag.
-    $block_tag_re =
-      '{
-        (         # $2: Capture hole tag.
-          </?         # Any opening or closing tag.
-            (?>       # Tag name.
-              '.$this->block_tags_re.'      |
-              '.$this->context_block_tags_re.'  |
-              '.$this->clean_tags_re.'          |
-              (?!\s)'.$enclosing_tag_re.'
-            )
-            (?:
-              (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
-              (?>
-                ".*?"   | # Double quotes (can contain `>`)
-                \'.*?\'     | # Single quotes (can contain `>`)
-                .+?       # Anything but quotes and `>`.
-              )*?
-            )?
-          >         # End of tag.
-        |
-          <!--    .*?     --> # HTML Comment
-        |
-          <\?.*?\?> | <%.*?%> # Processing instruction
-        |
-          <!\[CDATA\[.*?\]\]> # CData Block
-        |
-          # Code span marker
-          `+
-        '. ( !$span ? ' # If not in span.
-        |
-          # Indented code block
-          (?> ^[ ]*\n? | \n[ ]*\n )
-          [ ]{'.($indent+4).'}[^\n]* \n
-          (?>
-            (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
-          )*
-        |
-          # Fenced code block marker
-          (?> ^ | \n )
-          [ ]{'.($indent).'}~~~+[ ]*\n
-        ' : '' ). ' # End (if not is span).
-        )
-      }xs';
-
-    
-    $depth = 0;   # Current depth inside the tag tree.
-    $parsed = ""; # Parsed text that will be returned.
-
-    #
-    # Loop through every tag until we find the closing tag of the parent
-    # or loop until reaching the end of text if no parent tag specified.
-    #
-    do {
-      #
-      # Split the text using the first $tag_match pattern found.
-      # Text before  pattern will be first in the array, text after
-      # pattern will be at the end, and between will be any catches made 
-      # by the pattern.
-      #
-      $parts = preg_split($block_tag_re, $text, 2, 
-                PREG_SPLIT_DELIM_CAPTURE);
-      
-      # If in Markdown span mode, add a empty-string span-level hash 
-      # after each newline to prevent triggering any block element.
-      if ($span) {
-        $void = $this->hashPart("", ':');
-        $newline = "$void\n";
-        $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
-      }
-      
-      $parsed .= $parts[0]; # Text before current tag.
-      
-      # If end of $text has been reached. Stop loop.
-      if (count($parts) < 3) {
-        $text = "";
-        break;
-      }
-      
-      $tag  = $parts[1]; # Tag to handle.
-      $text = $parts[2]; # Remaining text after current tag.
-      $tag_re = preg_quote($tag); # For use in a regular expression.
-      
-      #
-      # Check for: Code span marker
-      #
-      if ($tag{0} == "`") {
-        # Find corresponding end marker.
-        $tag_re = preg_quote($tag);
-        if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
-          $text, $matches))
-        {
-          # End marker found: pass text unchanged until marker.
-          $parsed .= $tag . $matches[0];
-          $text = substr($text, strlen($matches[0]));
-        }
-        else {
-          # Unmatched marker: just skip it.
-          $parsed .= $tag;
-        }
-      }
-      #
-      # Check for: Indented code block or fenced code block marker.
-      #
-      else if ($tag{0} == "\n" || $tag{0} == "~") {
-        if ($tag{1} == "\n" || $tag{1} == " ") {
-          # Indented code block: pass it unchanged, will be handled 
-          # later.
-          $parsed .= $tag;
-        }
-        else {
-          # Fenced code block marker: find matching end marker.
-          $tag_re = preg_quote(trim($tag));
-          if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text, 
-            $matches)) 
-          {
-            # End marker found: pass text unchanged until marker.
-            $parsed .= $tag . $matches[0];
-            $text = substr($text, strlen($matches[0]));
-          }
-          else {
-            # No end marker: just skip it.
-            $parsed .= $tag;
-          }
-        }
-      }
-      #
-      # Check for: Opening Block level tag or
-      #            Opening Context Block tag (like ins and del) 
-      #               used as a block tag (tag is alone on it's line).
-      #
-      else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
-        ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
-          preg_match($newline_before_re, $parsed) &&
-          preg_match($newline_after_re, $text)  )
-        )
-      {
-        # Need to parse tag and following text using the HTML parser.
-        list($block_text, $text) = 
-          $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
-        
-        # Make sure it stays outside of any paragraph by adding newlines.
-        $parsed .= "\n\n$block_text\n\n";
-      }
-      #
-      # Check for: Clean tag (like script, math)
-      #            HTML Comments, processing instructions.
-      #
-      else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
-        $tag{1} == '!' || $tag{1} == '?')
-      {
-        # Need to parse tag and following text using the HTML parser.
-        # (don't check for markdown attribute)
-        list($block_text, $text) = 
-          $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
-        
-        $parsed .= $block_text;
-      }
-      #
-      # Check for: Tag with same name as enclosing tag.
-      #
-      else if ($enclosing_tag_re !== '' &&
-        # Same name as enclosing tag.
-        preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
-      {
-        #
-        # Increase/decrease nested tag count.
-        #
-        if ($tag{1} == '/')           $depth--;
-        else if ($tag{strlen($tag)-2} != '/') $depth++;
-
-        if ($depth < 0) {
-          #
-          # Going out of parent element. Clean up and break so we
-          # return to the calling function.
-          #
-          $text = $tag . $text;
-          break;
-        }
-        
-        $parsed .= $tag;
-      }
-      else {
-        $parsed .= $tag;
-      }
-    } while ($depth >= 0);
-    
-    return array($parsed, $text);
-  }
-  function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
-  #
-  # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
-  #
-  # *   Calls $hash_method to convert any blocks.
-  # *   Stops when the first opening tag closes.
-  # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
-  #     (it is not inside clean tags)
-  #
-  # Returns an array of that form: ( processed text , remaining text )
-  #
-    if ($text === '') return array('', '');
-    
-    # Regex to match `markdown` attribute inside of a tag.
-    $markdown_attr_re = '
-      {
-        \s*     # Eat whitespace before the `markdown` attribute
-        markdown
-        \s*=\s*
-        (?>
-          (["\'])   # $1: quote delimiter   
-          (.*?)   # $2: attribute value
-          \1      # matching delimiter  
-        |
-          ([^\s>]*) # $3: unquoted attribute value
-        )
-        ()        # $4: make $3 always defined (avoid warnings)
-      }xs';
-    
-    # Regex to match any tag.
-    $tag_re = '{
-        (         # $2: Capture hole tag.
-          </?         # Any opening or closing tag.
-            [\w:$]+     # Tag name.
-            (?:
-              (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
-              (?>
-                ".*?"   | # Double quotes (can contain `>`)
-                \'.*?\'     | # Single quotes (can contain `>`)
-                .+?       # Anything but quotes and `>`.
-              )*?
-            )?
-          >         # End of tag.
-        |
-          <!--    .*?     --> # HTML Comment
-        |
-          <\?.*?\?> | <%.*?%> # Processing instruction
-        |
-          <!\[CDATA\[.*?\]\]> # CData Block
-        )
-      }xs';
-    
-    $original_text = $text;   # Save original text in case of faliure.
-    
-    $depth    = 0;  # Current depth inside the tag tree.
-    $block_text = ""; # Temporary text holder for current text.
-    $parsed   = ""; # Parsed text that will be returned.
-
-    #
-    # Get the name of the starting tag.
-    # (This pattern makes $base_tag_name_re safe without quoting.)
-    #
-    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
-      $base_tag_name_re = $matches[1];
-
-    #
-    # Loop through every tag until we find the corresponding closing tag.
-    #
-    do {
-      #
-      # Split the text using the first $tag_match pattern found.
-      # Text before  pattern will be first in the array, text after
-      # pattern will be at the end, and between will be any catches made 
-      # by the pattern.
-      #
-      $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
-      
-      if (count($parts) < 3) {
-        #
-        # End of $text reached with unbalenced tag(s).
-        # In that case, we return original text unchanged and pass the
-        # first character as filtered to prevent an infinite loop in the 
-        # parent function.
-        #
-        return array($original_text{0}, substr($original_text, 1));
-      }
-      
-      $block_text .= $parts[0]; # Text before current tag.
-      $tag         = $parts[1]; # Tag to handle.
-      $text        = $parts[2]; # Remaining text after current tag.
-      
-      #
-      # Check for: Auto-close tag (like <hr/>)
-      #      Comments and Processing Instructions.
-      #
-      if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
-        $tag{1} == '!' || $tag{1} == '?')
-      {
-        # Just add the tag to the block as if it was text.
-        $block_text .= $tag;
-      }
-      else {
-        #
-        # Increase/decrease nested tag count. Only do so if
-        # the tag's name match base tag's.
-        #
-        if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
-          if ($tag{1} == '/')           $depth--;
-          else if ($tag{strlen($tag)-2} != '/') $depth++;
-        }
-        
-        #
-        # Check for `markdown="1"` attribute and handle it.
-        #
-        if ($md_attr && 
-          preg_match($markdown_attr_re, $tag, $attr_m) &&
-          preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
-        {
-          # Remove `markdown` attribute from opening tag.
-          $tag = preg_replace($markdown_attr_re, '', $tag);
-          
-          # Check if text inside this tag must be parsed in span mode.
-          $this->mode = $attr_m[2] . $attr_m[3];
-          $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
-            preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
-          
-          # Calculate indent before tag.
-          if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
-            $strlen = $this->utf8_strlen;
-            $indent = $strlen($matches[1], 'UTF-8');
-          } else {
-            $indent = 0;
-          }
-          
-          # End preceding block with this tag.
-          $block_text .= $tag;
-          $parsed .= $this->$hash_method($block_text);
-          
-          # Get enclosing tag name for the ParseMarkdown function.
-          # (This pattern makes $tag_name_re safe without quoting.)
-          preg_match('/^<([\w:$]*)\b/', $tag, $matches);
-          $tag_name_re = $matches[1];
-          
-          # Parse the content using the HTML-in-Markdown parser.
-          list ($block_text, $text)
-            = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
-              $tag_name_re, $span_mode);
-          
-          # Outdent markdown text.
-          if ($indent > 0) {
-            $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
-                          $block_text);
-          }
-          
-          # Append tag content to parsed text.
-          if (!$span_mode)  $parsed .= "\n\n$block_text\n\n";
-          else        $parsed .= "$block_text";
-          
-          # Start over a new block.
-          $block_text = "";
-        }
-        else $block_text .= $tag;
-      }
-      
-    } while ($depth > 0);
-    
-    #
-    # Hash last block text that wasn't processed inside the loop.
-    #
-    $parsed .= $this->$hash_method($block_text);
-    
-    return array($parsed, $text);
-  }
-
-
-  function hashClean($text) {
-  #
-  # Called whenever a tag must be hashed when a function insert a "clean" tag
-  # in $text, it pass through this function and is automaticaly escaped, 
-  # blocking invalid nested overlap.
-  #
-    return $this->hashPart($text, 'C');
-  }
-
-
-  function doHeaders($text) {
-  #
-  # Redefined to add id attribute support.
-  #
-    # Setext-style headers:
-    #   Header 1  {#header1}
-    #   ========
-    #  
-    #   Header 2  {#header2}
-    #   --------
-    #
-    $text = preg_replace_callback(
-      '{
-        (^.+?)                # $1: Header text
-        (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?  # $2: Id attribute
-        [ ]*\n(=+|-+)[ ]*\n+        # $3: Header footer
-      }mx',
-      array(&$this, '_doHeaders_callback_setext'), $text);
-
-    # atx-style headers:
-    # # Header 1        {#header1}
-    # ## Header 2       {#header2}
-    # ## Header 2 with closing hashes ##  {#header3}
-    # ...
-    # ###### Header 6   {#header2}
-    #
-    $text = preg_replace_callback('{
-        ^(\#{1,6})  # $1 = string of #\'s
-        [ ]*
-        (.+?)   # $2 = Header text
-        [ ]*
-        \#*     # optional closing #\'s (not counted)
-        (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
-        [ ]*
-        \n+
-      }xm',
-      array(&$this, '_doHeaders_callback_atx'), $text);
-
-    return $text;
-  }
-  function _doHeaders_attr($attr) {
-    if (empty($attr))  return "";
-    return " id=\"$attr\"";
-  }
-  function _doHeaders_callback_setext($matches) {
-    if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
-      return $matches[0];
-    $level = $matches[3]{0} == '=' ? 1 : 2;
-    $attr  = $this->_doHeaders_attr($id =& $matches[2]);
-    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
-    return "\n" . $this->hashBlock($block) . "\n\n";
-  }
-  function _doHeaders_callback_atx($matches) {
-    $level = strlen($matches[1]);
-    $attr  = $this->_doHeaders_attr($id =& $matches[3]);
-    $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
-    return "\n" . $this->hashBlock($block) . "\n\n";
-  }
-
-
-  function doTables($text) {
-  #
-  # Form HTML tables.
-  #
-    $less_than_tab = $this->tab_width - 1;
-    #
-    # Find tables with leading pipe.
-    #
-    # | Header 1 | Header 2
-    # | -------- | --------
-    # | Cell 1   | Cell 2
-    # | Cell 3   | Cell 4
-    #
-    $text = preg_replace_callback('
-      {
-        ^             # Start of a line
-        [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
-        [|]             # Optional leading pipe (present)
-        (.+) \n           # $1: Header row (at least one pipe)
-        
-        [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
-        [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
-        
-        (             # $3: Cells
-          (?>
-            [ ]*        # Allowed whitespace.
-            [|] .* \n     # Row content.
-          )*
-        )
-        (?=\n|\Z)         # Stop at final double newline.
-      }xm',
-      array(&$this, '_doTable_leadingPipe_callback'), $text);
-    
-    #
-    # Find tables without leading pipe.
-    #
-    # Header 1 | Header 2
-    # -------- | --------
-    # Cell 1   | Cell 2
-    # Cell 3   | Cell 4
-    #
-    $text = preg_replace_callback('
-      {
-        ^             # Start of a line
-        [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
-        (\S.*[|].*) \n        # $1: Header row (at least one pipe)
-        
-        [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
-        ([-:]+[ ]*[|][-| :]*) \n  # $2: Header underline
-        
-        (             # $3: Cells
-          (?>
-            .* [|] .* \n    # Row content
-          )*
-        )
-        (?=\n|\Z)         # Stop at final double newline.
-      }xm',
-      array(&$this, '_DoTable_callback'), $text);
-
-    return $text;
-  }
-  function _doTable_leadingPipe_callback($matches) {
-    $head   = $matches[1];
-    $underline  = $matches[2];
-    $content  = $matches[3];
-    
-    # Remove leading pipe for each row.
-    $content  = preg_replace('/^ *[|]/m', '', $content);
-    
-    return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
-  }
-  function _doTable_callback($matches) {
-    $head   = $matches[1];
-    $underline  = $matches[2];
-    $content  = $matches[3];
-
-    # Remove any tailing pipes for each line.
-    $head   = preg_replace('/[|] *$/m', '', $head);
-    $underline  = preg_replace('/[|] *$/m', '', $underline);
-    $content  = preg_replace('/[|] *$/m', '', $content);
-    
-    # Reading alignement from header underline.
-    $separators = preg_split('/ *[|] */', $underline);
-    foreach ($separators as $n => $s) {
-      if (preg_match('/^ *-+: *$/', $s))    $attr[$n] = ' align="right"';
-      else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
-      else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
-      else                  $attr[$n] = '';
-    }
-    
-    # Parsing span elements, including code spans, character escapes, 
-    # and inline HTML tags, so that pipes inside those gets ignored.
-    $head   = $this->parseSpan($head);
-    $headers  = preg_split('/ *[|] */', $head);
-    $col_count  = count($headers);
-    
-    # Write column headers.
-    $text = "<table>\n";
-    $text .= "<thead>\n";
-    $text .= "<tr>\n";
-    foreach ($headers as $n => $header)
-      $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
-    $text .= "</tr>\n";
-    $text .= "</thead>\n";
-    
-    # Split content by row.
-    $rows = explode("\n", trim($content, "\n"));
-    
-    $text .= "<tbody>\n";
-    foreach ($rows as $row) {
-      # Parsing span elements, including code spans, character escapes, 
-      # and inline HTML tags, so that pipes inside those gets ignored.
-      $row = $this->parseSpan($row);
-      
-      # Split row by cell.
-      $row_cells = preg_split('/ *[|] */', $row, $col_count);
-      $row_cells = array_pad($row_cells, $col_count, '');
-      
-      $text .= "<tr>\n";
-      foreach ($row_cells as $n => $cell)
-        $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
-      $text .= "</tr>\n";
-    }
-    $text .= "</tbody>\n";
-    $text .= "</table>";
-    
-    return $this->hashBlock($text) . "\n";
-  }
-
-  
-  function doDefLists($text) {
-  #
-  # Form HTML definition lists.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Re-usable pattern to match any entire dl list:
-    $whole_list_re = '(?>
-      (               # $1 = whole list
-        (               # $2
-        [ ]{0,'.$less_than_tab.'}
-        ((?>.*\S.*\n)+)       # $3 = defined term
-        \n?
-        [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
-        )
-        (?s:.+?)
-        (               # $4
-          \z
-        |
-          \n{2,}
-          (?=\S)
-          (?!           # Negative lookahead for another term
-          [ ]{0,'.$less_than_tab.'}
-          (?: \S.*\n )+?      # defined term
-          \n?
-          [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
-          )
-          (?!           # Negative lookahead for another definition
-          [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
-          )
-        )
-      )
-    )'; // mx
-
-    $text = preg_replace_callback('{
-        (?>\A\n?|(?<=\n\n))
-        '.$whole_list_re.'
-      }mx',
-      array(&$this, '_doDefLists_callback'), $text);
-
-    return $text;
-  }
-  function _doDefLists_callback($matches) {
-    # Re-usable patterns to match list item bullets and number markers:
-    $list = $matches[1];
-    
-    # Turn double returns into triple returns, so that we can make a
-    # paragraph for the last item in a list, if necessary:
-    $result = trim($this->processDefListItems($list));
-    $result = "<dl>\n" . $result . "\n</dl>";
-    return $this->hashBlock($result) . "\n\n";
-  }
-
-
-  function processDefListItems($list_str) {
-  #
-  # Process the contents of a single definition list, splitting it
-  # into individual term and definition list items.
-  #
-    $less_than_tab = $this->tab_width - 1;
-    
-    # trim trailing blank lines:
-    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
-
-    # Process definition terms.
-    $list_str = preg_replace_callback('{
-      (?>\A\n?|\n\n+)         # leading line
-      (               # definition terms = $1
-        [ ]{0,'.$less_than_tab.'} # leading whitespace
-        (?![:][ ]|[ ])        # negative lookahead for a definition 
-                      #   mark (colon) or more whitespace.
-        (?> \S.* \n)+?        # actual term (not whitespace). 
-      )     
-      (?=\n?[ ]{0,3}:[ ])       # lookahead for following line feed 
-                      #   with a definition mark.
-      }xm',
-      array(&$this, '_processDefListItems_callback_dt'), $list_str);
-
-    # Process actual definitions.
-    $list_str = preg_replace_callback('{
-      \n(\n+)?            # leading line = $1
-      (               # marker space = $2
-        [ ]{0,'.$less_than_tab.'} # whitespace before colon
-        [:][ ]+           # definition mark (colon)
-      )
-      ((?s:.+?))            # definition text = $3
-      (?= \n+             # stop at next definition mark,
-        (?:             # next term or end of text
-          [ ]{0,'.$less_than_tab.'} [:][ ]  |
-          <dt> | \z
-        )           
-      )         
-      }xm',
-      array(&$this, '_processDefListItems_callback_dd'), $list_str);
-
-    return $list_str;
-  }
-  function _processDefListItems_callback_dt($matches) {
-    $terms = explode("\n", trim($matches[1]));
-    $text = '';
-    foreach ($terms as $term) {
-      $term = $this->runSpanGamut(trim($term));
-      $text .= "\n<dt>" . $term . "</dt>";
-    }
-    return $text . "\n";
-  }
-  function _processDefListItems_callback_dd($matches) {
-    $leading_line = $matches[1];
-    $marker_space = $matches[2];
-    $def      = $matches[3];
-
-    if ($leading_line || preg_match('/\n{2,}/', $def)) {
-      # Replace marker with the appropriate whitespace indentation
-      $def = str_repeat(' ', strlen($marker_space)) . $def;
-      $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
-      $def = "\n". $def ."\n";
-    }
-    else {
-      $def = rtrim($def);
-      $def = $this->runSpanGamut($this->outdent($def));
-    }
-
-    return "\n<dd>" . $def . "</dd>\n";
-  }
-
-
-  function doFencedCodeBlocks($text) {
-  #
-  # Adding the fenced code block syntax to regular Markdown:
-  #
-  # ~~~
-  # Code block
-  # ~~~
-  #
-    $less_than_tab = $this->tab_width;
-    
-    $text = preg_replace_callback('{
-        (?:\n|\A)
-        # 1: Opening marker
-        (
-          ~{3,} # Marker: three tilde or more.
-        )
-        [ ]* \n # Whitespace and newline following marker.
-        
-        # 2: Content
-        (
-          (?>
-            (?!\1 [ ]* \n)  # Not a closing marker.
-            .*\n+
-          )+
-        )
-        
-        # Closing marker.
-        \1 [ ]* \n
-      }xm',
-      array(&$this, '_doFencedCodeBlocks_callback'), $text);
-
-    return $text;
-  }
-  function _doFencedCodeBlocks_callback($matches) {
-    $codeblock = $matches[2];
-    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
-    $codeblock = preg_replace_callback('/^\n+/',
-      array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
-    $codeblock = "<pre><code>$codeblock</code></pre>";
-    return "\n\n".$this->hashBlock($codeblock)."\n\n";
-  }
-  function _doFencedCodeBlocks_newlines($matches) {
-    return str_repeat("<br$this->empty_element_suffix", 
-      strlen($matches[0]));
-  }
-
-
-  #
-  # Redefining emphasis markers so that emphasis by underscore does not
-  # work in the middle of a word.
-  #
-  var $em_relist = array(
-    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S)(?![.,:;]\s)',
-    '*' => '(?<=\S)(?<!\*)\*(?!\*)',
-    '_' => '(?<=\S)(?<!_)_(?![a-zA-Z0-9_])',
-    );
-  var $strong_relist = array(
-    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S)(?![.,:;]\s)',
-    '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
-    '__' => '(?<=\S)(?<!_)__(?![a-zA-Z0-9_])',
-    );
-  var $em_strong_relist = array(
-    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S)(?![.,:;]\s)',
-    '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
-    '___' => '(?<=\S)(?<!_)___(?![a-zA-Z0-9_])',
-    );
-
-
-  function formParagraphs($text) {
-  #
-  # Params:
-  #   $text - string to process with html <p> tags
-  #
-    # Strip leading and trailing lines:
-    $text = preg_replace('/\A\n+|\n+\z/', '', $text);
-    
-    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
-
-    #
-    # Wrap <p> tags and unhashify HTML blocks
-    #
-    foreach ($grafs as $key => $value) {
-      $value = trim($this->runSpanGamut($value));
-      
-      # Check if this should be enclosed in a paragraph.
-      # Clean tag hashes & block tag hashes are left alone.
-      $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
-      
-      if ($is_p) {
-        $value = "<p>$value</p>";
-      }
-      $grafs[$key] = $value;
-    }
-    
-    # Join grafs in one text, then unhash HTML tags. 
-    $text = implode("\n\n", $grafs);
-    
-    # Finish by removing any tag hashes still present in $text.
-    $text = $this->unhash($text);
-    
-    return $text;
-  }
-  
-  
-  ### Footnotes
-  
-  function stripFootnotes($text) {
-  #
-  # Strips link definitions from text, stores the URLs and titles in
-  # hash references.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Link defs are in the form: [^id]: url "optional title"
-    $text = preg_replace_callback('{
-      ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:  # note_id = $1
-        [ ]*
-        \n?         # maybe *one* newline
-      (           # text = $2 (no blank lines allowed)
-        (?:         
-          .+        # actual text
-        |
-          \n        # newlines but 
-          (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
-          (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
-                  # by non-indented content
-        )*
-      )   
-      }xm',
-      array(&$this, '_stripFootnotes_callback'),
-      $text);
-    return $text;
-  }
-  function _stripFootnotes_callback($matches) {
-    $note_id = $this->fn_id_prefix . $matches[1];
-    $this->footnotes[$note_id] = $this->outdent($matches[2]);
-    return ''; # String that will replace the block
-  }
-
-
-  function doFootnotes($text) {
-  #
-  # Replace footnote references in $text [^id] with a special text-token 
-  # which will be replaced by the actual footnote marker in appendFootnotes.
-  #
-    if (!$this->in_anchor) {
-      $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
-    }
-    return $text;
-  }
-
-  
-  function appendFootnotes($text) {
-  #
-  # Append footnote list to text.
-  #
-    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
-      array(&$this, '_appendFootnotes_callback'), $text);
-  
-    if (!empty($this->footnotes_ordered)) {
-      $text .= "\n\n";
-      $text .= "<div class=\"footnotes\">\n";
-      $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
-      $text .= "<ol>\n\n";
-      
-      $attr = " rev=\"footnote\"";
-      if ($this->fn_backlink_class != "") {
-        $class = $this->fn_backlink_class;
-        $class = $this->encodeAttribute($class);
-        $attr .= " class=\"$class\"";
-      }
-      if ($this->fn_backlink_title != "") {
-        $title = $this->fn_backlink_title;
-        $title = $this->encodeAttribute($title);
-        $attr .= " title=\"$title\"";
-      }
-      $num = 0;
-      
-      while (!empty($this->footnotes_ordered)) {
-        $footnote = reset($this->footnotes_ordered);
-        $note_id = key($this->footnotes_ordered);
-        unset($this->footnotes_ordered[$note_id]);
-        
-        $footnote .= "\n"; # Need to append newline before parsing.
-        $footnote = $this->runBlockGamut("$footnote\n");        
-        $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
-          array(&$this, '_appendFootnotes_callback'), $footnote);
-        
-        $attr = str_replace("%%", ++$num, $attr);
-        $note_id = $this->encodeAttribute($note_id);
-        
-        # Add backlink to last paragraph; create new paragraph if needed.
-        $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
-        if (preg_match('{</p>$}', $footnote)) {
-          $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
-        } else {
-          $footnote .= "\n\n<p>$backlink</p>";
-        }
-        
-        $text .= "<li id=\"fn:$note_id\">\n";
-        $text .= $footnote . "\n";
-        $text .= "</li>\n\n";
-      }
-      
-      $text .= "</ol>\n";
-      $text .= "</div>";
-    }
-    return $text;
-  }
-  function _appendFootnotes_callback($matches) {
-    $node_id = $this->fn_id_prefix . $matches[1];
-    
-    # Create footnote marker only if it has a corresponding footnote *and*
-    # the footnote hasn't been used by another marker.
-    if (isset($this->footnotes[$node_id])) {
-      # Transfert footnote content to the ordered list.
-      $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
-      unset($this->footnotes[$node_id]);
-      
-      $num = $this->footnote_counter++;
-      $attr = " rel=\"footnote\"";
-      if ($this->fn_link_class != "") {
-        $class = $this->fn_link_class;
-        $class = $this->encodeAttribute($class);
-        $attr .= " class=\"$class\"";
-      }
-      if ($this->fn_link_title != "") {
-        $title = $this->fn_link_title;
-        $title = $this->encodeAttribute($title);
-        $attr .= " title=\"$title\"";
-      }
-      
-      $attr = str_replace("%%", $num, $attr);
-      $node_id = $this->encodeAttribute($node_id);
-      
-      return
-        "<sup id=\"fnref:$node_id\">".
-        "<a href=\"#fn:$node_id\"$attr>$num</a>".
-        "</sup>";
-    }
-    
-    return "[^".$matches[1]."]";
-  }
-    
-  
-  ### Abbreviations ###
-  
-  function stripAbbreviations($text) {
-  #
-  # Strips abbreviations from text, stores titles in hash references.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Link defs are in the form: [id]*: url "optional title"
-    $text = preg_replace_callback('{
-      ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
-      (.*)          # text = $2 (no blank lines allowed)  
-      }xm',
-      array(&$this, '_stripAbbreviations_callback'),
-      $text);
-    return $text;
-  }
-  function _stripAbbreviations_callback($matches) {
-    $abbr_word = $matches[1];
-    $abbr_desc = $matches[2];
-    if ($this->abbr_word_re)
-      $this->abbr_word_re .= '|';
-    $this->abbr_word_re .= preg_quote($abbr_word);
-    $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
-    return ''; # String that will replace the block
-  }
-  
-  
-  function doAbbreviations($text) {
-  #
-  # Find defined abbreviations in text and wrap them in <abbr> elements.
-  #
-    if ($this->abbr_word_re) {
-      // cannot use the /x modifier because abbr_word_re may 
-      // contain significant spaces:
-      $text = preg_replace_callback('{'.
-        '(?<![\w\x1A])'.
-        '(?:'.$this->abbr_word_re.')'.
-        '(?![\w\x1A])'.
-        '}', 
-        array(&$this, '_doAbbreviations_callback'), $text);
-    }
-    return $text;
-  }
-  function _doAbbreviations_callback($matches) {
-    $abbr = $matches[0];
-    if (isset($this->abbr_desciptions[$abbr])) {
-      $desc = $this->abbr_desciptions[$abbr];
-      if (empty($desc)) {
-        return $this->hashPart("<abbr>$abbr</abbr>");
-      } else {
-        $desc = $this->encodeAttribute($desc);
-        return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
-      }
-    } else {
-      return $matches[0];
-    }
-  }
-
-}
-
-
-/*
-
-PHP Markdown Extra
-==================
-
-Description
------------
-
-This is a PHP port of the original Markdown formatter written in Perl 
-by John Gruber. This special "Extra" version of PHP Markdown features 
-further enhancements to the syntax for making additional constructs 
-such as tables and definition list.
-
-Markdown is a text-to-HTML filter; it translates an easy-to-read /
-easy-to-write structured text format into HTML. Markdown's text format
-is most similar to that of plain text email, and supports features such
-as headers, *emphasis*, code blocks, blockquotes, and links.
-
-Markdown's syntax is designed not as a generic markup language, but
-specifically to serve as a front-end to (X)HTML. You can use span-level
-HTML tags anywhere in a Markdown document, and you can use block level
-HTML tags (like <div> and <table> as well).
-
-For more information about Markdown's syntax, see:
-
-<http://daringfireball.net/projects/markdown/>
-
-
-Bugs
-----
-
-To file bug reports please send email to:
-
-<michel.fortin@michelf.com>
-
-Please include with your report: (1) the example input; (2) the output you
-expected; (3) the output Markdown actually produced.
-
-
-Version History
---------------- 
-
-See the readme file for detailed release notes for this version.
-
-
-Copyright and License
----------------------
-
-PHP Markdown & Extra
-Copyright (c) 2004-2008 Michel Fortin  
-<http://www.michelf.com/>  
-All rights reserved.
-
-Based on Markdown  
-Copyright (c) 2003-2006 John Gruber   
-<http://daringfireball.net/>   
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright notice,
-  this list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name "Markdown" nor the names of its contributors may
-  be used to endorse or promote products derived from this software
-  without specific prior written permission.
-
-This software is provided by the copyright holders and contributors "as
-is" and any express or implied warranties, including, but not limited
-to, the implied warranties of merchantability and fitness for a
-particular purpose are disclaimed. In no event shall the copyright owner
-or contributors be liable for any direct, indirect, incidental, special,
-exemplary, or consequential damages (including, but not limited to,
-procurement of substitute goods or services; loss of use, data, or
-profits; or business interruption) however caused and on any theory of
-liability, whether in contract, strict liability, or tort (including
-negligence or otherwise) arising in any way out of the use of this
-software, even if advised of the possibility of such damage.
-
-*/
-?>
\ No newline at end of file
diff --git a/ivfdec.c b/ivfdec.c
index e4c9981..3919d6b 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -17,10 +18,10 @@
 #include <string.h>
 #define VPX_CODEC_DISABLE_COMPAT 1
 #include "vpx_config.h"
-#include "vpx_decoder.h"
+#include "vpx/vpx_decoder.h"
 #include "vpx_ports/vpx_timer.h"
 #if CONFIG_VP8_DECODER
-#include "vp8dx.h"
+#include "vpx/vp8dx.h"
 #endif
 #if CONFIG_MD5
 #include "md5_utils.h"
@@ -62,8 +63,10 @@
                                      "Postprocess decoded frames");
 static const arg_def_t summaryarg = ARG_DEF(NULL, "summary", 0,
                                     "Show timing summary");
-static const arg_def_t outputfile = ARG_DEF("o", "output-raw-file", 1,
+static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
                                     "Output raw yv12 file instead of images");
+static const arg_def_t usey4marg = ARG_DEF("y", "y4m", 0,
+                                    "Output file is YUV4MPEG2");
 static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1,
                                     "Max threads to use");
 static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0,
@@ -77,7 +80,7 @@
 {
     &codecarg, &prefixarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
     &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile,
-    &threadsarg, &quietarg,
+    &usey4marg, &threadsarg, &quietarg,
 #if CONFIG_MD5
     &md5arg,
 #endif
@@ -232,9 +235,9 @@
     if (do_md5)
     {
 #if CONFIG_MD5
-        md5_ctx_t *md5_ctx = out = malloc(sizeof(md5_ctx_t));
+        MD5Context *md5_ctx = out = malloc(sizeof(MD5Context));
         (void)out_fn;
-        md5_init(md5_ctx);
+        MD5Init(md5_ctx);
 #endif
     }
     else
@@ -256,7 +259,7 @@
     if (do_md5)
     {
 #if CONFIG_MD5
-        md5_update(out, buf, len);
+        MD5Update(out, buf, len);
 #endif
     }
     else
@@ -273,7 +276,7 @@
         uint8_t md5[16];
         int i;
 
-        md5_finalize(out, md5);
+        MD5Final(md5, out);
         free(out);
 
         for (i = 0; i < 16; i++)
@@ -288,7 +291,12 @@
     }
 }
 
-unsigned int file_is_ivf(FILE *infile, unsigned int *fourcc)
+unsigned int file_is_ivf(FILE *infile,
+                         unsigned int *fourcc,
+                         unsigned int *width,
+                         unsigned int *height,
+                         unsigned int *timebase_num,
+                         unsigned int *timebase_den)
 {
     char raw_hdr[32];
     int is_ivf = 0;
@@ -305,6 +313,10 @@
                         " decode properly.");
 
             *fourcc = mem_get_le32(raw_hdr + 8);
+            *width = mem_get_le16(raw_hdr + 12);
+            *height = mem_get_le16(raw_hdr + 14);
+            *timebase_den = mem_get_le32(raw_hdr + 16);
+            *timebase_num = mem_get_le32(raw_hdr + 20);
         }
     }
 
@@ -330,6 +342,11 @@
     struct arg               arg;
     char                   **argv, **argi, **argj;
     const char                   *fn2 = 0;
+    int                     use_y4m = 0;
+    unsigned int            width;
+    unsigned int            height;
+    unsigned int            timebase_num;
+    unsigned int            timebase_den;
     void                   *out = NULL;
     vpx_codec_dec_cfg_t     cfg = {0};
 #if CONFIG_VP8_DECODER
@@ -361,6 +378,8 @@
         }
         else if (arg_match(&arg, &outputfile, argi))
             fn2 = arg.val;
+        else if (arg_match(&arg, &usey4marg, argi))
+            use_y4m = 1;
         else if (arg_match(&arg, &prefixarg, argi))
             prefix = strdup(arg.val);
         else if (arg_match(&arg, &use_yv12, argi))
@@ -446,10 +465,31 @@
     if (fn2)
         out = out_open(fn2, do_md5);
 
-    is_ivf = file_is_ivf(infile, &fourcc);
+    is_ivf = file_is_ivf(infile, &fourcc, &width, &height,
+                         &timebase_num, &timebase_den);
 
     if (is_ivf)
     {
+        if (use_y4m)
+        {
+            char buffer[128];
+            if (!fn2)
+            {
+                fprintf(stderr, "YUV4MPEG2 output only supported with -o.\n");
+                return EXIT_FAILURE;
+            }
+            /*Correct for the factor of 2 applied to the timebase in the
+               encoder.*/
+            if(timebase_den&1)timebase_num<<=1;
+            else timebase_den>>=1;
+            /*Note: We can't output an aspect ratio here because IVF doesn't
+               store one, and neither does VP8.
+              That will have to wait until these tools support WebM natively.*/
+            sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
+                    "420jpeg", width, height, timebase_den, timebase_num, 'p');
+            out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5);
+        }
+
         /* Try to determine the codec from the fourcc. */
         for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
             if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
@@ -465,6 +505,11 @@
                 break;
             }
     }
+    else if(use_y4m)
+    {
+        fprintf(stderr, "YUV4MPEG2 output only supported from IVF input.\n");
+        return EXIT_FAILURE;
+    }
 
     if (vpx_codec_dec_init(&decoder, iface ? iface :  ifaces[0].iface, &cfg,
                            postproc ? VPX_CODEC_USE_POSTPROC : 0))
@@ -533,29 +578,31 @@
                             prefix, img->d_w, img->d_h, frame_in, sfx);
                     out = out_open(out_fn, do_md5);
                 }
+                else if(use_y4m)
+                    out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
 
-                buf = img->planes[PLANE_Y];
+                buf = img->planes[VPX_PLANE_Y];
 
                 for (y = 0; y < img->d_h; y++)
                 {
                     out_put(out, buf, img->d_w, do_md5);
-                    buf += img->stride[PLANE_Y];
+                    buf += img->stride[VPX_PLANE_Y];
                 }
 
-                buf = img->planes[flipuv?PLANE_V:PLANE_U];
+                buf = img->planes[flipuv?VPX_PLANE_V:VPX_PLANE_U];
 
                 for (y = 0; y < (1 + img->d_h) / 2; y++)
                 {
                     out_put(out, buf, (1 + img->d_w) / 2, do_md5);
-                    buf += img->stride[PLANE_U];
+                    buf += img->stride[VPX_PLANE_U];
                 }
 
-                buf = img->planes[flipuv?PLANE_U:PLANE_V];
+                buf = img->planes[flipuv?VPX_PLANE_U:VPX_PLANE_V];
 
                 for (y = 0; y < (1 + img->d_h) / 2; y++)
                 {
                     out_put(out, buf, (1 + img->d_w) / 2, do_md5);
-                    buf += img->stride[PLANE_V];
+                    buf += img->stride[VPX_PLANE_V];
                 }
 
                 if (!fn2)
diff --git a/ivfenc.c b/ivfenc.c
index bef3d58..ad17292 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -1,23 +1,28 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 /* This is a simple program that encodes YV12 files and generates ivf
  * files using the new interface.
  */
-#define USE_POSIX_MMAP HAVE_SYS_MMAN_H
+#if defined(_WIN32)
+#define USE_POSIX_MMAP 0
+#else
+#define USE_POSIX_MMAP 1
+#endif
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
 #include <string.h>
-#include "vpx_encoder.h"
+#include "vpx/vpx_encoder.h"
 #if USE_POSIX_MMAP
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -25,11 +30,10 @@
 #include <fcntl.h>
 #include <unistd.h>
 #endif
-#if CONFIG_VP8_ENCODER
-#include "vp8cx.h"
-#endif
+#include "vpx/vp8cx.h"
 #include "vpx_ports/mem_ops.h"
 #include "vpx_ports/vpx_timer.h"
+#include "y4minput.h"
 
 static const char *exec_name;
 
@@ -51,8 +55,8 @@
 {
     va_list ap;
     va_start(ap, fmt);
-    vprintf(fmt, ap);
-    printf("\n");
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
     usage_exit();
 }
 
@@ -62,10 +66,10 @@
     {
         const char *detail = vpx_codec_error_detail(ctx);
 
-        printf("%s: %s\n", s, vpx_codec_error(ctx));
+        fprintf(stderr, "%s: %s\n", s, vpx_codec_error(ctx));
 
         if (detail)
-            printf("    %s\n", detail);
+            fprintf(stderr, "    %s\n", detail);
 
         exit(EXIT_FAILURE);
     }
@@ -215,49 +219,79 @@
     return stats->buf;
 }
 
+enum video_file_type
+{
+    FILE_TYPE_RAW,
+    FILE_TYPE_IVF,
+    FILE_TYPE_Y4M
+};
+
+struct detect_buffer {
+    char buf[4];
+    int  valid;
+};
+
+
 #define IVF_FRAME_HDR_SZ (4+8) /* 4 byte size + 8 byte timestamp */
-static int read_frame(FILE *f, vpx_image_t *img, unsigned int is_ivf)
+static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
+                      y4m_input *y4m, struct detect_buffer *detect)
 {
     int plane = 0;
 
-    if (is_ivf)
+    if (file_type == FILE_TYPE_Y4M)
     {
-        char junk[IVF_FRAME_HDR_SZ];
-
-        /* Skip the frame header. We know how big the frame should be. See
-         * write_ivf_frame_header() for documentation on the frame header
-         * layout.
-         */
-        fread(junk, 1, IVF_FRAME_HDR_SZ, f);
+        if (y4m_input_fetch_frame(y4m, f, img) < 0)
+           return 0;
     }
-
-    for (plane = 0; plane < 3; plane++)
+    else
     {
-        unsigned char *ptr;
-        int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
-        int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
-        int r;
-
-        /* Determine the correct plane based on the image format. The for-loop
-         * always counts in Y,U,V order, but this may not match the order of
-         * the data on disk.
-         */
-        switch (plane)
+        if (file_type == FILE_TYPE_IVF)
         {
-        case 1:
-            ptr = img->planes[img->fmt==IMG_FMT_YV12? PLANE_V : PLANE_U];
-            break;
-        case 2:
-            ptr = img->planes[img->fmt==IMG_FMT_YV12?PLANE_U : PLANE_V];
-            break;
-        default:
-            ptr = img->planes[plane];
+            char junk[IVF_FRAME_HDR_SZ];
+
+            /* Skip the frame header. We know how big the frame should be. See
+             * write_ivf_frame_header() for documentation on the frame header
+             * layout.
+             */
+            fread(junk, 1, IVF_FRAME_HDR_SZ, f);
         }
 
-        for (r = 0; r < h; r++)
+        for (plane = 0; plane < 3; plane++)
         {
-            fread(ptr, 1, w, f);
-            ptr += img->stride[plane];
+            unsigned char *ptr;
+            int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
+            int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
+            int r;
+
+            /* Determine the correct plane based on the image format. The for-loop
+             * always counts in Y,U,V order, but this may not match the order of
+             * the data on disk.
+             */
+            switch (plane)
+            {
+            case 1:
+                ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
+                break;
+            case 2:
+                ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
+                break;
+            default:
+                ptr = img->planes[plane];
+            }
+
+            for (r = 0; r < h; r++)
+            {
+                if (detect->valid)
+                {
+                    memcpy(ptr, detect->buf, 4);
+                    fread(ptr+4, 1, w-4, f);
+                    detect->valid = 0;
+                }
+                else
+                    fread(ptr, 1, w, f);
+
+                ptr += img->stride[plane];
+            }
         }
     }
 
@@ -265,22 +299,36 @@
 }
 
 
+unsigned int file_is_y4m(FILE      *infile,
+                         y4m_input *y4m,
+                         char       detect[4])
+{
+    if(memcmp(detect, "YUV4", 4) == 0)
+    {
+        return 1;
+    }
+    return 0;
+}
+
 #define IVF_FILE_HDR_SZ (32)
 unsigned int file_is_ivf(FILE *infile,
                          unsigned int *fourcc,
                          unsigned int *width,
-                         unsigned int *height)
+                         unsigned int *height,
+                         char          detect[4])
 {
     char raw_hdr[IVF_FILE_HDR_SZ];
     int is_ivf = 0;
 
+    if(memcmp(detect, "DKIF", 4) != 0)
+        return 0;
+
     /* See write_ivf_file_header() for more documentation on the file header
      * layout.
      */
-    if (fread(raw_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ)
+    if (fread(raw_hdr + 4, 1, IVF_FILE_HDR_SZ - 4, infile)
+        == IVF_FILE_HDR_SZ - 4)
     {
-        if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K'
-            && raw_hdr[2] == 'I' && raw_hdr[3] == 'F')
         {
             is_ivf = 1;
 
@@ -297,8 +345,6 @@
         *width = mem_get_le16(raw_hdr + 12);
         *height = mem_get_le16(raw_hdr + 14);
     }
-    else
-        rewind(infile);
 
     return is_ivf;
 }
@@ -459,9 +505,11 @@
                                      "Minimum keyframe interval (frames)");
 static const arg_def_t kf_max_dist = ARG_DEF(NULL, "kf-max-dist", 1,
                                      "Maximum keyframe interval (frames)");
+static const arg_def_t kf_disabled = ARG_DEF(NULL, "disable-kf", 0,
+                                     "Disable keyframe placement");
 static const arg_def_t *kf_args[] =
 {
-    &kf_min_dist, &kf_max_dist, NULL
+    &kf_min_dist, &kf_max_dist, &kf_disabled, NULL
 };
 
 
@@ -512,28 +560,28 @@
 {
     int i;
 
-    printf("Usage: %s <options> src_filename dst_filename\n", exec_name);
+    fprintf(stderr, "Usage: %s <options> src_filename dst_filename\n", exec_name);
 
-    printf("\n_options:\n");
+    fprintf(stderr, "\n_options:\n");
     arg_show_usage(stdout, main_args);
-    printf("\n_encoder Global Options:\n");
+    fprintf(stderr, "\n_encoder Global Options:\n");
     arg_show_usage(stdout, global_args);
-    printf("\n_rate Control Options:\n");
+    fprintf(stderr, "\n_rate Control Options:\n");
     arg_show_usage(stdout, rc_args);
-    printf("\n_twopass Rate Control Options:\n");
+    fprintf(stderr, "\n_twopass Rate Control Options:\n");
     arg_show_usage(stdout, rc_twopass_args);
-    printf("\n_keyframe Placement Options:\n");
+    fprintf(stderr, "\n_keyframe Placement Options:\n");
     arg_show_usage(stdout, kf_args);
 #if CONFIG_VP8_ENCODER
-    printf("\n_vp8 Specific Options:\n");
+    fprintf(stderr, "\n_vp8 Specific Options:\n");
     arg_show_usage(stdout, vp8_args);
 #endif
-    printf("\n"
+    fprintf(stderr, "\n"
            "Included encoders:\n"
            "\n");
 
     for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++)
-        printf("    %-6s - %s\n",
+        fprintf(stderr, "    %-6s - %s\n",
                codecs[i].name,
                vpx_codec_iface_name(codecs[i].iface));
 
@@ -566,8 +614,10 @@
     static const int        *ctrl_args_map = NULL;
     int                      verbose = 0, show_psnr = 0;
     int                      arg_use_i420 = 1;
+    int                      arg_have_timebase = 0;
     unsigned long            cx_time = 0;
-    unsigned int             is_ivf, fourcc;
+    unsigned int             file_type, fourcc;
+    y4m_input                y4m;
 
     exec_name = argv_[0];
 
@@ -651,7 +701,7 @@
         /* DWIM: Assume the user meant passes=2 if pass=2 is specified */
         if (one_pass_only > arg_passes)
         {
-            printf("Warning: Assuming --pass=%d implies --passes=%d\n",
+            fprintf(stderr, "Warning: Assuming --pass=%d implies --passes=%d\n",
                    one_pass_only, one_pass_only);
             arg_passes = one_pass_only;
         }
@@ -665,7 +715,8 @@
 
     if (res)
     {
-        printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
+        fprintf(stderr, "Failed to get config: %s\n",
+                vpx_codec_err_to_string(res));
         return EXIT_FAILURE;
     }
 
@@ -684,7 +735,10 @@
         else if (arg_match(&arg, &height, argi))
             cfg.g_h = arg_parse_uint(&arg);
         else if (arg_match(&arg, &timebase, argi))
+        {
             cfg.g_timebase = arg_parse_rational(&arg);
+            arg_have_timebase = 1;
+        }
         else if (arg_match(&arg, &error_resilient, argi))
             cfg.g_error_resilient = arg_parse_uint(&arg);
         else if (arg_match(&arg, &lag_in_frames, argi))
@@ -722,29 +776,34 @@
             cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg);
 
             if (arg_passes < 2)
-                printf("Warning: option %s ignored in one-pass mode.\n",
-                       arg.name);
+                fprintf(stderr,
+                        "Warning: option %s ignored in one-pass mode.\n",
+                        arg.name);
         }
         else if (arg_match(&arg, &minsection_pct, argi))
         {
             cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg);
 
             if (arg_passes < 2)
-                printf("Warning: option %s ignored in one-pass mode.\n",
-                       arg.name);
+                fprintf(stderr,
+                        "Warning: option %s ignored in one-pass mode.\n",
+                        arg.name);
         }
         else if (arg_match(&arg, &maxsection_pct, argi))
         {
             cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg);
 
             if (arg_passes < 2)
-                printf("Warning: option %s ignored in one-pass mode.\n",
-                       arg.name);
+                fprintf(stderr,
+                        "Warning: option %s ignored in one-pass mode.\n",
+                        arg.name);
         }
         else if (arg_match(&arg, &kf_min_dist, argi))
             cfg.kf_min_dist = arg_parse_uint(&arg);
         else if (arg_match(&arg, &kf_max_dist, argi))
             cfg.kf_max_dist = arg_parse_uint(&arg);
+        else if (arg_match(&arg, &kf_disabled, argi))
+            cfg.kf_mode = VPX_KF_DISABLED;
         else
             argj++;
     }
@@ -787,7 +846,7 @@
 
     /* Check for unrecognized options */
     for (argi = argv; *argi; argi++)
-        if (argi[0][0] == '-')
+        if (argi[0][0] == '-' && argi[0][1])
             die("Error: Unrecognized option %s\n", *argi);
 
     /* Handle non-option arguments */
@@ -797,106 +856,136 @@
     if (!in_fn || !out_fn)
         usage_exit();
 
-    /* Parse certain options from the input file, if possible */
-    infile = fopen(in_fn, "rb");
-
-    if (!infile)
-    {
-        printf("Failed to open input file");
-        return EXIT_FAILURE;
-    }
-
-    is_ivf = file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h);
-
-    if (is_ivf)
-    {
-        switch (fourcc)
-        {
-        case 0x32315659:
-            arg_use_i420 = 0;
-            break;
-        case 0x30323449:
-            arg_use_i420 = 1;
-            break;
-        default:
-            printf("Unsupported fourcc (%08x) in IVF\n", fourcc);
-            return EXIT_FAILURE;
-        }
-    }
-
-    fclose(infile);
-
-
-#define SHOW(field) printf("    %-28s = %d\n", #field, cfg.field)
-
-    if (verbose)
-    {
-        printf("Codec: %s\n", vpx_codec_iface_name(codec->iface));
-        printf("Source file: %s Format: %s\n", in_fn, arg_use_i420 ? "I420" : "YV12");
-        printf("Destination file: %s\n", out_fn);
-        printf("Encoder parameters:\n");
-
-        SHOW(g_usage);
-        SHOW(g_threads);
-        SHOW(g_profile);
-        SHOW(g_w);
-        SHOW(g_h);
-        SHOW(g_timebase.num);
-        SHOW(g_timebase.den);
-        SHOW(g_error_resilient);
-        SHOW(g_pass);
-        SHOW(g_lag_in_frames);
-        SHOW(rc_dropframe_thresh);
-        SHOW(rc_resize_allowed);
-        SHOW(rc_resize_up_thresh);
-        SHOW(rc_resize_down_thresh);
-        SHOW(rc_end_usage);
-        SHOW(rc_target_bitrate);
-        SHOW(rc_min_quantizer);
-        SHOW(rc_max_quantizer);
-        SHOW(rc_undershoot_pct);
-        SHOW(rc_overshoot_pct);
-        SHOW(rc_buf_sz);
-        SHOW(rc_buf_initial_sz);
-        SHOW(rc_buf_optimal_sz);
-        SHOW(rc_2pass_vbr_bias_pct);
-        SHOW(rc_2pass_vbr_minsection_pct);
-        SHOW(rc_2pass_vbr_maxsection_pct);
-        SHOW(kf_mode);
-        SHOW(kf_min_dist);
-        SHOW(kf_max_dist);
-    }
-
-    vpx_img_alloc(&raw, arg_use_i420 ? IMG_FMT_I420 : IMG_FMT_YV12,
-                  cfg.g_w, cfg.g_h, 1);
-
-    // This was added so that ivfenc will create monotically increasing
-    // timestamps.  Since we create new timestamps for alt-reference frames
-    // we need to make room in the series of timestamps.  Since there can
-    // only be 1 alt-ref frame ( current bitstream) multiplying by 2
-    // gives us enough room.
-    cfg.g_timebase.den *= 2;
-
     memset(&stats, 0, sizeof(stats));
 
     for (pass = one_pass_only ? one_pass_only - 1 : 0; pass < arg_passes; pass++)
     {
         int frames_in = 0, frames_out = 0;
         unsigned long nbytes = 0;
+        struct detect_buffer detect;
 
-        infile = fopen(in_fn, "rb");
+        /* Parse certain options from the input file, if possible */
+        infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb") : stdin;
 
         if (!infile)
         {
-            printf("Failed to open input file");
+            fprintf(stderr, "Failed to open input file\n");
             return EXIT_FAILURE;
         }
 
-        outfile = fopen(out_fn, "wb");
+        fread(detect.buf, 1, 4, infile);
+        detect.valid = 0;
+
+        if (file_is_y4m(infile, &y4m, detect.buf))
+        {
+            if (y4m_input_open(&y4m, infile, detect.buf, 4) >= 0)
+            {
+                file_type = FILE_TYPE_Y4M;
+                cfg.g_w = y4m.pic_w;
+                cfg.g_h = y4m.pic_h;
+                /* Use the frame rate from the file only if none was specified
+                 * on the command-line.
+                 */
+                if (!arg_have_timebase)
+                {
+                    cfg.g_timebase.num = y4m.fps_d;
+                    cfg.g_timebase.den = y4m.fps_n;
+                    /* And don't reset it in the second pass.*/
+                    arg_have_timebase = 1;
+                }
+                arg_use_i420 = 0;
+            }
+            else
+            {
+                fprintf(stderr, "Unsupported Y4M stream.\n");
+                return EXIT_FAILURE;
+            }
+        }
+        else if (file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
+        {
+            file_type = FILE_TYPE_IVF;
+            switch (fourcc)
+            {
+            case 0x32315659:
+                arg_use_i420 = 0;
+                break;
+            case 0x30323449:
+                arg_use_i420 = 1;
+                break;
+            default:
+                fprintf(stderr, "Unsupported fourcc (%08x) in IVF\n", fourcc);
+                return EXIT_FAILURE;
+            }
+        }
+        else
+        {
+            file_type = FILE_TYPE_RAW;
+            detect.valid = 1;
+        }
+#define SHOW(field) fprintf(stderr, "    %-28s = %d\n", #field, cfg.field)
+
+        if (verbose && pass == 0)
+        {
+            fprintf(stderr, "Codec: %s\n", vpx_codec_iface_name(codec->iface));
+            fprintf(stderr, "Source file: %s Format: %s\n", in_fn,
+                    arg_use_i420 ? "I420" : "YV12");
+            fprintf(stderr, "Destination file: %s\n", out_fn);
+            fprintf(stderr, "Encoder parameters:\n");
+
+            SHOW(g_usage);
+            SHOW(g_threads);
+            SHOW(g_profile);
+            SHOW(g_w);
+            SHOW(g_h);
+            SHOW(g_timebase.num);
+            SHOW(g_timebase.den);
+            SHOW(g_error_resilient);
+            SHOW(g_pass);
+            SHOW(g_lag_in_frames);
+            SHOW(rc_dropframe_thresh);
+            SHOW(rc_resize_allowed);
+            SHOW(rc_resize_up_thresh);
+            SHOW(rc_resize_down_thresh);
+            SHOW(rc_end_usage);
+            SHOW(rc_target_bitrate);
+            SHOW(rc_min_quantizer);
+            SHOW(rc_max_quantizer);
+            SHOW(rc_undershoot_pct);
+            SHOW(rc_overshoot_pct);
+            SHOW(rc_buf_sz);
+            SHOW(rc_buf_initial_sz);
+            SHOW(rc_buf_optimal_sz);
+            SHOW(rc_2pass_vbr_bias_pct);
+            SHOW(rc_2pass_vbr_minsection_pct);
+            SHOW(rc_2pass_vbr_maxsection_pct);
+            SHOW(kf_mode);
+            SHOW(kf_min_dist);
+            SHOW(kf_max_dist);
+        }
+
+        if(pass == (one_pass_only ? one_pass_only - 1 : 0)) {
+            if (file_type == FILE_TYPE_Y4M)
+                /*The Y4M reader does its own allocation.
+                  Just initialize this here to avoid problems if we never read any
+                   frames.*/
+                memset(&raw, 0, sizeof(raw));
+            else
+                vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12,
+                              cfg.g_w, cfg.g_h, 1);
+
+            // This was added so that ivfenc will create monotically increasing
+            // timestamps.  Since we create new timestamps for alt-reference frames
+            // we need to make room in the series of timestamps.  Since there can
+            // only be 1 alt-ref frame ( current bitstream) multiplying by 2
+            // gives us enough room.
+            cfg.g_timebase.den *= 2;
+        }
+
+        outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") : stdout;
 
         if (!outfile)
         {
-            printf("Failed to open output file");
+            fprintf(stderr, "Failed to open output file\n");
             return EXIT_FAILURE;
         }
 
@@ -904,7 +993,7 @@
         {
             if (!stats_open_file(&stats, stats_fn, pass))
             {
-                printf("Failed to open statistics store\n");
+                fprintf(stderr, "Failed to open statistics store\n");
                 return EXIT_FAILURE;
             }
         }
@@ -912,7 +1001,7 @@
         {
             if (!stats_open_mem(&stats, pass))
             {
-                printf("Failed to open statistics store\n");
+                fprintf(stderr, "Failed to open statistics store\n");
                 return EXIT_FAILURE;
             }
         }
@@ -933,9 +1022,6 @@
 
 
         /* Construct Encoder Context */
-        if (cfg.kf_min_dist == cfg.kf_max_dist)
-            cfg.kf_mode = VPX_KF_FIXED;
-
         vpx_codec_enc_init(&encoder, codec->iface, &cfg,
                            show_psnr ? VPX_CODEC_USE_PSNR : 0);
         ctx_exit_on_error(&encoder, "Failed to initialize encoder");
@@ -947,8 +1033,8 @@
         for (i = 0; i < arg_ctrl_cnt; i++)
         {
             if (vpx_codec_control_(&encoder, arg_ctrls[i][0], arg_ctrls[i][1]))
-                printf("Error: Tried to set control %d = %d\n",
-                       arg_ctrls[i][0], arg_ctrls[i][1]);
+                fprintf(stderr, "Error: Tried to set control %d = %d\n",
+                        arg_ctrls[i][0], arg_ctrls[i][1]);
 
             ctx_exit_on_error(&encoder, "Failed to control codec");
         }
@@ -964,13 +1050,15 @@
 
             if (!arg_limit || frames_in < arg_limit)
             {
-                frame_avail = read_frame(infile, &raw, is_ivf);
+                frame_avail = read_frame(infile, &raw, file_type, &y4m,
+                                         &detect);
 
                 if (frame_avail)
                     frames_in++;
 
-                printf("\rPass %d/%d frame %4d/%-4d %7ldB \033[K", pass + 1,
-                       arg_passes, frames_in, frames_out, nbytes);
+                fprintf(stderr,
+                        "\rPass %d/%d frame %4d/%-4d %7ldB \033[K", pass + 1,
+                        arg_passes, frames_in, frames_out, nbytes);
             }
             else
                 frame_avail = 0;
@@ -989,24 +1077,25 @@
             while ((pkt = vpx_codec_get_cx_data(&encoder, &iter)))
             {
                 got_data = 1;
-                nbytes += pkt->data.raw.sz;
 
                 switch (pkt->kind)
                 {
                 case VPX_CODEC_CX_FRAME_PKT:
                     frames_out++;
-                    printf(" %6luF",
-                           (unsigned long)pkt->data.frame.sz);
+                    fprintf(stderr, " %6luF",
+                            (unsigned long)pkt->data.frame.sz);
                     write_ivf_frame_header(outfile, pkt);
                     fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile);
+                    nbytes += pkt->data.raw.sz;
                     break;
                 case VPX_CODEC_STATS_PKT:
                     frames_out++;
-                    printf(" %6luS",
+                    fprintf(stderr, " %6luS",
                            (unsigned long)pkt->data.twopass_stats.sz);
                     stats_write(&stats,
                                 pkt->data.twopass_stats.buf,
                                 pkt->data.twopass_stats.sz);
+                    nbytes += pkt->data.raw.sz;
                     break;
                 case VPX_CODEC_PSNR_PKT:
 
@@ -1015,7 +1104,7 @@
                         int i;
 
                         for (i = 0; i < 4; i++)
-                            printf("%.3lf ", pkt->data.psnr.psnr[i]);
+                            fprintf(stderr, "%.3lf ", pkt->data.psnr.psnr[i]);
                     }
 
                     break;
@@ -1030,10 +1119,11 @@
         /* this bitrate calc is simplified and relies on the fact that this
          * application uses 1/timebase for framerate.
          */
-        printf("\rPass %d/%d frame %4d/%-4d %7ldB %7ldb/f %7"PRId64"b/s"
+        fprintf(stderr,
+               "\rPass %d/%d frame %4d/%-4d %7ldB %7ldb/f %7"PRId64"b/s"
                " %7lu %s (%.2f fps)\033[K", pass + 1,
                arg_passes, frames_in, frames_out, nbytes, nbytes * 8 / frames_in,
-               nbytes * 8 *(int64_t)cfg.g_timebase.den / cfg.g_timebase.num / frames_in,
+               nbytes * 8 *(int64_t)cfg.g_timebase.den/2/ cfg.g_timebase.num / frames_in,
                cx_time > 9999999 ? cx_time / 1000 : cx_time,
                cx_time > 9999999 ? "ms" : "us",
                (float)frames_in * 1000000.0 / (float)cx_time);
@@ -1047,7 +1137,7 @@
 
         fclose(outfile);
         stats_close(&stats);
-        printf("\n");
+        fprintf(stderr, "\n");
 
         if (one_pass_only)
             break;
diff --git a/libs.doxy_template b/libs.doxy_template
index eb37dfc..02e2902 100644
--- a/libs.doxy_template
+++ b/libs.doxy_template
@@ -1,10 +1,11 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/libs.mk b/libs.mk
index f741dba..45cf9bf 100644
--- a/libs.mk
+++ b/libs.mk
@@ -1,17 +1,20 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
 ASM:=$(if $(filter yes,$(CONFIG_GCC)),.asm.s,.asm)
 
-include $(SRC_PATH_BARE)/vpx_codec/vpx_codec.mk
-CODEC_SRCS-yes += $(addprefix vpx_codec/,$(call enabled,API_SRCS))
+CODEC_SRCS-yes += libs.mk
+
+include $(SRC_PATH_BARE)/vpx/vpx_codec.mk
+CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))
 
 include $(SRC_PATH_BARE)/vpx_mem/vpx_mem.mk
 CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS))
@@ -19,18 +22,16 @@
 include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk
 CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))
 
-# Add vpx_codec/ to the include path to allow vp_n[cd]x.h to reference
-# vpx_codec_impl_*.h without extra ifdeffery
-CFLAGS += -I$(SRC_PATH_BARE)/vpx_codec
 
 ifeq ($(CONFIG_VP8_ENCODER),yes)
   VP8_PREFIX=vp8/
   include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk
   CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))
   CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS))
-  CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk
-  INSTALL_MAPS += include/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
-  CODEC_DOC_SRCS += vp8/vp8.h vp8/vp8cx.h
+  CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h vpx/vp8e.h
+  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8e.h include/vpx/vp8cx.h
+  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
+  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
   CODEC_DOC_SECTIONS += vp8 vp8_encoder
 endif
 
@@ -39,9 +40,10 @@
   include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk
   CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS))
   CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS))
-  CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk
-  INSTALL_MAPS += include/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
-  CODEC_DOC_SRCS += vp8/vp8.h vp8/vp8dx.h
+  CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h
+  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
+  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
+  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
   CODEC_DOC_SECTIONS += vp8 vp8_decoder
 endif
 
@@ -59,18 +61,17 @@
 # This variable uses deferred expansion intentionally, since the results of
 # $(wildcard) may change during the course of the Make.
 VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d))))
-CODEC_SRCS-yes += $(SRC_PATH_BARE)/libs.mk # to show up in the msvs workspace
 endif
 
 # The following pairs define a mapping of locations in the distribution
 # tree to locations in the source/build trees.
-INSTALL_MAPS += include/% $(SRC_PATH_BARE)/vpx_codec/%
-INSTALL_MAPS += include/% $(SRC_PATH_BARE)/vpx_ports/%
-INSTALL_MAPS += lib/%     %
+INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/vpx/%
+INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/vpx_ports/%
+INSTALL_MAPS += $(LIBSUBDIR)/%     %
 INSTALL_MAPS += src/%     $(SRC_PATH_BARE)/%
 ifeq ($(CONFIG_MSVS),yes)
-INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),lib/$(p)/%  $(p)/Release/%)
-INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),lib/$(p)/%  $(p)/Debug/%)
+INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Release/%)
+INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Debug/%)
 endif
 
 # If this is a universal (fat) binary, then all the subarchitectures have
@@ -81,7 +82,7 @@
 $(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
 
 CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_integer.h
+CODEC_SRCS-$(BUILD_LIBVPX) += vpx/vpx_integer.h
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_timer.h
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem.h
 CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
@@ -92,26 +93,28 @@
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
 endif
 CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
-CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx_codec/exports
+CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
+CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
+CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
 
-INSTALL-LIBS-yes += include/vpx_codec.h
-INSTALL-LIBS-yes += include/vpx_image.h
-INSTALL-LIBS-yes += include/vpx_integer.h
-INSTALL-LIBS-yes += include/vpx_codec_impl_top.h
-INSTALL-LIBS-yes += include/vpx_codec_impl_bottom.h
-INSTALL-LIBS-$(CONFIG_DECODERS) += include/vpx_decoder.h
-INSTALL-LIBS-$(CONFIG_DECODERS) += include/vpx_decoder_compat.h
-INSTALL-LIBS-$(CONFIG_ENCODERS) += include/vpx_encoder.h
+INSTALL-LIBS-yes += include/vpx/vpx_codec.h
+INSTALL-LIBS-yes += include/vpx/vpx_image.h
+INSTALL-LIBS-yes += include/vpx/vpx_integer.h
+INSTALL-LIBS-yes += include/vpx/vpx_codec_impl_top.h
+INSTALL-LIBS-yes += include/vpx/vpx_codec_impl_bottom.h
+INSTALL-LIBS-$(CONFIG_DECODERS) += include/vpx/vpx_decoder.h
+INSTALL-LIBS-$(CONFIG_DECODERS) += include/vpx/vpx_decoder_compat.h
+INSTALL-LIBS-$(CONFIG_ENCODERS) += include/vpx/vpx_encoder.h
 ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
 ifeq ($(CONFIG_MSVS),yes)
-INSTALL-LIBS-yes                  += $(foreach p,$(VS_PLATFORMS),lib/$(p)/$(CODEC_LIB).lib)
-INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(foreach p,$(VS_PLATFORMS),lib/$(p)/$(CODEC_LIB)d.lib)
-INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),lib/$(p)/vpx.dll)
-INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),lib/$(p)/vpx.exp)
+INSTALL-LIBS-yes                  += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/$(CODEC_LIB).lib)
+INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/$(CODEC_LIB)d.lib)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/vpx.dll)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/vpx.exp)
 endif
 else
-INSTALL-LIBS-yes += lib/libvpx.a
-INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += lib/libvpx_g.a
+INSTALL-LIBS-yes += $(LIBSUBDIR)/libvpx.a
+INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(LIBSUBDIR)/libvpx_g.a
 endif
 
 CODEC_SRCS=$(call enabled,CODEC_SRCS)
@@ -130,7 +133,6 @@
 endif
 obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c
 	@cp $(SRC_PATH_BARE)/build/arm-wince-vs8/obj_int_extract.bat .
-	@cp $(SRC_PATH_BARE)/build/arm-wince-vs8/armasm$(ARM_ARCH).rules .
 	@echo "    [CREATE] $@"
 	$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\
 			--exe\
@@ -143,7 +145,6 @@
 
 PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.vcproj
 PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat
-PROJECTS-$(BUILD_LIBVPX) += armasm$(ARM_ARCH).rules
 endif
 
 vpx.def: $(call enabled,CODEC_EXPORTS)
@@ -175,6 +176,31 @@
 OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS)
 LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
+
+BUILD_LIBVPX_SO         := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
+LIBVPX_SO               := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
+LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)
+$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) libvpx.ver
+$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm -pthread
+$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
+$(BUILD_PFX)$(LIBVPX_SO): SO_VERSION_SCRIPT = libvpx.ver
+LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
+                             libvpx.so libvpx.so.$(VERSION_MAJOR) \
+                             libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR))
+
+libvpx.ver: $(call enabled,CODEC_EXPORTS)
+	@echo "    [CREATE] $@"
+	$(qexec)echo "{ global:" > $@
+	$(qexec)for f in $?; do awk '{print $$2";"}' < $$f >>$@; done
+	$(qexec)echo "local: *; };" >> $@
+CLEAN-OBJS += libvpx.ver
+
+$(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)):
+	@echo "    [LN]      $@"
+	$(qexec)ln -sf $(LIBVPX_SO) $@
+
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
 endif
 
 LIBS-$(LIPO_LIBVPX) += libvpx.a
@@ -209,10 +235,10 @@
 $(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
 CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
 
-CODEC_DOC_SRCS += vpx_codec/vpx_codec.h \
-                  vpx_codec/vpx_decoder.h \
-                  vpx_codec/vpx_encoder.h \
-                  vpx_codec/vpx_image.h
+CODEC_DOC_SRCS += vpx/vpx_codec.h \
+                  vpx/vpx_decoder.h \
+                  vpx/vpx_encoder.h \
+                  vpx/vpx_image.h
 
 CLEAN-OBJS += libs.doxy
 DOCS-yes += libs.doxy
diff --git a/mainpage.dox b/mainpage.dox
index 3596ce0..49dff7b 100644
--- a/mainpage.dox
+++ b/mainpage.dox
@@ -11,7 +11,7 @@
   source codec deployed on millions of computers and devices worldwide.
 
   This distribution of the WebM VP8 Codec SDK includes the following support:
-  
+
   \if vp8_encoder    - \ref vp8_encoder   \endif
   \if vp8_decoder    - \ref vp8_decoder   \endif
 
diff --git a/md5_utils.c b/md5_utils.c
index 16c6f7e..455d9cd 100644
--- a/md5_utils.c
+++ b/md5_utils.c
@@ -1,298 +1,253 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest.  This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ *
+ * Changed so as no longer to depend on Colin Plumb's `usual.h' header
+ * definitions
+ *  - Ian Jackson <ian@chiark.greenend.org.uk>.
+ * Still in the public domain.
  */
 
+#include <sys/types.h>    /* for stupid systems */
 
-/*
-Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
-rights reserved.
-
-License to copy and use this software is granted provided that it
-is identified as the "RSA Data Security, Inc. MD5 Message-Digest
-Algorithm" in all material mentioning or referencing this software
-or this function.
-
-License is also granted to make and use derivative works provided
-that such works are identified as "derived from the RSA Data
-Security, Inc. MD5 Message-Digest Algorithm" in all material
-mentioning or referencing the derived work.
-
-RSA Data Security, Inc. makes no representations concerning either
-the merchantability of this software or the suitability of this
-software for any particular purpose. It is provided "as is"
-without express or implied warranty of any kind.
-
-These notices must be retained in any copies of any part of this
-documentation and/or software.
-*/
+#include <string.h>   /* for memcpy() */
 
 #include "md5_utils.h"
-#include <string.h>
 
-/* Constants for md5_transform routine.
- */
-#define S11 7
-#define S12 12
-#define S13 17
-#define S14 22
-#define S21 5
-#define S22 9
-#define S23 14
-#define S24 20
-#define S31 4
-#define S32 11
-#define S33 16
-#define S34 23
-#define S41 6
-#define S42 10
-#define S43 15
-#define S44 21
-
-static void md5_transform(uint32_t state[4], const uint8_t block[64]);
-static void Encode(uint8_t *output, const uint32_t *input, unsigned int len);
-static void Decode(uint32_t *output, const uint8_t *input, unsigned int len);
-#define md5_memset memset
-#define md5_memcpy memcpy
-
-static unsigned char PADDING[64] =
+void
+byteSwap(UWORD32 *buf, unsigned words)
 {
-    0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
+    md5byte *p;
 
-/* F, G, H and I are basic MD5 functions.
- */
-#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
-#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
-#define H(x, y, z) ((x) ^ (y) ^ (z))
-#define I(x, y, z) ((y) ^ ((x) | (~z)))
+    /* Only swap bytes for big endian machines */
+    int i = 1;
 
-/* ROTATE_LEFT rotates x left n bits.
- */
-#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+    if (*(char *)&i == 1)
+        return;
 
-/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
-Rotation is separate from addition to prevent recomputation.
- */
-#define FF(a, b, c, d, x, s, ac) { \
-        (a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-        (a) = ROTATE_LEFT ((a), (s)); \
-        (a) += (b); \
-    }
-#define GG(a, b, c, d, x, s, ac) { \
-        (a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-        (a) = ROTATE_LEFT ((a), (s)); \
-        (a) += (b); \
-    }
-#define HH(a, b, c, d, x, s, ac) { \
-        (a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-        (a) = ROTATE_LEFT ((a), (s)); \
-        (a) += (b); \
-    }
-#define II(a, b, c, d, x, s, ac) { \
-        (a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-        (a) = ROTATE_LEFT ((a), (s)); \
-        (a) += (b); \
-    }
+    p = (md5byte *)buf;
 
-/* MD5 initialization. Begins an MD5 operation, writing a new context.
- */
-void md5_init(md5_ctx_t *context)
-{
-    context->count[0] = context->count[1] = 0;
-    /* Load magic initialization constants.
-    */
-    context->state[0] = 0x67452301;
-    context->state[1] = 0xefcdab89;
-    context->state[2] = 0x98badcfe;
-    context->state[3] = 0x10325476;
-}
-
-/* MD5 block update operation. Continues an MD5 message-digest
-  operation, processing another message block, and updating the
-  context.
- */
-void md5_update(md5_ctx_t *context, const uint8_t *input, unsigned int input_len)
-{
-    unsigned int i, index, part_len;
-
-    /* Compute number of bytes mod 64 */
-    index = (unsigned int)((context->count[0] >> 3) & 0x3F);
-
-    /* Update number of bits */
-    if ((context->count[0] += ((uint32_t)input_len << 3))
-        < ((uint32_t)input_len << 3))
-        context->count[1]++;
-
-    context->count[1] += ((uint32_t)input_len >> 29);
-
-    part_len = 64 - index;
-
-    /* Transform as many times as possible. */
-    if (input_len >= part_len)
+    do
     {
-        memcpy(&context->buffer[index], input, part_len);
-        md5_transform(context->state, context->buffer);
-
-        for (i = part_len; i + 63 < input_len; i += 64)
-            md5_transform(context->state, &input[i]);
-
-        index = 0;
+        *buf++ = (UWORD32)((unsigned)p[3] << 8 | p[2]) << 16 |
+                 ((unsigned)p[1] << 8 | p[0]);
+        p += 4;
     }
-    else
-        i = 0;
-
-    /* Buffer remaining input */
-    memcpy(&context->buffer[index], &input[i], input_len - i);
+    while (--words);
 }
 
-/* MD5 finalization. Ends an MD5 message-digest operation, writing the
-  the message digest and zeroizing the context.
+/*
+ * Start MD5 accumulation.  Set bit count to 0 and buffer to mysterious
+ * initialization constants.
  */
-void md5_finalize(md5_ctx_t *context, uint8_t digest[16])
+void
+MD5Init(struct MD5Context *ctx)
 {
-    unsigned char bits[8];
-    unsigned int index, pad_len;
+    ctx->buf[0] = 0x67452301;
+    ctx->buf[1] = 0xefcdab89;
+    ctx->buf[2] = 0x98badcfe;
+    ctx->buf[3] = 0x10325476;
 
-    /* Save number of bits */
-    Encode(bits, context->count, 8);
-
-    /* Pad out to 56 mod 64.
-    */
-    index = (unsigned int)((context->count[0] >> 3) & 0x3f);
-    pad_len = (index < 56) ? (56 - index) : (120 - index);
-    md5_update(context, PADDING, pad_len);
-
-    /* Append length (before padding) */
-    md5_update(context, bits, 8);
-    /* Store state in digest */
-    Encode(digest, context->state, 16);
-
-    /* Zeroize sensitive information.
-    */
-    memset(context, 0, sizeof(*context));
+    ctx->bytes[0] = 0;
+    ctx->bytes[1] = 0;
 }
 
-/* MD5 basic transformation. Transforms state based on block.
+/*
+ * Update context to reflect the concatenation of another buffer full
+ * of bytes.
  */
-static void md5_transform(uint32_t state[4], const uint8_t block[64])
+void
+MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len)
 {
-    uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+    UWORD32 t;
 
-    Decode(x, block, 64);
+    /* Update byte count */
 
-    /* Round 1 */
-    FF(a, b, c, d, x[ 0], S11, 0xd76aa478);  /* 1 */
-    FF(d, a, b, c, x[ 1], S12, 0xe8c7b756);  /* 2 */
-    FF(c, d, a, b, x[ 2], S13, 0x242070db);  /* 3 */
-    FF(b, c, d, a, x[ 3], S14, 0xc1bdceee);  /* 4 */
-    FF(a, b, c, d, x[ 4], S11, 0xf57c0faf);  /* 5 */
-    FF(d, a, b, c, x[ 5], S12, 0x4787c62a);  /* 6 */
-    FF(c, d, a, b, x[ 6], S13, 0xa8304613);  /* 7 */
-    FF(b, c, d, a, x[ 7], S14, 0xfd469501);  /* 8 */
-    FF(a, b, c, d, x[ 8], S11, 0x698098d8);  /* 9 */
-    FF(d, a, b, c, x[ 9], S12, 0x8b44f7af);  /* 10 */
-    FF(c, d, a, b, x[10], S13, 0xffff5bb1);  /* 11 */
-    FF(b, c, d, a, x[11], S14, 0x895cd7be);  /* 12 */
-    FF(a, b, c, d, x[12], S11, 0x6b901122);  /* 13 */
-    FF(d, a, b, c, x[13], S12, 0xfd987193);  /* 14 */
-    FF(c, d, a, b, x[14], S13, 0xa679438e);  /* 15 */
-    FF(b, c, d, a, x[15], S14, 0x49b40821);  /* 16 */
+    t = ctx->bytes[0];
 
-    /* Round 2 */
-    GG(a, b, c, d, x[ 1], S21, 0xf61e2562);  /* 17 */
-    GG(d, a, b, c, x[ 6], S22, 0xc040b340);  /* 18 */
-    GG(c, d, a, b, x[11], S23, 0x265e5a51);  /* 19 */
-    GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa);  /* 20 */
-    GG(a, b, c, d, x[ 5], S21, 0xd62f105d);  /* 21 */
-    GG(d, a, b, c, x[10], S22,  0x2441453);  /* 22 */
-    GG(c, d, a, b, x[15], S23, 0xd8a1e681);  /* 23 */
-    GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8);  /* 24 */
-    GG(a, b, c, d, x[ 9], S21, 0x21e1cde6);  /* 25 */
-    GG(d, a, b, c, x[14], S22, 0xc33707d6);  /* 26 */
-    GG(c, d, a, b, x[ 3], S23, 0xf4d50d87);  /* 27 */
-    GG(b, c, d, a, x[ 8], S24, 0x455a14ed);  /* 28 */
-    GG(a, b, c, d, x[13], S21, 0xa9e3e905);  /* 29 */
-    GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8);  /* 30 */
-    GG(c, d, a, b, x[ 7], S23, 0x676f02d9);  /* 31 */
-    GG(b, c, d, a, x[12], S24, 0x8d2a4c8a);  /* 32 */
+    if ((ctx->bytes[0] = t + len) < t)
+        ctx->bytes[1]++;  /* Carry from low to high */
 
-    /* Round 3 */
-    HH(a, b, c, d, x[ 5], S31, 0xfffa3942);  /* 33 */
-    HH(d, a, b, c, x[ 8], S32, 0x8771f681);  /* 34 */
-    HH(c, d, a, b, x[11], S33, 0x6d9d6122);  /* 35 */
-    HH(b, c, d, a, x[14], S34, 0xfde5380c);  /* 36 */
-    HH(a, b, c, d, x[ 1], S31, 0xa4beea44);  /* 37 */
-    HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9);  /* 38 */
-    HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60);  /* 39 */
-    HH(b, c, d, a, x[10], S34, 0xbebfbc70);  /* 40 */
-    HH(a, b, c, d, x[13], S31, 0x289b7ec6);  /* 41 */
-    HH(d, a, b, c, x[ 0], S32, 0xeaa127fa);  /* 42 */
-    HH(c, d, a, b, x[ 3], S33, 0xd4ef3085);  /* 43 */
-    HH(b, c, d, a, x[ 6], S34,  0x4881d05);  /* 44 */
-    HH(a, b, c, d, x[ 9], S31, 0xd9d4d039);  /* 45 */
-    HH(d, a, b, c, x[12], S32, 0xe6db99e5);  /* 46 */
-    HH(c, d, a, b, x[15], S33, 0x1fa27cf8);  /* 47 */
-    HH(b, c, d, a, x[ 2], S34, 0xc4ac5665);  /* 48 */
+    t = 64 - (t & 0x3f);  /* Space available in ctx->in (at least 1) */
 
-    /* Round 4 */
-    II(a, b, c, d, x[ 0], S41, 0xf4292244);  /* 49 */
-    II(d, a, b, c, x[ 7], S42, 0x432aff97);  /* 50 */
-    II(c, d, a, b, x[14], S43, 0xab9423a7);  /* 51 */
-    II(b, c, d, a, x[ 5], S44, 0xfc93a039);  /* 52 */
-    II(a, b, c, d, x[12], S41, 0x655b59c3);  /* 53 */
-    II(d, a, b, c, x[ 3], S42, 0x8f0ccc92);  /* 54 */
-    II(c, d, a, b, x[10], S43, 0xffeff47d);  /* 55 */
-    II(b, c, d, a, x[ 1], S44, 0x85845dd1);  /* 56 */
-    II(a, b, c, d, x[ 8], S41, 0x6fa87e4f);  /* 57 */
-    II(d, a, b, c, x[15], S42, 0xfe2ce6e0);  /* 58 */
-    II(c, d, a, b, x[ 6], S43, 0xa3014314);  /* 59 */
-    II(b, c, d, a, x[13], S44, 0x4e0811a1);  /* 60 */
-    II(a, b, c, d, x[ 4], S41, 0xf7537e82);  /* 61 */
-    II(d, a, b, c, x[11], S42, 0xbd3af235);  /* 62 */
-    II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb);  /* 63 */
-    II(b, c, d, a, x[ 9], S44, 0xeb86d391);  /* 64 */
-
-    state[0] += a;
-    state[1] += b;
-    state[2] += c;
-    state[3] += d;
-
-    /* Zeroize sensitive information.
-    */
-    memset(x, 0, sizeof(x));
-}
-
-/* Encodes input (uint32_t) into output (unsigned char). Assumes len is
-  a multiple of 4.
- */
-static void Encode(uint8_t *output, const uint32_t *input, unsigned int len)
-{
-    unsigned int i, j;
-
-    for (i = 0, j = 0; j < len; i++, j += 4)
+    if (t > len)
     {
-        output[j] = (unsigned char)(input[i] & 0xff);
-        output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
-        output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
-        output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
+        memcpy((md5byte *)ctx->in + 64 - t, buf, len);
+        return;
     }
+
+    /* First chunk is an odd size */
+    memcpy((md5byte *)ctx->in + 64 - t, buf, t);
+    byteSwap(ctx->in, 16);
+    MD5Transform(ctx->buf, ctx->in);
+    buf += t;
+    len -= t;
+
+    /* Process data in 64-byte chunks */
+    while (len >= 64)
+    {
+        memcpy(ctx->in, buf, 64);
+        byteSwap(ctx->in, 16);
+        MD5Transform(ctx->buf, ctx->in);
+        buf += 64;
+        len -= 64;
+    }
+
+    /* Handle any remaining bytes of data. */
+    memcpy(ctx->in, buf, len);
 }
 
-/* Decodes input (unsigned char) into output (uint32_t). Assumes len is
-  a multiple of 4.
+/*
+ * Final wrapup - pad to 64-byte boundary with the bit pattern
+ * 1 0* (64-bit count of bits processed, MSB-first)
  */
-static void Decode(uint32_t *output, const uint8_t *input, unsigned int len)
+void
+MD5Final(md5byte digest[16], struct MD5Context *ctx)
 {
-    unsigned int i, j;
+    int count = ctx->bytes[0] & 0x3f; /* Number of bytes in ctx->in */
+    md5byte *p = (md5byte *)ctx->in + count;
 
-    for (i = 0, j = 0; j < len; i++, j += 4)
-        output[i] = ((uint32_t)input[j]) | (((uint32_t)input[j+1]) << 8) |
-                    (((uint32_t)input[j+2]) << 16) | (((uint32_t)input[j+3]) << 24);
+    /* Set the first char of padding to 0x80.  There is always room. */
+    *p++ = 0x80;
+
+    /* Bytes of padding needed to make 56 bytes (-8..55) */
+    count = 56 - 1 - count;
+
+    if (count < 0)    /* Padding forces an extra block */
+    {
+        memset(p, 0, count + 8);
+        byteSwap(ctx->in, 16);
+        MD5Transform(ctx->buf, ctx->in);
+        p = (md5byte *)ctx->in;
+        count = 56;
+    }
+
+    memset(p, 0, count);
+    byteSwap(ctx->in, 14);
+
+    /* Append length in bits and transform */
+    ctx->in[14] = ctx->bytes[0] << 3;
+    ctx->in[15] = ctx->bytes[1] << 3 | ctx->bytes[0] >> 29;
+    MD5Transform(ctx->buf, ctx->in);
+
+    byteSwap(ctx->buf, 4);
+    memcpy(digest, ctx->buf, 16);
+    memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
 }
+
+#ifndef ASM_MD5
+
+/* The four core functions - F1 is optimized somewhat */
+
+/* #define F1(x, y, z) (x & y | ~x & z) */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm. */
+#define MD5STEP(f,w,x,y,z,in,s) \
+    (w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)
+
+/*
+ * The core of the MD5 algorithm, this alters an existing MD5 hash to
+ * reflect the addition of 16 longwords of new data.  MD5Update blocks
+ * the data and converts bytes into longwords for this routine.
+ */
+void
+MD5Transform(UWORD32 buf[4], UWORD32 const in[16])
+{
+    register UWORD32 a, b, c, d;
+
+    a = buf[0];
+    b = buf[1];
+    c = buf[2];
+    d = buf[3];
+
+    MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+    MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+    MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+    MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+    MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+    MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+    MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+    MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+    MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+    MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+    MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+    MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+    MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+    MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+    MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+    MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+
+    MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+    MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+    MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+    MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+    MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+    MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+    MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+    MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+    MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+    MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+    MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+    MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+    MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+    MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+    MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+    MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+
+    MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+    MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+    MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+    MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+    MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+    MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+    MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+    MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+    MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+    MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+    MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+    MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+    MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+    MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+    MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+    MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+
+    MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+    MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+    MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+    MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+    MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+    MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+    MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+    MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+    MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+    MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+    MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+    MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+    MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+    MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+    MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+    MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+
+    buf[0] += a;
+    buf[1] += b;
+    buf[2] += c;
+    buf[3] += d;
+}
+
+#endif
diff --git a/md5_utils.h b/md5_utils.h
index 8dda8d1..5ca1b5f 100644
--- a/md5_utils.h
+++ b/md5_utils.h
@@ -1,51 +1,42 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * This is the header file for the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest.  This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ *
+ * Changed so as no longer to depend on Colin Plumb's `usual.h'
+ * header definitions
+ *  - Ian Jackson <ian@chiark.greenend.org.uk>.
+ * Still in the public domain.
  */
 
-/*
-Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
-rights reserved.
+#ifndef MD5_H
+#define MD5_H
 
-License to copy and use this software is granted provided that it
-is identified as the "RSA Data Security, Inc. MD5 Message-Digest
-Algorithm" in all material mentioning or referencing this software
-or this function.
+#define md5byte unsigned char
+#define UWORD32 unsigned int
 
-License is also granted to make and use derivative works provided
-that such works are identified as "derived from the RSA Data
-Security, Inc. MD5 Message-Digest Algorithm" in all material
-mentioning or referencing the derived work.
-
-RSA Data Security, Inc. makes no representations concerning either
-the merchantability of this software or the suitability of this
-software for any particular purpose. It is provided "as is"
-without express or implied warranty of any kind.
-
-These notices must be retained in any copies of any part of this
-documentation and/or software.
-*/
-#ifndef HAVE_VPX_PORTS
-#define HAVE_VPX_PORTS 0
-#endif
-#if HAVE_VPX_PORTS
-#include "vpx_ports/vpx_integer.h"
-#else
-#include "vpx_integer.h"
-#endif
-
-/* MD5 context. */
-typedef struct
+typedef struct MD5Context MD5Context;
+struct MD5Context
 {
-    uint32_t state[4];        /* state (ABCD) */
-    uint32_t count[2];        /* number of bits, modulo 2^64 (lsb first) */
-    uint8_t  buffer[64];      /* input buffer */
-} md5_ctx_t;
+    UWORD32 buf[4];
+    UWORD32 bytes[2];
+    UWORD32 in[16];
+};
 
-void md5_init(md5_ctx_t *ctx);
-void md5_update(md5_ctx_t *ctx, const uint8_t *buf, unsigned int len);
-void md5_finalize(md5_ctx_t *ctx, uint8_t md5[16]);
+void MD5Init(struct MD5Context *context);
+void MD5Update(struct MD5Context *context, md5byte const *buf, unsigned len);
+void MD5Final(unsigned char digest[16], struct MD5Context *context);
+void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]);
+
+#endif /* !MD5_H */
diff --git a/release.sh b/release.sh
index 3b77dad..800bdf8 100755
--- a/release.sh
+++ b/release.sh
@@ -1,11 +1,12 @@
-#!/bin/bash
+#!/bin/sh
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
@@ -20,7 +21,7 @@
     esac
 done
 
-TAB=$'\t'
+TAB="$(printf '\t')"
 cat > release.mk << EOF
 %\$(BUILD_SFX).tar.bz2: %/.done
 ${TAB}@echo "\$(subst .tar.bz2,,\$@): tarball"
@@ -185,7 +186,7 @@
     esac
     opts="$opts --enable-postproc"
 
-    [ "x${clean}" == "xyes" ] \
+    [ "x${clean}" = "xyes" ] \
         && rm -rf ${full_cfg}${BUILD_SFX}${TAR_SFX} \
         && rm -rf logs/${full_cfg}${BUILD_SFX}.log.bz2
 
diff --git a/solution.mk b/solution.mk
index 783c6f8..8e852ec 100644
--- a/solution.mk
+++ b/solution.mk
@@ -1,10 +1,11 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/tools/gen_authors.sh b/tools/gen_authors.sh
new file mode 100755
index 0000000..e1246f0
--- /dev/null
+++ b/tools/gen_authors.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Add organization names manually.
+
+cat <<EOF
+# This file is automatically generated from the git commit history
+# by tools/gen_authors.sh.
+
+$(git log --pretty=format:"%aN <%aE>" | sort | uniq)
+Google Inc.
+The Mozilla Foundation
+The Xiph.Org Foundation
+EOF
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index ac110f7..408c253 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -1,13 +1,15 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
+#include "vpx_ports/config.h"
 #include "blockd.h"
 #include "vpx_mem/vpx_mem.h"
 #include "onyxc_int.h"
@@ -16,52 +18,42 @@
 #include "systemdependent.h"
 #include "vpxerrors.h"
 
-#ifdef HAVE_CONFIG_H
-#include "vpx_config.h"
-#endif
 
 extern  void vp8_init_scan_order_mask();
 
 void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
 {
     int i;
-    vpx_memset(mi - cols - 1, 0, sizeof(MODE_INFO) * cols + 1);
+    vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
 
     for (i = 0; i < rows; i++)
     {
         vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO));
     }
 }
+
 void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
 {
+    int i;
+
+    for (i = 0; i < NUM_YV12_BUFFERS; i++)
+        vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
+
     vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
-    vp8_yv12_de_alloc_frame_buffer(&oci->new_frame);
-    vp8_yv12_de_alloc_frame_buffer(&oci->last_frame);
-    vp8_yv12_de_alloc_frame_buffer(&oci->golden_frame);
-    vp8_yv12_de_alloc_frame_buffer(&oci->alt_ref_frame);
     vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
 
-    vpx_free(oci->above_context[Y1CONTEXT]);
-    vpx_free(oci->above_context[UCONTEXT]);
-    vpx_free(oci->above_context[VCONTEXT]);
-    vpx_free(oci->above_context[Y2CONTEXT]);
+    vpx_free(oci->above_context);
     vpx_free(oci->mip);
 
-    oci->above_context[Y1CONTEXT] = 0;
-    oci->above_context[UCONTEXT]  = 0;
-    oci->above_context[VCONTEXT]  = 0;
-    oci->above_context[Y2CONTEXT] = 0;
+    oci->above_context = 0;
     oci->mip = 0;
 
-    // Structure used to minitor GF useage
-    if (oci->gf_active_flags != 0)
-        vpx_free(oci->gf_active_flags);
-
-    oci->gf_active_flags = 0;
 }
 
 int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
 {
+    int i;
+
     vp8_de_alloc_frame_buffers(oci);
 
     // our internal buffers are always multiples of 16
@@ -72,37 +64,33 @@
         height += 16 - (height & 0xf);
 
 
+    for (i = 0; i < NUM_YV12_BUFFERS; i++)
+    {
+      oci->fb_idx_ref_cnt[0] = 0;
+
+      if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i],  width, height, VP8BORDERINPIXELS) < 0)
+        {
+            vp8_de_alloc_frame_buffers(oci);
+            return ALLOC_FAILURE;
+        }
+    }
+
+    oci->new_fb_idx = 0;
+    oci->lst_fb_idx = 1;
+    oci->gld_fb_idx = 2;
+    oci->alt_fb_idx = 3;
+
+    oci->fb_idx_ref_cnt[0] = 1;
+    oci->fb_idx_ref_cnt[1] = 1;
+    oci->fb_idx_ref_cnt[2] = 1;
+    oci->fb_idx_ref_cnt[3] = 1;
+
     if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame,   width, 16, VP8BORDERINPIXELS) < 0)
     {
         vp8_de_alloc_frame_buffers(oci);
         return ALLOC_FAILURE;
     }
 
-
-    if (vp8_yv12_alloc_frame_buffer(&oci->new_frame,   width, height, VP8BORDERINPIXELS) < 0)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    if (vp8_yv12_alloc_frame_buffer(&oci->last_frame,  width, height, VP8BORDERINPIXELS) < 0)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    if (vp8_yv12_alloc_frame_buffer(&oci->golden_frame, width, height, VP8BORDERINPIXELS) < 0)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    if (vp8_yv12_alloc_frame_buffer(&oci->alt_ref_frame, width, height, VP8BORDERINPIXELS) < 0)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
     if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
     {
         vp8_de_alloc_frame_buffers(oci);
@@ -124,33 +112,9 @@
     oci->mi = oci->mip + oci->mode_info_stride + 1;
 
 
-    oci->above_context[Y1CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 4 , 1);
+    oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
 
-    if (!oci->above_context[Y1CONTEXT])
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    oci->above_context[UCONTEXT]  = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
-
-    if (!oci->above_context[UCONTEXT])
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    oci->above_context[VCONTEXT]  = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
-
-    if (!oci->above_context[VCONTEXT])
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    oci->above_context[Y2CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols     , 1);
-
-    if (!oci->above_context[Y2CONTEXT])
+    if (!oci->above_context)
     {
         vp8_de_alloc_frame_buffers(oci);
         return ALLOC_FAILURE;
@@ -158,20 +122,6 @@
 
     vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
 
-    // Structures used to minitor GF usage
-    if (oci->gf_active_flags != 0)
-        vpx_free(oci->gf_active_flags);
-
-    oci->gf_active_flags = (unsigned char *)vpx_calloc(oci->mb_rows * oci->mb_cols, 1);
-
-    if (!oci->gf_active_flags)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    oci->gf_active_count = oci->mb_rows * oci->mb_cols;
-
     return 0;
 }
 void vp8_setup_version(VP8_COMMON *cm)
diff --git a/vp8/common/alloccommon.h b/vp8/common/alloccommon.h
index 73c7383..ea93c25 100644
--- a/vp8/common/alloccommon.h
+++ b/vp8/common/alloccommon.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/armv6/bilinearfilter_v6.asm b/vp8/common/arm/armv6/bilinearfilter_v6.asm
index 4428cf8..09d7338 100644
--- a/vp8/common/arm/armv6/bilinearfilter_v6.asm
+++ b/vp8/common/arm/armv6/bilinearfilter_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/copymem16x16_v6.asm b/vp8/common/arm/armv6/copymem16x16_v6.asm
index 00e9739..fca91a0 100644
--- a/vp8/common/arm/armv6/copymem16x16_v6.asm
+++ b/vp8/common/arm/armv6/copymem16x16_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/copymem8x4_v6.asm b/vp8/common/arm/armv6/copymem8x4_v6.asm
index 94473ca..d8362ef 100644
--- a/vp8/common/arm/armv6/copymem8x4_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x4_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/copymem8x8_v6.asm b/vp8/common/arm/armv6/copymem8x8_v6.asm
index 7cfa533..c6a60c6 100644
--- a/vp8/common/arm/armv6/copymem8x8_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x8_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/dc_only_idct_add_v6.asm b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
new file mode 100644
index 0000000..e0660e9
--- /dev/null
+++ b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
@@ -0,0 +1,67 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+    EXPORT  |vp8_dc_only_idct_add_v6|
+
+    AREA    |.text|, CODE, READONLY
+
+;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr,
+;                             unsigned char *dst_ptr, int pitch, int stride)
+; r0  input_dc
+; r1  pred_ptr
+; r2  dest_ptr
+; r3  pitch
+; sp  stride
+
+|vp8_dc_only_idct_add_v6| PROC
+    stmdb       sp!, {r4 - r7, lr}
+
+    add         r0, r0, #4                ; input_dc += 4
+    ldr         r12, c0x0000FFFF
+    ldr         r4, [r1], r3
+    ldr         r6, [r1], r3
+    and         r0, r12, r0, asr #3       ; input_dc >> 3 + mask
+    ldr         lr, [sp, #20]
+    orr         r0, r0, r0, lsl #16       ; a1 | a1
+
+    uxtab16     r5, r0, r4                ; a1+2 | a1+0
+    uxtab16     r4, r0, r4, ror #8        ; a1+3 | a1+1
+    uxtab16     r7, r0, r6
+    uxtab16     r6, r0, r6, ror #8
+    usat16      r5, #8, r5
+    usat16      r4, #8, r4
+    usat16      r7, #8, r7
+    usat16      r6, #8, r6
+    orr         r5, r5, r4, lsl #8
+    orr         r7, r7, r6, lsl #8
+    ldr         r4, [r1], r3
+    ldr         r6, [r1]
+    str         r5, [r2], lr
+    str         r7, [r2], lr
+
+    uxtab16     r5, r0, r4
+    uxtab16     r4, r0, r4, ror #8
+    uxtab16     r7, r0, r6
+    uxtab16     r6, r0, r6, ror #8
+    usat16      r5, #8, r5
+    usat16      r4, #8, r4
+    usat16      r7, #8, r7
+    usat16      r6, #8, r6
+    orr         r5, r5, r4, lsl #8
+    orr         r7, r7, r6, lsl #8
+    str         r5, [r2], lr
+    str         r7, [r2]
+
+    ldmia       sp!, {r4 - r7, pc}
+
+    ENDP  ; |vp8_dc_only_idct_add_v6|
+
+; Constant Pool
+c0x0000FFFF DCD 0x0000FFFF
+    END
diff --git a/vp8/common/arm/armv6/filter_v6.asm b/vp8/common/arm/armv6/filter_v6.asm
index a7863fc..8bc6d77 100644
--- a/vp8/common/arm/armv6/filter_v6.asm
+++ b/vp8/common/arm/armv6/filter_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/idct_v6.asm b/vp8/common/arm/armv6/idct_v6.asm
index 25c5165..27215af 100644
--- a/vp8/common/arm/armv6/idct_v6.asm
+++ b/vp8/common/arm/armv6/idct_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -14,8 +15,6 @@
     EXPORT  |vp8_short_idct4x4llm_v6_scott|
     EXPORT  |vp8_short_idct4x4llm_v6_dual|
 
-    EXPORT  |vp8_dc_only_idct_armv6|
-
     AREA    |.text|, CODE, READONLY
 
 ;********************************************************************************
@@ -343,34 +342,4 @@
     ldmia   sp!, {r4 - r11, pc} ; replace vars, return                      restore
     ENDP
 
-
-; sjl added 10/17/08
-;void dc_only_idct_armv6(short input_dc, short *output, int pitch)
-|vp8_dc_only_idct_armv6| PROC
-    stmdb       sp!, {r4 - r6, lr}
-
-    add         r0, r0, #0x4
-    add         r4, r1, r2                      ; output + shortpitch
-    mov         r0, r0, ASR #0x3    ;aka a1
-    add         r5, r1, r2, LSL #1              ; output + shortpitch * 2
-    pkhbt       r0, r0, r0, lsl #16             ; a1 | a1
-    add         r6, r5, r2                      ; output + shortpitch * 3
-
-    str         r0, [r1, #0]
-    str         r0, [r1, #4]
-
-    str         r0, [r4, #0]
-    str         r0, [r4, #4]
-
-    str         r0, [r5, #0]
-    str         r0, [r5, #4]
-
-    str         r0, [r6, #0]
-    str         r0, [r6, #4]
-
-
-    ldmia       sp!, {r4 - r6, pc}
-
-    ENDP  ; |vp8_dc_only_idct_armv6|
-
     END
diff --git a/vp8/common/arm/armv6/iwalsh_v6.asm b/vp8/common/arm/armv6/iwalsh_v6.asm
index 8747568..463bff0 100644
--- a/vp8/common/arm/armv6/iwalsh_v6.asm
+++ b/vp8/common/arm/armv6/iwalsh_v6.asm
@@ -1,14 +1,15 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-    EXPORT |vp8_short_inv_walsh4x4_armv6|
-    EXPORT |vp8_short_inv_walsh4x4_1_armv6|
+    EXPORT |vp8_short_inv_walsh4x4_v6|
+    EXPORT |vp8_short_inv_walsh4x4_1_v6|
 
     ARM
     REQUIRE8
@@ -16,8 +17,8 @@
 
     AREA    |.text|, CODE, READONLY  ; name this block of code
 
-;short vp8_short_inv_walsh4x4_armv6(short *input, short *output)
-|vp8_short_inv_walsh4x4_armv6| PROC
+;short vp8_short_inv_walsh4x4_v6(short *input, short *output)
+|vp8_short_inv_walsh4x4_v6| PROC
 
     stmdb       sp!, {r4 - r11, lr}
 
@@ -122,11 +123,11 @@
     str         r5, [r1]
 
     ldmia       sp!, {r4 - r11, pc}
-    ENDP        ; |vp8_short_inv_walsh4x4_armv6|
+    ENDP        ; |vp8_short_inv_walsh4x4_v6|
 
 
-;short vp8_short_inv_walsh4x4_1_armv6(short *input, short *output)
-|vp8_short_inv_walsh4x4_1_armv6| PROC
+;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
+|vp8_short_inv_walsh4x4_1_v6| PROC
 
     ldrsh       r2, [r0]             ; [0]
     add         r2, r2, #3           ; [0] + 3
@@ -144,7 +145,7 @@
     str         r2, [r1]
 
     bx          lr
-    ENDP        ; |vp8_short_inv_walsh4x4_1_armv6|
+    ENDP        ; |vp8_short_inv_walsh4x4_1_v6|
 
 ; Constant Pool
 c0x00030003 DCD 0x00030003
diff --git a/vp8/common/arm/armv6/loopfilter_v6.asm b/vp8/common/arm/armv6/loopfilter_v6.asm
index c2b02dc..b6417de 100644
--- a/vp8/common/arm/armv6/loopfilter_v6.asm
+++ b/vp8/common/arm/armv6/loopfilter_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/recon_v6.asm b/vp8/common/arm/armv6/recon_v6.asm
index 085ff80..99c7bcf 100644
--- a/vp8/common/arm/armv6/recon_v6.asm
+++ b/vp8/common/arm/armv6/recon_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
index 15c6c7d..0137120 100644
--- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm
+++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -54,113 +55,87 @@
 ;stack  const char *thresh,
 ;stack  int  count
 
-;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
+; All 16 elements in flimit are equal. So, in the code, only one load is needed
+; for flimit. Same applies to limit. thresh is not used in simple looopfilter
 
 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
 |vp8_loop_filter_simple_horizontal_edge_armv6| PROC
 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
     stmdb       sp!, {r4 - r11, lr}
 
-    sub         src, src, pstep, lsl #1     ; move src pointer down by 2 lines
-
-    ldr         r12, [r3], #4               ; limit
-    ldr         r3, [src], pstep            ; p1
-
-    ldr         r9, [sp, #36]               ; count for 8-in-parallel
-    ldr         r4, [src], pstep            ; p0
-
-    ldr         r7, [r2], #4                ; flimit
-    ldr         r5, [src], pstep            ; q0
+    ldr         r12, [r3]                   ; limit
+    ldr         r3, [src, -pstep, lsl #1]   ; p1
+    ldr         r4, [src, -pstep]           ; p0
+    ldr         r5, [src]                   ; q0
+    ldr         r6, [src, pstep]            ; q1
+    ldr         r7, [r2]                    ; flimit
     ldr         r2, c0x80808080
-
-    ldr         r6, [src]                   ; q1
-
+    ldr         r9, [sp, #40]               ; count for 8-in-parallel
     uadd8       r7, r7, r7                  ; flimit * 2
-    mov         r9, r9, lsl #1              ; 4-in-parallel
+    mov         r9, r9, lsl #1              ; double the count. we're doing 4 at a time
     uadd8       r12, r7, r12                ; flimit * 2 + limit
+    mov         lr, #0                      ; need 0 in a couple places
 
 |simple_hnext8|
-    ; vp8_simple_filter_mask() function
+    ; vp8_simple_filter_mask()
 
     uqsub8      r7, r3, r6                  ; p1 - q1
     uqsub8      r8, r6, r3                  ; q1 - p1
     uqsub8      r10, r4, r5                 ; p0 - q0
     uqsub8      r11, r5, r4                 ; q0 - p0
     orr         r8, r8, r7                  ; abs(p1 - q1)
-    ldr         lr, c0x7F7F7F7F             ; 01111111 mask
     orr         r10, r10, r11               ; abs(p0 - q0)
-    and         r8, lr, r8, lsr #1          ; abs(p1 - q1) / 2
     uqadd8      r10, r10, r10               ; abs(p0 - q0) * 2
-    mvn         lr, #0                      ; r10 == -1
+    uhadd8      r8, r8, lr                  ; abs(p1 - q2) >> 1
     uqadd8      r10, r10, r8                ; abs(p0 - q0)*2 + abs(p1 - q1)/2
-    ; STALL waiting on r10 :(
-    uqsub8      r10, r10, r12               ; compare to flimit
-    mov         r8, #0
-
-    usub8       r10, r8, r10                ; use usub8 instead of ssub8
-    ; STALL (maybe?) when are flags set? :/
-    sel         r10, lr, r8                 ; filter mask: lr
-
+    mvn         r8, #0
+    usub8       r10, r12, r10               ; compare to flimit. usub8 sets GE flags
+    sel         r10, r8, lr                 ; filter mask: F or 0
     cmp         r10, #0
-    beq         simple_hskip_filter         ; skip filtering
+    beq         simple_hskip_filter         ; skip filtering if all masks are 0x00
 
-    ;vp8_simple_filter() function
+    ;vp8_simple_filter()
 
     eor         r3, r3, r2                  ; p1 offset to convert to a signed value
     eor         r6, r6, r2                  ; q1 offset to convert to a signed value
     eor         r4, r4, r2                  ; p0 offset to convert to a signed value
     eor         r5, r5, r2                  ; q0 offset to convert to a signed value
 
-    qsub8       r3, r3, r6                  ; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1)
-    qsub8       r6, r5, r4                  ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0))
-
-    qadd8       r3, r3, r6
-    ldr         r8, c0x03030303             ; r8 = 3
-
-    qadd8       r3, r3, r6
+    qsub8       r3, r3, r6                  ; vp8_filter = p1 - q1
+    qsub8       r6, r5, r4                  ; q0 - p0
+    qadd8       r3, r3, r6                  ; += q0 - p0
     ldr         r7, c0x04040404
+    qadd8       r3, r3, r6                  ; += q0 - p0
+    ldr         r8, c0x03030303
+    qadd8       r3, r3, r6                  ; vp8_filter = p1-q1 + 3*(q0-p0))
+    ;STALL
+    and         r3, r3, r10                 ; vp8_filter &= mask
 
-    qadd8       r3, r3, r6
-    and         r3, r3, lr                  ; vp8_filter &= mask;
+    qadd8       r7 , r3 , r7                ; Filter1 = vp8_filter + 4
+    qadd8       r8 , r3 , r8                ; Filter2 = vp8_filter + 3
 
-    ;save bottom 3 bits so that we round one side +4 and the other +3
-    qadd8       r8 , r3 , r8                ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
-    qadd8       r3 , r3 , r7                ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
+    shadd8      r7 , r7 , lr
+    shadd8      r8 , r8 , lr
+    shadd8      r7 , r7 , lr
+    shadd8      r8 , r8 , lr
+    shadd8      r7 , r7 , lr                ; Filter1 >>= 3
+    shadd8      r8 , r8 , lr                ; Filter2 >>= 3
 
-    mov         r7, #0
-    shadd8      r8 , r8 , r7                ; Filter2 >>= 3
-    shadd8      r3 , r3 , r7                ; Filter1 >>= 3
-    shadd8      r8 , r8 , r7
-    shadd8      r3 , r3 , r7
-    shadd8      r8 , r8 , r7                ; r8: Filter2
-    shadd8      r3 , r3 , r7                ; r7: filter1
-
-    ;calculate output
-    sub         src, src, pstep, lsl #1
-
-    qadd8       r4, r4, r8                  ; u = vp8_signed_char_clamp(p0 + Filter2)
-    qsub8       r5 ,r5, r3                  ; u = vp8_signed_char_clamp(q0 - Filter1)
-    eor         r4, r4, r2                  ; *op0 = u^0x80
-    str         r4, [src], pstep            ; store op0 result
+    qsub8       r5 ,r5, r7                  ; u = q0 - Filter1
+    qadd8       r4, r4, r8                  ; u = p0 + Filter2
     eor         r5, r5, r2                  ; *oq0 = u^0x80
-    str         r5, [src], pstep            ; store oq0 result
+    str         r5, [src]                   ; store oq0 result
+    eor         r4, r4, r2                  ; *op0 = u^0x80
+    str         r4, [src, -pstep]           ; store op0 result
 
 |simple_hskip_filter|
-    add         src, src, #4
-    sub         src, src, pstep
-    sub         src, src, pstep, lsl #1
-
     subs        r9, r9, #1
+    addne       src, src, #4                ; next row
 
-    ;pld            [src]
-    ;pld            [src, pstep]
-    ;pld            [src, pstep, lsl #1]
-
-    ldrne           r3, [src], pstep            ; p1
-    ldrne           r4, [src], pstep            ; p0
-    ldrne           r5, [src], pstep            ; q0
-    ldrne           r6, [src]                   ; q1
+    ldrne       r3, [src, -pstep, lsl #1]   ; p1
+    ldrne       r4, [src, -pstep]           ; p0
+    ldrne       r5, [src]                   ; q0
+    ldrne       r6, [src, pstep]            ; q1
 
     bne         simple_hnext8
 
@@ -173,9 +148,9 @@
 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
     stmdb       sp!, {r4 - r11, lr}
 
-    ldr         r12, [r2], #4               ; r12: flimit
+    ldr         r12, [r2]                   ; r12: flimit
     ldr         r2, c0x80808080
-    ldr         r7, [r3], #4                ; limit
+    ldr         r7, [r3]                    ; limit
 
     ; load soure data to r7, r8, r9, r10
     ldrh        r3, [src, #-2]
@@ -212,16 +187,14 @@
     uqsub8      r10, r5, r4                 ; q0 - p0
     orr         r7, r7, r8                  ; abs(p1 - q1)
     orr         r9, r9, r10                 ; abs(p0 - q0)
-    ldr         lr, c0x7F7F7F7F             ; 0111 1111 mask
-    uqadd8      r9, r9, r9                  ; abs(p0 - q0) * 2
-    and         r7, lr, r7, lsr #1          ; abs(p1 - q1) / 2
     mov         r8, #0
+    uqadd8      r9, r9, r9                  ; abs(p0 - q0) * 2
+    uhadd8      r7, r7, r8                  ; abs(p1 - q1) / 2
     uqadd8      r7, r7, r9                  ; abs(p0 - q0)*2 + abs(p1 - q1)/2
     mvn         r10, #0                     ; r10 == -1
-    uqsub8      r7, r7, r12                 ; compare to flimit
 
-    usub8       r7, r8, r7
-    sel         r7, r10, r8                 ; filter mask: lr
+    usub8       r7, r12, r7                 ; compare to flimit
+    sel         lr, r10, r8                 ; filter mask
 
     cmp         lr, #0
     beq         simple_vskip_filter         ; skip filtering
@@ -232,35 +205,34 @@
     eor         r4, r4, r2                  ; p0 offset to convert to a signed value
     eor         r5, r5, r2                  ; q0 offset to convert to a signed value
 
-    qsub8       r3, r3, r6                  ; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1)
-    qsub8       r6, r5, r4                  ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0))
+    qsub8       r3, r3, r6                  ; vp8_filter = p1 - q1
+    qsub8       r6, r5, r4                  ; q0 - p0
 
-    qadd8       r3, r3, r6
-    ldr         r8, c0x03030303             ; r8 = 3
+    qadd8       r3, r3, r6                  ; vp8_filter += q0 - p0
+    ldr         r9, c0x03030303             ; r9 = 3
 
-    qadd8       r3, r3, r6
+    qadd8       r3, r3, r6                  ; vp8_filter += q0 - p0
     ldr         r7, c0x04040404
 
-    qadd8       r3, r3, r6
+    qadd8       r3, r3, r6                  ; vp8_filter = p1-q1 + 3*(q0-p0))
+    ;STALL
     and         r3, r3, lr                  ; vp8_filter &= mask
 
-    ;save bottom 3 bits so that we round one side +4 and the other +3
-    qadd8       r8 , r3 , r8                ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
-    qadd8       r3 , r3 , r7                ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
+    qadd8       r9 , r3 , r9                ; Filter2 = vp8_filter + 3
+    qadd8       r3 , r3 , r7                ; Filter1 = vp8_filter + 4
 
-    mov         r7, #0
-    shadd8      r8 , r8 , r7                ; Filter2 >>= 3
-    shadd8      r3 , r3 , r7                ; Filter1 >>= 3
-    shadd8      r8 , r8 , r7
-    shadd8      r3 , r3 , r7
-    shadd8      r8 , r8 , r7                ; r8: filter2
-    shadd8      r3 , r3 , r7                ; r7: filter1
+    shadd8      r9 , r9 , r8
+    shadd8      r3 , r3 , r8
+    shadd8      r9 , r9 , r8
+    shadd8      r3 , r3 , r8
+    shadd8      r9 , r9 , r8                ; Filter2 >>= 3
+    shadd8      r3 , r3 , r8                ; Filter1 >>= 3
 
     ;calculate output
     sub         src, src, pstep, lsl #2
 
-    qadd8       r4, r4, r8                  ; u = vp8_signed_char_clamp(p0 + Filter2)
-    qsub8       r5, r5, r3                  ; u = vp8_signed_char_clamp(q0 - Filter1)
+    qadd8       r4, r4, r9                  ; u = p0 + Filter2
+    qsub8       r5, r5, r3                  ; u = q0 - Filter1
     eor         r4, r4, r2                  ; *op0 = u^0x80
     eor         r5, r5, r2                  ; *oq0 = u^0x80
 
@@ -285,10 +257,6 @@
 |simple_vskip_filter|
     subs        r11, r11, #1
 
-    ;pld            [src]
-    ;pld            [src, pstep]
-    ;pld            [src, pstep, lsl #1]
-
     ; load soure data to r7, r8, r9, r10
     ldrneh      r3, [src, #-2]
     ldrneh      r4, [src], pstep
@@ -308,14 +276,12 @@
 
     bne         simple_vnext8
 
-    ldmia       sp!, {r4 - r12, pc}
+    ldmia       sp!, {r4 - r11, pc}
     ENDP        ; |vp8_loop_filter_simple_vertical_edge_armv6|
 
 ; Constant Pool
 c0x80808080 DCD     0x80808080
 c0x03030303 DCD     0x03030303
 c0x04040404 DCD     0x04040404
-c0x01010101 DCD     0x01010101
-c0x7F7F7F7F DCD     0x7F7F7F7F
 
     END
diff --git a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
index 551d863..8fb80ef 100644
--- a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
+++ b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index bf972a3..247f95b 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 2a4640c..5ed4f80 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index f9ed21e..f28d7f6 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -14,9 +15,9 @@
 #if HAVE_ARMV6
 extern prototype_idct(vp8_short_idct4x4llm_1_v6);
 extern prototype_idct(vp8_short_idct4x4llm_v6_dual);
-extern prototype_idct_scalar(vp8_dc_only_idct_armv6);
-extern prototype_second_order(vp8_short_inv_walsh4x4_1_armv6);
-extern prototype_second_order(vp8_short_inv_walsh4x4_armv6);
+extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
+extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
+extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
 
 #undef  vp8_idct_idct1
 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
@@ -24,20 +25,20 @@
 #undef  vp8_idct_idct16
 #define vp8_idct_idct16 vp8_short_idct4x4llm_v6_dual
 
-#undef  vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_armv6
+#undef  vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_v6
 
 #undef  vp8_idct_iwalsh1
-#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_armv6
+#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_v6
 
 #undef  vp8_idct_iwalsh16
-#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_armv6
+#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
 #endif
 
 #if HAVE_ARMV7
 extern prototype_idct(vp8_short_idct4x4llm_1_neon);
 extern prototype_idct(vp8_short_idct4x4llm_neon);
-extern prototype_idct_scalar(vp8_dc_only_idct_neon);
+extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 
@@ -47,8 +48,8 @@
 #undef  vp8_idct_idct16
 #define vp8_idct_idct16 vp8_short_idct4x4llm_neon
 
-#undef  vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_neon
+#undef  vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_neon
 
 #undef  vp8_idct_iwalsh1
 #define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_neon
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index fa7c626..f86bca1 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -13,16 +14,6 @@
 #include "loopfilter.h"
 #include "onyxc_int.h"
 
-typedef void loop_filter_uvfunction
-(
-    unsigned char *u,   // source pointer
-    int p,              // pitch
-    const signed char *flimit,
-    const signed char *limit,
-    const signed char *thresh,
-    unsigned char *v
-);
-
 extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
 extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
 extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h
index 4bb4945..6c3628a 100644
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
index a2fea2b..bb72bad 100644
--- a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
index 74d2db5..6d4820b 100644
--- a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
index 46ebb0e..b9f3ce0 100644
--- a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
index 80728d4..f7a7d14 100644
--- a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
index f42ac63..e3ea91f 100644
--- a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+++ b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/copymem16x16_neon.asm b/vp8/common/arm/neon/copymem16x16_neon.asm
index 89d5e10..bda4b96 100644
--- a/vp8/common/arm/neon/copymem16x16_neon.asm
+++ b/vp8/common/arm/neon/copymem16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/copymem8x4_neon.asm b/vp8/common/arm/neon/copymem8x4_neon.asm
index 302f734..35c0f67 100644
--- a/vp8/common/arm/neon/copymem8x4_neon.asm
+++ b/vp8/common/arm/neon/copymem8x4_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/copymem8x8_neon.asm b/vp8/common/arm/neon/copymem8x8_neon.asm
index 50d39ef..1f5b941 100644
--- a/vp8/common/arm/neon/copymem8x8_neon.asm
+++ b/vp8/common/arm/neon/copymem8x8_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
new file mode 100644
index 0000000..49ba05f
--- /dev/null
+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
@@ -0,0 +1,49 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_dc_only_idct_add_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr,
+;                               unsigned char *dst_ptr, int pitch, int stride)
+; r0  input_dc
+; r1  pred_ptr
+; r2  dst_ptr
+; r3  pitch
+; sp  stride
+|vp8_dc_only_idct_add_neon| PROC
+    add             r0, r0, #4
+    asr             r0, r0, #3
+    ldr             r12, [sp]
+    vdup.16         q0, r0
+
+    vld1.32         {d2[0]}, [r1], r3
+    vld1.32         {d2[1]}, [r1], r3
+    vld1.32         {d4[0]}, [r1], r3
+    vld1.32         {d4[1]}, [r1]
+
+    vaddw.u8        q1, q0, d2
+    vaddw.u8        q2, q0, d4
+
+    vqmovun.s16     d2, q1
+    vqmovun.s16     d4, q2
+
+    vst1.32         {d2[0]}, [r2], r12
+    vst1.32         {d2[1]}, [r2], r12
+    vst1.32         {d4[0]}, [r2], r12
+    vst1.32         {d4[1]}, [r2]
+
+    bx             lr
+
+    ENDP
+    END
diff --git a/vp8/common/arm/neon/iwalsh_neon.asm b/vp8/common/arm/neon/iwalsh_neon.asm
index 4fc744c..663bf39 100644
--- a/vp8/common/arm/neon/iwalsh_neon.asm
+++ b/vp8/common/arm/neon/iwalsh_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
     EXPORT  |vp8_short_inv_walsh4x4_neon|
     EXPORT  |vp8_short_inv_walsh4x4_1_neon|
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
index e3e8e8a..c0c3e33 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
index f11055d..a8314cd 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
index 6d74fab..0b84dc7 100644
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index 2bb6222..a793d09 100644
--- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
index d79cc68..57913d2 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
index 3a230a9..2eb695f 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
index 86eddaa..4576a6a 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
index 2ab0fc2..8e85caa 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
index ad5afba..d9dbdcf 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
index 60e5175..bdffc62 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/recon16x16mb_neon.asm b/vp8/common/arm/neon/recon16x16mb_neon.asm
index b9ba1cb..3f1a30f 100644
--- a/vp8/common/arm/neon/recon16x16mb_neon.asm
+++ b/vp8/common/arm/neon/recon16x16mb_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/recon2b_neon.asm b/vp8/common/arm/neon/recon2b_neon.asm
index 25aaf8c..99b251c 100644
--- a/vp8/common/arm/neon/recon2b_neon.asm
+++ b/vp8/common/arm/neon/recon2b_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/recon4b_neon.asm b/vp8/common/arm/neon/recon4b_neon.asm
index a4f5b80..9917277 100644
--- a/vp8/common/arm/neon/recon4b_neon.asm
+++ b/vp8/common/arm/neon/recon4b_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/reconb_neon.asm b/vp8/common/arm/neon/reconb_neon.asm
index 16d85a0..288c0ef 100644
--- a/vp8/common/arm/neon/reconb_neon.asm
+++ b/vp8/common/arm/neon/reconb_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/save_neon_reg.asm b/vp8/common/arm/neon/save_neon_reg.asm
index 4873e44..fd7002e 100644
--- a/vp8/common/arm/neon/save_neon_reg.asm
+++ b/vp8/common/arm/neon/save_neon_reg.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
index 7d06ff9..d7bdbae 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
index ffecfbf..d77a287 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
index 9f5f0d2..e434a70 100644
--- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
index c23a9db..3d22d77 100644
--- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
index 18e19f9..1dd6b1b 100644
--- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
index d27485e..37255c7 100644
--- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/recon_arm.c b/vp8/common/arm/recon_arm.c
index 130059e..218898b 100644
--- a/vp8/common/arm/recon_arm.c
+++ b/vp8/common/arm/recon_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index fd9f85e..18855a3 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/reconintra4x4_arm.c b/vp8/common/arm/reconintra4x4_arm.c
index 334d352..8d968d7 100644
--- a/vp8/common/arm/reconintra4x4_arm.c
+++ b/vp8/common/arm/reconintra4x4_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index d7ee1dd..4cc93d1 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -28,7 +29,7 @@
     unsigned char *y_buffer = x->dst.y_buffer;
     unsigned char *ypred_ptr = x->predictor;
     int y_stride = x->dst.y_stride;
-    int mode = x->mbmi.mode;
+    int mode = x->mode_info_context->mbmi.mode;
     int Up = x->up_available;
     int Left = x->left_available;
 
@@ -51,7 +52,7 @@
     unsigned char *y_buffer = x->dst.y_buffer;
     unsigned char *ypred_ptr = x->predictor;
     int y_stride = x->dst.y_stride;
-    int mode = x->mbmi.mode;
+    int mode = x->mode_info_context->mbmi.mode;
     int Up = x->up_available;
     int Left = x->left_available;
 
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h
index 56aec55..53600e5 100644
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
index ecc6929..1eed97e 100644
--- a/vp8/common/arm/systemdependent.c
+++ b/vp8/common/arm/systemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -42,7 +43,6 @@
 
     rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
     rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_neon;
     rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
     rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
 
@@ -74,7 +74,6 @@
 
     rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
     rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_armv6;
     rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_armv6;
     rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_armv6;
 
@@ -127,11 +126,13 @@
     rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
 #endif
 
+#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
     rtcd->postproc.down        = vp8_mbpost_proc_down_c;
     rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
     rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
     rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
 #endif
+#endif
 
 #if HAVE_ARMV7
     vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon;
diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
index 68634bf..5baf8cc 100644
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ b/vp8/common/arm/vpx_asm_offsets.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -31,55 +32,50 @@
  */
 
 #if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER
-DEFINE(yv12_buffer_config_y_width,               offsetof(YV12_BUFFER_CONFIG, y_width));
-DEFINE(yv12_buffer_config_y_height,              offsetof(YV12_BUFFER_CONFIG, y_height));
-DEFINE(yv12_buffer_config_y_stride,              offsetof(YV12_BUFFER_CONFIG, y_stride));
-DEFINE(yv12_buffer_config_uv_width,               offsetof(YV12_BUFFER_CONFIG, uv_width));
-DEFINE(yv12_buffer_config_uv_height,              offsetof(YV12_BUFFER_CONFIG, uv_height));
-DEFINE(yv12_buffer_config_uv_stride,              offsetof(YV12_BUFFER_CONFIG, uv_stride));
-DEFINE(yv12_buffer_config_y_buffer,              offsetof(YV12_BUFFER_CONFIG, y_buffer));
-DEFINE(yv12_buffer_config_u_buffer,              offsetof(YV12_BUFFER_CONFIG, u_buffer));
-DEFINE(yv12_buffer_config_v_buffer,              offsetof(YV12_BUFFER_CONFIG, v_buffer));
+DEFINE(yv12_buffer_config_y_width,              offsetof(YV12_BUFFER_CONFIG, y_width));
+DEFINE(yv12_buffer_config_y_height,             offsetof(YV12_BUFFER_CONFIG, y_height));
+DEFINE(yv12_buffer_config_y_stride,             offsetof(YV12_BUFFER_CONFIG, y_stride));
+DEFINE(yv12_buffer_config_uv_width,             offsetof(YV12_BUFFER_CONFIG, uv_width));
+DEFINE(yv12_buffer_config_uv_height,            offsetof(YV12_BUFFER_CONFIG, uv_height));
+DEFINE(yv12_buffer_config_uv_stride,            offsetof(YV12_BUFFER_CONFIG, uv_stride));
+DEFINE(yv12_buffer_config_y_buffer,             offsetof(YV12_BUFFER_CONFIG, y_buffer));
+DEFINE(yv12_buffer_config_u_buffer,             offsetof(YV12_BUFFER_CONFIG, u_buffer));
+DEFINE(yv12_buffer_config_v_buffer,             offsetof(YV12_BUFFER_CONFIG, v_buffer));
 DEFINE(yv12_buffer_config_border,               offsetof(YV12_BUFFER_CONFIG, border));
 #endif
 
 #if CONFIG_VP8_DECODER
 DEFINE(mb_diff,                                 offsetof(MACROBLOCKD, diff));
 DEFINE(mb_predictor,                            offsetof(MACROBLOCKD, predictor));
-DEFINE(mb_dst_y_stride,                          offsetof(MACROBLOCKD, dst.y_stride));
-DEFINE(mb_dst_y_buffer,                          offsetof(MACROBLOCKD, dst.y_buffer));
-DEFINE(mb_dst_u_buffer,                          offsetof(MACROBLOCKD, dst.u_buffer));
-DEFINE(mb_dst_v_buffer,                          offsetof(MACROBLOCKD, dst.v_buffer));
-DEFINE(mb_mbmi_mode,                            offsetof(MACROBLOCKD, mbmi.mode));
-DEFINE(mb_up_available,                          offsetof(MACROBLOCKD, up_available));
-DEFINE(mb_left_available,                        offsetof(MACROBLOCKD, left_available));
+DEFINE(mb_dst_y_stride,                         offsetof(MACROBLOCKD, dst.y_stride));
+DEFINE(mb_dst_y_buffer,                         offsetof(MACROBLOCKD, dst.y_buffer));
+DEFINE(mb_dst_u_buffer,                         offsetof(MACROBLOCKD, dst.u_buffer));
+DEFINE(mb_dst_v_buffer,                         offsetof(MACROBLOCKD, dst.v_buffer));
+DEFINE(mb_up_available,                         offsetof(MACROBLOCKD, up_available));
+DEFINE(mb_left_available,                       offsetof(MACROBLOCKD, left_available));
 
 DEFINE(detok_scan,                              offsetof(DETOK, scan));
-DEFINE(detok_ptr_onyxblock2context_leftabove,    offsetof(DETOK, ptr_onyxblock2context_leftabove));
-DEFINE(detok_onyx_coef_tree_ptr,                  offsetof(DETOK, vp8_coef_tree_ptr));
-DEFINE(detok_teb_base_ptr,                        offsetof(DETOK, teb_base_ptr));
-DEFINE(detok_norm_ptr,                           offsetof(DETOK, norm_ptr));
-DEFINE(detok_ptr_onyx_coef_bands_x,                offsetof(DETOK, ptr_onyx_coef_bands_x));
+DEFINE(detok_ptr_block2leftabove,               offsetof(DETOK, ptr_block2leftabove));
+DEFINE(detok_coef_tree_ptr,                     offsetof(DETOK, vp8_coef_tree_ptr));
+DEFINE(detok_teb_base_ptr,                      offsetof(DETOK, teb_base_ptr));
+DEFINE(detok_norm_ptr,                          offsetof(DETOK, norm_ptr));
+DEFINE(detok_ptr_coef_bands_x,                  offsetof(DETOK, ptr_coef_bands_x));
 
-DEFINE(DETOK_A,                                 offsetof(DETOK, A));
-DEFINE(DETOK_L,                                 offsetof(DETOK, L));
+DEFINE(detok_A,                                 offsetof(DETOK, A));
+DEFINE(detok_L,                                 offsetof(DETOK, L));
 
-DEFINE(detok_qcoeff_start_ptr,                    offsetof(DETOK, qcoeff_start_ptr));
-DEFINE(detok_current_bc,                         offsetof(DETOK, current_bc));
-DEFINE(detok_coef_probs,                         offsetof(DETOK, coef_probs));
+DEFINE(detok_qcoeff_start_ptr,                  offsetof(DETOK, qcoeff_start_ptr));
+DEFINE(detok_current_bc,                        offsetof(DETOK, current_bc));
+DEFINE(detok_coef_probs,                        offsetof(DETOK, coef_probs));
 DEFINE(detok_eob,                               offsetof(DETOK, eob));
 
-DEFINE(bool_decoder_lowvalue,                   offsetof(BOOL_DECODER, lowvalue));
-DEFINE(bool_decoder_range,                      offsetof(BOOL_DECODER, range));
+DEFINE(bool_decoder_user_buffer_end,            offsetof(BOOL_DECODER, user_buffer_end));
+DEFINE(bool_decoder_user_buffer,                offsetof(BOOL_DECODER, user_buffer));
 DEFINE(bool_decoder_value,                      offsetof(BOOL_DECODER, value));
 DEFINE(bool_decoder_count,                      offsetof(BOOL_DECODER, count));
-DEFINE(bool_decoder_user_buffer,                offsetof(BOOL_DECODER, user_buffer));
-DEFINE(bool_decoder_user_buffer_sz,             offsetof(BOOL_DECODER, user_buffer_sz));
-DEFINE(bool_decoder_decode_buffer,              offsetof(BOOL_DECODER, decode_buffer));
-DEFINE(bool_decoder_read_ptr,                   offsetof(BOOL_DECODER, read_ptr));
-DEFINE(bool_decoder_write_ptr,                  offsetof(BOOL_DECODER, write_ptr));
+DEFINE(bool_decoder_range,                      offsetof(BOOL_DECODER, range));
 
-DEFINE(tokenextrabits_min_val,                   offsetof(TOKENEXTRABITS, min_val));
+DEFINE(tokenextrabits_min_val,                  offsetof(TOKENEXTRABITS, min_val));
 DEFINE(tokenextrabits_length,                   offsetof(TOKENEXTRABITS, Length));
 #endif
 
diff --git a/vp8/common/bigend.h b/vp8/common/bigend.h
index 6a91ba1..6ac3f8b 100644
--- a/vp8/common/bigend.h
+++ b/vp8/common/bigend.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/blockd.c b/vp8/common/blockd.c
index 53f5e72..7f75a72 100644
--- a/vp8/common/blockd.c
+++ b/vp8/common/blockd.c
@@ -1,23 +1,24 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #include "blockd.h"
 #include "vpx_mem/vpx_mem.h"
 
-void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count)
-{
-    vpx_memcpy(t->l, l, sizeof(ENTROPY_CONTEXT) * count);
-    vpx_memcpy(t->a, a, sizeof(ENTROPY_CONTEXT) * count);
-}
-
-const int vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0};
-const int vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0};
 const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
-const int vp8_block2context[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3};
+
+const unsigned char vp8_block2left[25] =
+{
+    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+};
+const unsigned char vp8_block2above[25] =
+{
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
+};
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 84ed53a..4b7f1a3 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -48,19 +49,19 @@
 } POS;
 
 
-typedef int ENTROPY_CONTEXT;
-
+typedef char ENTROPY_CONTEXT;
 typedef struct
 {
-    ENTROPY_CONTEXT l[4];
-    ENTROPY_CONTEXT a[4];
-} TEMP_CONTEXT;
+    ENTROPY_CONTEXT y1[4];
+    ENTROPY_CONTEXT u[2];
+    ENTROPY_CONTEXT v[2];
+    ENTROPY_CONTEXT y2;
+} ENTROPY_CONTEXT_PLANES;
 
-extern void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count);
-extern const int vp8_block2left[25];
-extern const int vp8_block2above[25];
 extern const int vp8_block2type[25];
-extern const int vp8_block2context[25];
+
+extern const unsigned char vp8_block2left[25];
+extern const unsigned char vp8_block2above[25];
 
 #define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
     Dest = ((A)!=0) + ((B)!=0);
@@ -167,15 +168,15 @@
         int as_int;
         MV  as_mv;
     } mv;
-    int partitioning;
-    int partition_count;
-    int mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
-    int dc_diff;
-    unsigned char   segment_id;                  // Which set of segmentation parameters should be used for this MB
-    int force_no_skip;
 
-    B_MODE_INFO partition_bmi[16];
+    char partitioning;
+    unsigned char mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
+    unsigned char dc_diff;
+    unsigned char need_to_clamp_mvs;
 
+    unsigned char segment_id;                  // Which set of segmentation parameters should be used for this MB
+
+    unsigned char force_no_skip; //encoder only
 } MB_MODE_INFO;
 
 
@@ -215,9 +216,10 @@
 {
     DECLARE_ALIGNED(16, short, diff[400]);      // from idct diff
     DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
-    DECLARE_ALIGNED(16, short, reference[384]);
+//not used    DECLARE_ALIGNED(16, short, reference[384]);
     DECLARE_ALIGNED(16, short, qcoeff[400]);
     DECLARE_ALIGNED(16, short, dqcoeff[400]);
+    DECLARE_ALIGNED(16, char,  eobs[25]);
 
     // 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
     BLOCKD block[25];
@@ -226,20 +228,16 @@
     YV12_BUFFER_CONFIG dst;
 
     MODE_INFO *mode_info_context;
-    MODE_INFO *mode_info;
-
     int mode_info_stride;
 
     FRAME_TYPE frame_type;
 
-    MB_MODE_INFO mbmi;
-
     int up_available;
     int left_available;
 
     // Y,U,V,Y2
-    ENTROPY_CONTEXT *above_context[4];   // row of context for each plane
-    ENTROPY_CONTEXT(*left_context)[4];   // (up to) 4 contexts ""
+    ENTROPY_CONTEXT_PLANES *above_context;
+    ENTROPY_CONTEXT_PLANES *left_context;
 
     // 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
     unsigned char segmentation_enabled;
@@ -275,9 +273,6 @@
     int mb_to_top_edge;
     int mb_to_bottom_edge;
 
-    //char * gf_active_ptr;
-    signed char *gf_active_ptr;
-
     unsigned int frames_since_golden;
     unsigned int frames_till_alt_ref_frame;
     vp8_subpix_fn_t  subpixel_predict;
diff --git a/vp8/common/boolcoder.h b/vp8/common/boolcoder.h
index 0659d48..5658868 100644
--- a/vp8/common/boolcoder.h
+++ b/vp8/common/boolcoder.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/codec_common_interface.h b/vp8/common/codec_common_interface.h
index 7881b0a..7a7db38 100644
--- a/vp8/common/codec_common_interface.h
+++ b/vp8/common/codec_common_interface.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 #ifndef CODEC_COMMON_INTERFACE_H
diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h
index 99affd6..785e3ff 100644
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/common.h b/vp8/common/common.h
index 29f6d37..9a93da9 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/common_types.h b/vp8/common/common_types.h
index deb5ed8..4e62486 100644
--- a/vp8/common/common_types.h
+++ b/vp8/common/common_types.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/context.c b/vp8/common/context.c
index 17ee8c3..99e95d3 100644
--- a/vp8/common/context.c
+++ b/vp8/common/context.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/debugmodes.c b/vp8/common/debugmodes.c
index e2d2d2c..c3ac88f 100644
--- a/vp8/common/debugmodes.c
+++ b/vp8/common/debugmodes.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h
index ccdf326..b85f59b 100644
--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/dma_desc.h b/vp8/common/dma_desc.h
index 5e6fa0c..b923da6 100644
--- a/vp8/common/dma_desc.h
+++ b/vp8/common/dma_desc.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/duck_io.h b/vp8/common/duck_io.h
index f63a5cd..43daa65 100644
--- a/vp8/common/duck_io.h
+++ b/vp8/common/duck_io.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index e524c2a..1438e7e 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 1415832..80b4817 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c
index 7dc1acd..e9dc668 100644
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -263,8 +264,10 @@
     vp8_tokens_from_tree(vp8_uv_mode_encodings,  vp8_uv_mode_tree);
     vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree);
 
-    vp8_tokens_from_tree(VP8_MVREFENCODINGS,   vp8_mv_ref_tree);
-    vp8_tokens_from_tree(VP8_SUBMVREFENCODINGS, vp8_sub_mv_ref_tree);
+    vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array,
+                                vp8_mv_ref_tree, NEARESTMV);
+    vp8_tokens_from_tree_offset(vp8_sub_mv_ref_encoding_array,
+                                vp8_sub_mv_ref_tree, LEFT4X4);
 
     vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree);
 }
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index ff630a4..da6ae8e 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -53,10 +54,6 @@
 extern struct vp8_token_struct vp8_mv_ref_encoding_array    [VP8_MVREFS];
 extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];
 
-#define VP8_MVREFENCODINGS      (vp8_mv_ref_encoding_array - NEARESTMV)
-#define VP8_SUBMVREFENCODINGS   (vp8_sub_mv_ref_encoding_array - LEFT4X4)
-
-
 extern const vp8_tree_index vp8_small_mvtree[];
 
 extern struct vp8_token_struct vp8_small_mvencodings [8];
diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c
index 2b00c17..8e72881 100644
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h
index d940c59..911507d 100644
--- a/vp8/common/entropymv.h
+++ b/vp8/common/entropymv.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 7407952..7e06ac3 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -38,7 +39,10 @@
 
     for (i = 0; i < h - 0 + 1; i++)
     {
-        vpx_memset(dest_ptr1, src_ptr1[0], el);
+        // Some linkers will complain if we call vpx_memset with el set to a
+        // constant 0.
+        if (el)
+            vpx_memset(dest_ptr1, src_ptr1[0], el);
         vpx_memset(dest_ptr2, src_ptr2[0], er);
         src_ptr1  += sp;
         src_ptr2  += sp;
diff --git a/vp8/common/extend.h b/vp8/common/extend.h
index 6809ae7..fd0a608 100644
--- a/vp8/common/extend.h
+++ b/vp8/common/extend.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/filter_c.c b/vp8/common/filter_c.c
index 38991cb..3d18d81 100644
--- a/vp8/common/filter_c.c
+++ b/vp8/common/filter_c.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c
index fcb1f20..41037f7 100644
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 2c02033..1a6c72b 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/fourcc.hpp b/vp8/common/fourcc.hpp
index 5f1faed..c582628 100644
--- a/vp8/common/fourcc.hpp
+++ b/vp8/common/fourcc.hpp
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h
index e68c53e..5f52398 100644
--- a/vp8/common/g_common.h
+++ b/vp8/common/g_common.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 0011ae0..c04e31f 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -31,7 +32,7 @@
 
     rtcd->idct.idct1        = vp8_short_idct4x4llm_1_c;
     rtcd->idct.idct16       = vp8_short_idct4x4llm_c;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
+    rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
     rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
     rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_c;
 
@@ -60,7 +61,7 @@
     rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
     rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
 
-#if CONFIG_POSTPROC || CONFIG_VP8_ENCODER
+#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
     rtcd->postproc.down        = vp8_mbpost_proc_down_c;
     rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
     rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
diff --git a/vp8/common/header.h b/vp8/common/header.h
index 8b2b009..3e98eeb 100644
--- a/vp8/common/header.h
+++ b/vp8/common/header.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index 47b5f05..f5fd94d 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -17,8 +18,10 @@
 #define prototype_idct(sym) \
     void sym(short *input, short *output, int pitch)
 
-#define prototype_idct_scalar(sym) \
-    void sym(short input, short *output, int pitch)
+#define prototype_idct_scalar_add(sym) \
+    void sym(short input, \
+             unsigned char *pred, unsigned char *output, \
+             int pitch, int stride)
 
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/idct_x86.h"
@@ -38,10 +41,10 @@
 #endif
 extern prototype_idct(vp8_idct_idct16);
 
-#ifndef vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_c
+#ifndef vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_c
 #endif
-extern prototype_idct_scalar(vp8_idct_idct1_scalar);
+extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add);
 
 
 #ifndef vp8_idct_iwalsh1
@@ -55,14 +58,14 @@
 extern prototype_second_order(vp8_idct_iwalsh16);
 
 typedef prototype_idct((*vp8_idct_fn_t));
-typedef prototype_idct_scalar((*vp8_idct_scalar_fn_t));
+typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
 typedef prototype_second_order((*vp8_second_order_fn_t));
 
 typedef struct
 {
-    vp8_idct_fn_t         idct1;
-    vp8_idct_fn_t         idct16;
-    vp8_idct_scalar_fn_t  idct1_scalar;
+    vp8_idct_fn_t            idct1;
+    vp8_idct_fn_t            idct16;
+    vp8_idct_scalar_add_fn_t idct1_scalar_add;
 
     vp8_second_order_fn_t iwalsh1;
     vp8_second_order_fn_t iwalsh16;
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index 57cf858..196062d 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -103,23 +104,30 @@
     }
 }
 
-
-void vp8_dc_only_idct_c(short input_dc, short *output, int pitch)
+void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
 {
-    int i;
-    int a1;
-    short *op = output;
-    int shortpitch = pitch >> 1;
-    a1 = ((input_dc + 4) >> 3);
+    int a1 = ((input_dc + 4) >> 3);
+    int r, c;
 
-    for (i = 0; i < 4; i++)
+    for (r = 0; r < 4; r++)
     {
-        op[0] = a1;
-        op[1] = a1;
-        op[2] = a1;
-        op[3] = a1;
-        op += shortpitch;
+        for (c = 0; c < 4; c++)
+        {
+            int a = a1 + pred_ptr[c] ;
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dst_ptr[c] = (unsigned char) a ;
+        }
+
+        dst_ptr += stride;
+        pred_ptr += pitch;
     }
+
 }
 
 void vp8_short_inv_walsh4x4_c(short *input, short *output)
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 1ff596e..4cb433a 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -64,7 +65,8 @@
 {
     int i;
 
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != B_PRED &&
+        x->mode_info_context->mbmi.mode != SPLITMV)
     {
         // do 2nd order transform on the dc block
 
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index 93a40f9..b3ffb70 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/littlend.h b/vp8/common/littlend.h
index 08c525c..99df116 100644
--- a/vp8/common/littlend.h
+++ b/vp8/common/littlend.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index 79e6177..da9ca28 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index c6ce508..a2049bf 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -116,5 +117,14 @@
 #define LF_INVOKE(ctx,fn) vp8_lf_##fn
 #endif
 
+typedef void loop_filter_uvfunction
+(
+    unsigned char *u,   // source pointer
+    int p,              // pitch
+    const signed char *flimit,
+    const signed char *limit,
+    const signed char *thresh,
+    unsigned char *v
+);
 
 #endif
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 7d16e48..ea82e2a 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -17,7 +18,7 @@
 
 typedef unsigned char uc;
 
-__inline signed char vp8_signed_char_clamp(int t)
+static __inline signed char vp8_signed_char_clamp(int t)
 {
     t = (t < -128 ? -128 : t);
     t = (t > 127 ? 127 : t);
@@ -26,7 +27,7 @@
 
 
 // should we apply any filter at all ( 11111111 yes, 00000000 no)
-__inline signed char vp8_filter_mask(signed char limit, signed char flimit,
+static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
                                      uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
 {
     signed char mask = 0;
@@ -46,7 +47,7 @@
 }
 
 // is there high variance internal edge ( 11111111 yes, 00000000 no)
-__inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
+static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
 {
     signed char hev = 0;
     hev  |= (abs(p1 - p0) > thresh) * -1;
@@ -54,7 +55,7 @@
     return hev;
 }
 
-__inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
+static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
 
 {
     signed char ps0, qs0;
@@ -160,7 +161,7 @@
     while (++i < count * 8);
 }
 
-__inline void vp8_mbfilter(signed char mask, signed char hev,
+static __inline void vp8_mbfilter(signed char mask, signed char hev,
                            uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
 {
     signed char s, u;
@@ -280,7 +281,7 @@
 }
 
 // should we apply any filter at all ( 11111111 yes, 00000000 no)
-__inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
+static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
 {
 // Why does this cause problems for win32?
 // error C2143: syntax error : missing ';' before 'type'
@@ -293,7 +294,7 @@
     return mask;
 }
 
-__inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
+static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
 {
     signed char vp8_filter, Filter1, Filter2;
     signed char p1 = (signed char) * op1 ^ 0x80;
diff --git a/vp8/common/mac_specs.h b/vp8/common/mac_specs.h
index 97bffc7..4b8ee58 100644
--- a/vp8/common/mac_specs.h
+++ b/vp8/common/mac_specs.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index a7e0ce9..ce40d16 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/modecont.c b/vp8/common/modecont.c
index 9301a25..0fa2995 100644
--- a/vp8/common/modecont.c
+++ b/vp8/common/modecont.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/modecont.h b/vp8/common/modecont.h
index 0c57651..24db882 100644
--- a/vp8/common/modecont.h
+++ b/vp8/common/modecont.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/modecontext.c b/vp8/common/modecontext.c
index ceee74c..8e483b8 100644
--- a/vp8/common/modecontext.c
+++ b/vp8/common/modecontext.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/mv.h b/vp8/common/mv.h
index 3d84181..73c91b9 100644
--- a/vp8/common/mv.h
+++ b/vp8/common/mv.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index b66c400..a006306 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -16,7 +17,7 @@
 {
 #endif
 
-#include "vpx_codec/internal/vpx_codec_internal.h"
+#include "vpx/internal/vpx_codec_internal.h"
 #include "vpx_scale/yv12config.h"
 #include "type_aliases.h"
 #include "ppflags.h"
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index a40ffb9..132765d 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -1,18 +1,19 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #ifndef __INC_VP8C_INT_H
 #define __INC_VP8C_INT_H
 
-#include "vpx_ports/config.h"
-#include "vpx_codec/internal/vpx_codec_internal.h"
+#include "vpx_config.h"
+#include "vpx/internal/vpx_codec_internal.h"
 #include "loopfilter.h"
 #include "entropymv.h"
 #include "entropy.h"
@@ -32,6 +33,7 @@
 #define MAXQ 127
 #define QINDEX_RANGE (MAXQ + 1)
 
+#define NUM_YV12_BUFFERS 4
 
 typedef struct frame_contexts
 {
@@ -93,15 +95,16 @@
     YUV_TYPE clr_type;
     CLAMP_TYPE  clamp_type;
 
-    YV12_BUFFER_CONFIG last_frame;
-    YV12_BUFFER_CONFIG golden_frame;
-    YV12_BUFFER_CONFIG alt_ref_frame;
-    YV12_BUFFER_CONFIG new_frame;
     YV12_BUFFER_CONFIG *frame_to_show;
+
+    YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
+    int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
+    int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
+
     YV12_BUFFER_CONFIG post_proc_buffer;
     YV12_BUFFER_CONFIG temp_scale_frame;
 
-    FRAME_TYPE last_frame_type;  //Add to check if vp8_frame_init_loop_filter() can be skiped.
+    FRAME_TYPE last_frame_type;  //Add to check if vp8_frame_init_loop_filter() can be skipped.
     FRAME_TYPE frame_type;
 
     int show_frame;
@@ -130,8 +133,6 @@
 
     unsigned int frames_since_golden;
     unsigned int frames_till_alt_ref_frame;
-    unsigned char *gf_active_flags;   // Record of which MBs still refer to last golden frame either directly or through 0,0
-    int gf_active_count;
 
     /* We allocate a MODE_INFO struct for each macroblock, together with
        an extra row on top and column on the left to simplify prediction. */
@@ -164,8 +165,8 @@
     int ref_frame_sign_bias[MAX_REF_FRAMES];    // Two state 0, 1
 
     // Y,U,V,Y2
-    ENTROPY_CONTEXT *above_context[4];   // row of context for each plane
-    ENTROPY_CONTEXT left_context[4][4];  // (up to) 4 contexts ""
+    ENTROPY_CONTEXT_PLANES *above_context;   // row of context for each plane
+    ENTROPY_CONTEXT_PLANES left_context;  // (up to) 4 contexts ""
 
 
     // keyframe block modes are predicted by their above, left neighbors
@@ -200,6 +201,7 @@
 
 void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level);
 void vp8_init_loop_filter(VP8_COMMON *cm);
+void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
 extern void vp8_loop_filter_frame(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val);
 
 #endif
diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index 644c0ec..00a97d9 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/partialgfupdate.h b/vp8/common/partialgfupdate.h
index 32a55ee..115134a 100644
--- a/vp8/common/partialgfupdate.h
+++ b/vp8/common/partialgfupdate.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index f019925..0c8cf13 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -274,7 +275,7 @@
 
 }
 
-extern void vp8_deblock(YV12_BUFFER_CONFIG         *source,
+void vp8_deblock(YV12_BUFFER_CONFIG         *source,
                         YV12_BUFFER_CONFIG         *post,
                         int                         q,
                         int                         low_var_thresh,
@@ -329,13 +330,6 @@
 
 }
 
-
-//Notes: It is better to change CHAR to unsigned or signed to
-//avoid error on ARM platform.
-char vp8_an[8][64][3072];
-int vp8_cd[8][64];
-
-
 double vp8_gaussian(double sigma, double mu, double x)
 {
     return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index c45fe92..80337fc 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -87,4 +88,11 @@
                   int                         low_var_thresh,
                   int                         flag,
                   vp8_postproc_rtcd_vtable_t *rtcd);
+
+void vp8_deblock(YV12_BUFFER_CONFIG         *source,
+                 YV12_BUFFER_CONFIG         *post,
+                 int                         q,
+                 int                         low_var_thresh,
+                 int                         flag,
+                 vp8_postproc_rtcd_vtable_t *rtcd);
 #endif
diff --git a/vp8/common/ppc/copy_altivec.asm b/vp8/common/ppc/copy_altivec.asm
index e87eb21..a4ce915 100644
--- a/vp8/common/ppc/copy_altivec.asm
+++ b/vp8/common/ppc/copy_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/filter_altivec.asm b/vp8/common/ppc/filter_altivec.asm
index 2a35507..4da2e94 100644
--- a/vp8/common/ppc/filter_altivec.asm
+++ b/vp8/common/ppc/filter_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/filter_bilinear_altivec.asm b/vp8/common/ppc/filter_bilinear_altivec.asm
index 27e02a8..fd8aa66 100644
--- a/vp8/common/ppc/filter_bilinear_altivec.asm
+++ b/vp8/common/ppc/filter_bilinear_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/idctllm_altivec.asm b/vp8/common/ppc/idctllm_altivec.asm
index e88af8d..117d9cf 100644
--- a/vp8/common/ppc/idctllm_altivec.asm
+++ b/vp8/common/ppc/idctllm_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/loopfilter_altivec.c b/vp8/common/ppc/loopfilter_altivec.c
index 586eed4..bad3cf3 100644
--- a/vp8/common/ppc/loopfilter_altivec.c
+++ b/vp8/common/ppc/loopfilter_altivec.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/ppc/loopfilter_filters_altivec.asm b/vp8/common/ppc/loopfilter_filters_altivec.asm
index 78a5cf9..61df4e9 100644
--- a/vp8/common/ppc/loopfilter_filters_altivec.asm
+++ b/vp8/common/ppc/loopfilter_filters_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/platform_altivec.asm b/vp8/common/ppc/platform_altivec.asm
index 227ef2a..f81d86f 100644
--- a/vp8/common/ppc/platform_altivec.asm
+++ b/vp8/common/ppc/platform_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/recon_altivec.asm b/vp8/common/ppc/recon_altivec.asm
index f478b95..dd39e05 100644
--- a/vp8/common/ppc/recon_altivec.asm
+++ b/vp8/common/ppc/recon_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 2847310..1f5d790 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index c663976..b1f925c 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/pragmas.h b/vp8/common/pragmas.h
index 25a4b77..99fee5a 100644
--- a/vp8/common/pragmas.h
+++ b/vp8/common/pragmas.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/predictdc.c b/vp8/common/predictdc.c
index df4c96e..f315f50 100644
--- a/vp8/common/predictdc.c
+++ b/vp8/common/predictdc.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/predictdc.h b/vp8/common/predictdc.h
index b8871e4..fa85968 100644
--- a/vp8/common/predictdc.h
+++ b/vp8/common/predictdc.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/preproc.h b/vp8/common/preproc.h
index 00ec9a8..0b142bd 100644
--- a/vp8/common/preproc.h
+++ b/vp8/common/preproc.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/preprocif.h b/vp8/common/preprocif.h
index 986c45b..7d554b5 100644
--- a/vp8/common/preprocif.h
+++ b/vp8/common/preprocif.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/proposed.h b/vp8/common/proposed.h
index 1171ede..c965990 100644
--- a/vp8/common/proposed.h
+++ b/vp8/common/proposed.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c
index 09fe31f..e9833fe 100644
--- a/vp8/common/quant_common.c
+++ b/vp8/common/quant_common.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/quant_common.h b/vp8/common/quant_common.h
index 0c92ce8..cb64d8e 100644
--- a/vp8/common/quant_common.h
+++ b/vp8/common/quant_common.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index d1268ea..0b439e0 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index f65a90f..e34a63c 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index c48886d..ffdc660 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -209,7 +210,8 @@
 {
     int i;
 
-    if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
+        x->mode_info_context->mbmi.mode != SPLITMV)
     {
         unsigned char *uptr, *vptr;
         unsigned char *upred_ptr = &x->predictor[256];
@@ -253,16 +255,18 @@
     }
 }
 
-
+//encoder only
 void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
 {
-    if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
+
+  if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
+      x->mode_info_context->mbmi.mode != SPLITMV)
     {
         unsigned char *ptr_base;
         unsigned char *ptr;
         unsigned char *pred_ptr = x->predictor;
-        int mv_row = x->mbmi.mv.as_mv.row;
-        int mv_col = x->mbmi.mv.as_mv.col;
+        int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+        int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
         int pre_stride = x->block[0].pre_stride;
 
         ptr_base = x->pre.y_buffer;
@@ -281,7 +285,7 @@
     {
         int i;
 
-        if (x->mbmi.partitioning < 3)
+        if (x->mode_info_context->mbmi.partitioning < 3)
         {
             for (i = 0; i < 4; i++)
             {
@@ -312,7 +316,9 @@
 
 void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
 {
-    if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
+
+    if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
+        x->mode_info_context->mbmi.mode != SPLITMV)
     {
         int offset;
         unsigned char *ptr_base;
@@ -322,8 +328,8 @@
         unsigned char *upred_ptr = &x->predictor[256];
         unsigned char *vpred_ptr = &x->predictor[320];
 
-        int mv_row = x->mbmi.mv.as_mv.row;
-        int mv_col = x->mbmi.mv.as_mv.col;
+        int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+        int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
         int pre_stride = x->block[0].pre_stride;
 
         ptr_base = x->pre.y_buffer;
@@ -360,7 +366,7 @@
     {
         int i;
 
-        if (x->mbmi.partitioning < 3)
+        if (x->mode_info_context->mbmi.partitioning < 3)
         {
             for (i = 0; i < 4; i++)
             {
@@ -409,7 +415,7 @@
 {
     int i, j;
 
-    if (x->mbmi.mode == SPLITMV)
+    if (x->mode_info_context->mbmi.mode == SPLITMV)
     {
         for (i = 0; i < 2; i++)
         {
@@ -454,8 +460,8 @@
     }
     else
     {
-        int mvrow = x->mbmi.mv.as_mv.row;
-        int mvcol = x->mbmi.mv.as_mv.col;
+        int mvrow = x->mode_info_context->mbmi.mv.as_mv.row;
+        int mvcol = x->mode_info_context->mbmi.mv.as_mv.col;
 
         if (mvrow < 0)
             mvrow -= 1;
@@ -534,7 +540,7 @@
     unsigned char *pred_ptr = x->predictor;
     unsigned char *dst_ptr = x->dst.y_buffer;
 
-    if (x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
     {
         int offset;
         unsigned char *ptr_base;
@@ -546,8 +552,8 @@
         unsigned char *udst_ptr = x->dst.u_buffer;
         unsigned char *vdst_ptr = x->dst.v_buffer;
 
-        int mv_row = x->mbmi.mv.as_mv.row;
-        int mv_col = x->mbmi.mv.as_mv.col;
+        int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+        int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
         int pre_stride = x->dst.y_stride; //x->block[0].pre_stride;
 
         ptr_base = x->pre.y_buffer;
@@ -586,7 +592,7 @@
         //if sth is wrong, go back to what it is in build_inter_predictors_mb.
         int i;
 
-        if (x->mbmi.partitioning < 3)
+        if (x->mode_info_context->mbmi.partitioning < 3)
         {
             for (i = 0; i < 4; i++)
             {
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index b2d1ae9..7c1dee4 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index e33bce3..ce0b1b8 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -42,7 +43,7 @@
     }
 
     // for Y
-    switch (x->mbmi.mode)
+    switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
     {
@@ -163,7 +164,7 @@
     }
 
     // for Y
-    switch (x->mbmi.mode)
+    switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
     {
@@ -289,7 +290,7 @@
         vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
     }
 
-    switch (x->mbmi.uv_mode)
+    switch (x->mode_info_context->mbmi.uv_mode)
     {
     case DC_PRED:
     {
@@ -429,7 +430,7 @@
         vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
     }
 
-    switch (x->mbmi.uv_mode)
+    switch (x->mode_info_context->mbmi.uv_mode)
     {
     case DC_PRED:
     {
diff --git a/vp8/common/reconintra.h b/vp8/common/reconintra.h
index d63aa15..988b43a 100644
--- a/vp8/common/reconintra.h
+++ b/vp8/common/reconintra.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index d92d5c9..c6e5fe7 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconintra4x4.h b/vp8/common/reconintra4x4.h
index 788c8c4..6ac2b71 100644
--- a/vp8/common/reconintra4x4.h
+++ b/vp8/common/reconintra4x4.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/segmentation_common.c b/vp8/common/segmentation_common.c
deleted file mode 100644
index 72b8c87..0000000
--- a/vp8/common/segmentation_common.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-#include "segmentation_common.h"
-#include "vpx_mem/vpx_mem.h"
-
-void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd)
-{
-    int mb_row, mb_col;
-
-    MODE_INFO *this_mb_mode_info = cm->mi;
-
-    xd->gf_active_ptr = (signed char *)cm->gf_active_flags;
-
-    if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
-    {
-        // Reset Gf useage monitors
-        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
-    }
-    else
-    {
-        // for each macroblock row in image
-        for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
-        {
-            // for each macroblock col in image
-            for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
-            {
-
-                // If using golden then set GF active flag if not already set.
-                // If using last frame 0,0 mode then leave flag as it is
-                // else if using non 0,0 motion or intra modes then clear flag if it is currently set
-                if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME))
-                {
-                    if (*(xd->gf_active_ptr) == 0)
-                    {
-                        *(xd->gf_active_ptr) = 1;
-                        cm->gf_active_count ++;
-                    }
-                }
-                else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(xd->gf_active_ptr))
-                {
-                    *(xd->gf_active_ptr) = 0;
-                    cm->gf_active_count--;
-                }
-
-                xd->gf_active_ptr++;          // Step onto next entry
-                this_mb_mode_info++;           // skip to next mb
-
-            }
-
-            // this is to account for the border
-            this_mb_mode_info++;
-        }
-    }
-}
diff --git a/vp8/common/segmentation_common.h b/vp8/common/segmentation_common.h
deleted file mode 100644
index bb93533..0000000
--- a/vp8/common/segmentation_common.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-#include "string.h"
-#include "blockd.h"
-#include "onyxc_int.h"
-
-extern void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd);
diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c
index dcaafe6..8647ae2 100644
--- a/vp8/common/setupintrarecon.c
+++ b/vp8/common/setupintrarecon.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -16,21 +17,15 @@
     int i;
 
     // set up frame new frame for intra coded blocks
-    vpx_memset(ybf->y_buffer - 1 - 2 * ybf->y_stride, 127, ybf->y_width + 5);
     vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
-
     for (i = 0; i < ybf->y_height; i++)
         ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
 
-    vpx_memset(ybf->u_buffer - 1 - 2 * ybf->uv_stride, 127, ybf->uv_width + 5);
     vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
-
     for (i = 0; i < ybf->uv_height; i++)
         ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
 
-    vpx_memset(ybf->v_buffer - 1 - 2 * ybf->uv_stride, 127, ybf->uv_width + 5);
     vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
-
     for (i = 0; i < ybf->uv_height; i++)
         ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
 
diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h
index 6ec79b2..5264fd0 100644
--- a/vp8/common/setupintrarecon.h
+++ b/vp8/common/setupintrarecon.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/subpixel.h b/vp8/common/subpixel.h
index fbd5f4d..acdeec3 100644
--- a/vp8/common/subpixel.h
+++ b/vp8/common/subpixel.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/swapyv12buffer.c b/vp8/common/swapyv12buffer.c
index afe6a88..73656b3 100644
--- a/vp8/common/swapyv12buffer.c
+++ b/vp8/common/swapyv12buffer.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/swapyv12buffer.h b/vp8/common/swapyv12buffer.h
index caf9499..a6473ed 100644
--- a/vp8/common/swapyv12buffer.h
+++ b/vp8/common/swapyv12buffer.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/systemdependent.h b/vp8/common/systemdependent.h
index 1829b64..db99698 100644
--- a/vp8/common/systemdependent.h
+++ b/vp8/common/systemdependent.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c
index a45937b..da40f93 100644
--- a/vp8/common/textblit.c
+++ b/vp8/common/textblit.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index a02cb24..f9a2574 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -74,7 +75,8 @@
 #define thread_sleep(nms) // { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
 #else
 #include <unistd.h>
-#define thread_sleep(nms) usleep(nms*1000);// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
+#include <sched.h>
+#define thread_sleep(nms) sched_yield();// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
 #endif
 /* Not Windows. Assume pthreads */
 
diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c
index 4ad018d..d80c64b 100644
--- a/vp8/common/treecoder.c
+++ b/vp8/common/treecoder.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -46,6 +47,12 @@
     tree2tok(p, t, 0, 0, 0);
 }
 
+void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t,
+                                 int offset)
+{
+    tree2tok(p - offset, t, 0, 0, 0);
+}
+
 static void branch_counts(
     int n,                      /* n = size of alphabet */
     vp8_token tok               [ /* n */ ],
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index 0356d2b..35e5be1 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -53,6 +54,8 @@
 /* Construct encoding array from tree. */
 
 void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree);
+void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree,
+                                 int offset);
 
 
 /* Convert array of token occurrence counts into a table of probabilities
diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h
index addd264..f2a3702 100644
--- a/vp8/common/type_aliases.h
+++ b/vp8/common/type_aliases.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vfwsetting.hpp b/vp8/common/vfwsetting.hpp
index e352e7a..44869ec 100644
--- a/vp8/common/vfwsetting.hpp
+++ b/vp8/common/vfwsetting.hpp
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vpx_ref_build_prefix.h b/vp8/common/vpx_ref_build_prefix.h
index 40608c6..a2fce65 100644
--- a/vp8/common/vpx_ref_build_prefix.h
+++ b/vp8/common/vpx_ref_build_prefix.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vpxblit.h b/vp8/common/vpxblit.h
index d03e0bd..a95d905 100644
--- a/vp8/common/vpxblit.h
+++ b/vp8/common/vpxblit.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vpxblit_c64.h b/vp8/common/vpxblit_c64.h
index a8e28f5..4ee617f 100644
--- a/vp8/common/vpxblit_c64.h
+++ b/vp8/common/vpxblit_c64.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vpxerrors.h b/vp8/common/vpxerrors.h
index e4c9f3e..b70f296 100644
--- a/vp8/common/vpxerrors.h
+++ b/vp8/common/vpxerrors.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/boolcoder.cxx b/vp8/common/x86/boolcoder.cxx
index 06faca6..faddf1f 100644
--- a/vp8/common/x86/boolcoder.cxx
+++ b/vp8/common/x86/boolcoder.cxx
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h
index 5dfb212..f6e568c 100644
--- a/vp8/common/x86/idct_x86.h
+++ b/vp8/common/x86/idct_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -21,7 +22,7 @@
 #if HAVE_MMX
 extern prototype_idct(vp8_short_idct4x4llm_1_mmx);
 extern prototype_idct(vp8_short_idct4x4llm_mmx);
-extern prototype_idct_scalar(vp8_dc_only_idct_mmx);
+extern prototype_idct_scalar_add(vp8_dc_only_idct_add_mmx);
 
 extern prototype_second_order(vp8_short_inv_walsh4x4_mmx);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
@@ -33,8 +34,8 @@
 #undef  vp8_idct_idct16
 #define vp8_idct_idct16 vp8_short_idct4x4llm_mmx
 
-#undef  vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_mmx
+#undef  vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_mmx
 
 #undef vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index 2751c69..99e09a5 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -219,35 +220,61 @@
     pop         rbp
     ret
 
-;void dc_only_idct_mmx(short input_dc, short *output, int pitch)
-global sym(vp8_dc_only_idct_mmx)
-sym(vp8_dc_only_idct_mmx):
+;void vp8_dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
+global sym(vp8_dc_only_idct_add_mmx)
+sym(vp8_dc_only_idct_add_mmx):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 3
+    SHADOW_ARGS_TO_STACK 5
     GET_GOT     rbx
+    push        rsi
+    push        rdi
     ; end prolog
 
-        movd        mm0,            arg(0) ;input_dc
+        mov         rsi,            arg(1) ;s -- prediction
+        mov         rdi,            arg(2) ;d -- destination
+        movsxd      rax,            dword ptr arg(4) ;stride
+        movsxd      rdx,            dword ptr arg(3) ;pitch
+        pxor        mm0,            mm0
 
-        paddw       mm0,            [fours GLOBAL]
-        mov         rdx,            arg(1) ;output
+        movd        mm5,            arg(0) ;input_dc
 
-        psraw       mm0,            3
-        movsxd      rax,            dword ptr arg(2) ;pitch
+        paddw       mm5,            [fours GLOBAL]
 
-        punpcklwd   mm0,            mm0
-        punpckldq   mm0,            mm0
+        psraw       mm5,            3
 
-        movq        [rdx],          mm0
-        movq        [rdx+rax],      mm0
+        punpcklwd   mm5,            mm5
+        punpckldq   mm5,            mm5
 
-        movq        [rdx+rax*2],    mm0
-        add         rdx,            rax
+        movd        mm1,            [rsi]
+        punpcklbw   mm1,            mm0
+        paddsw      mm1,            mm5
+        packuswb    mm1,            mm0              ; pack and unpack to saturate
+        movd        [rdi],          mm1
 
-        movq        [rdx+rax*2],    mm0
+        movd        mm2,            [rsi+rdx]
+        punpcklbw   mm2,            mm0
+        paddsw      mm2,            mm5
+        packuswb    mm2,            mm0              ; pack and unpack to saturate
+        movd        [rdi+rax],      mm2
+
+        movd        mm3,            [rsi+2*rdx]
+        punpcklbw   mm3,            mm0
+        paddsw      mm3,            mm5
+        packuswb    mm3,            mm0              ; pack and unpack to saturate
+        movd        [rdi+2*rax],    mm3
+
+        add         rdi,            rax
+        add         rsi,            rdx
+        movd        mm4,            [rsi+2*rdx]
+        punpcklbw   mm4,            mm0
+        paddsw      mm4,            mm5
+        packuswb    mm4,            mm0              ; pack and unpack to saturate
+        movd        [rdi+2*rax],    mm4
 
     ; begin epilog
+    pop rdi
+    pop rsi
     RESTORE_GOT
     UNSHADOW_ARGS
     pop         rbp
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
new file mode 100644
index 0000000..ac94185
--- /dev/null
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -0,0 +1,708 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void idct_dequant_0_2x_sse2
+; (
+;   short *qcoeff       - 0
+;   short *dequant      - 1
+;   unsigned char *pre  - 2
+;   unsigned char *dst  - 3
+;   int dst_stride      - 4
+;   int blk_stride      - 5
+; )
+
+global sym(idct_dequant_0_2x_sse2)
+sym(idct_dequant_0_2x_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    ; end prolog
+
+        mov         rdx,            arg(1) ; dequant
+        mov         rax,            arg(0) ; qcoeff
+
+    ; Zero out xmm7, for use unpacking
+        pxor        xmm7,           xmm7
+
+        movd        xmm4,           [rax]
+        movd        xmm5,           [rdx]
+
+        pinsrw      xmm4,           [rax+32],   4
+        pinsrw      xmm5,           [rdx],      4
+
+        pmullw      xmm4,           xmm5
+
+    ; clear coeffs
+        movd        [rax],          xmm7
+        movd        [rax+32],       xmm7
+;pshufb
+        pshuflw     xmm4,           xmm4,       00000000b
+        pshufhw     xmm4,           xmm4,       00000000b
+
+        mov         rax,            arg(2) ; pre
+        paddw       xmm4,           [fours GLOBAL]
+
+        movsxd      rcx,            dword ptr arg(5) ; blk_stride
+        psraw       xmm4,           3
+
+        movq        xmm0,           [rax]
+        movq        xmm1,           [rax+rcx]
+        movq        xmm2,           [rax+2*rcx]
+        lea         rcx,            [3*rcx]
+        movq        xmm3,           [rax+rcx]
+
+        punpcklbw   xmm0,           xmm7
+        punpcklbw   xmm1,           xmm7
+        punpcklbw   xmm2,           xmm7
+        punpcklbw   xmm3,           xmm7
+
+        mov         rax,            arg(3) ; dst
+        movsxd      rdx,            dword ptr arg(4) ; dst_stride
+
+    ; Add to predict buffer
+        paddw       xmm0,           xmm4
+        paddw       xmm1,           xmm4
+        paddw       xmm2,           xmm4
+        paddw       xmm3,           xmm4
+
+    ; pack up before storing
+        packuswb    xmm0,           xmm7
+        packuswb    xmm1,           xmm7
+        packuswb    xmm2,           xmm7
+        packuswb    xmm3,           xmm7
+
+    ; store blocks back out
+        movq        [rax],          xmm0
+        movq        [rax + rdx],    xmm1
+
+        lea         rax,            [rax + 2*rdx]
+
+        movq        [rax],          xmm2
+        movq        [rax + rdx],    xmm3
+
+    ; begin epilog
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(idct_dequant_full_2x_sse2)
+sym(idct_dequant_full_2x_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 7
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ; special case when 2 blocks have 0 or 1 coeffs
+    ; dc is set as first coeff, so no need to load qcoeff
+        mov         rax,            arg(0) ; qcoeff
+        mov         rsi,            arg(2) ; pre
+        mov         rdi,            arg(3) ; dst
+        movsxd      rcx,            dword ptr arg(5) ; blk_stride
+
+    ; Zero out xmm7, for use unpacking
+        pxor        xmm7,           xmm7
+
+        mov         rdx,            arg(1)  ; dequant
+
+    ; note the transpose of xmm1 and xmm2, necessary for shuffle
+    ;   to spit out sensicle data
+        movdqa      xmm0,           [rax]
+        movdqa      xmm2,           [rax+16]
+        movdqa      xmm1,           [rax+32]
+        movdqa      xmm3,           [rax+48]
+
+    ; Clear out coeffs
+        movdqa      [rax],          xmm7
+        movdqa      [rax+16],       xmm7
+        movdqa      [rax+32],       xmm7
+        movdqa      [rax+48],       xmm7
+
+    ; dequantize qcoeff buffer
+        pmullw      xmm0,           [rdx]
+        pmullw      xmm2,           [rdx+16]
+        pmullw      xmm1,           [rdx]
+        pmullw      xmm3,           [rdx+16]
+
+    ; repack so block 0 row x and block 1 row x are together
+        movdqa      xmm4,           xmm0
+        punpckldq   xmm0,           xmm1
+        punpckhdq   xmm4,           xmm1
+
+        pshufd      xmm0,           xmm0,       11011000b
+        pshufd      xmm1,           xmm4,       11011000b
+
+        movdqa      xmm4,           xmm2
+        punpckldq   xmm2,           xmm3
+        punpckhdq   xmm4,           xmm3
+
+        pshufd      xmm2,           xmm2,       11011000b
+        pshufd      xmm3,           xmm4,       11011000b
+
+    ; first pass
+        psubw       xmm0,           xmm2        ; b1 = 0-2
+        paddw       xmm2,           xmm2        ;
+
+        movdqa      xmm5,           xmm1
+        paddw       xmm2,           xmm0        ; a1 = 0+2
+
+        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        paddw       xmm5,           xmm1        ; ip1 * sin(pi/8) * sqrt(2)
+
+        movdqa      xmm7,           xmm3
+        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+
+        paddw       xmm7,           xmm3        ; ip3 * cos(pi/8) * sqrt(2)
+        psubw       xmm7,           xmm5        ; c1
+
+        movdqa      xmm5,           xmm1
+        movdqa      xmm4,           xmm3
+
+        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        paddw       xmm5,           xmm1
+
+        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        paddw       xmm3,           xmm4
+
+        paddw       xmm3,           xmm5        ; d1
+        movdqa      xmm6,           xmm2        ; a1
+
+        movdqa      xmm4,           xmm0        ; b1
+        paddw       xmm2,           xmm3        ;0
+
+        paddw       xmm4,           xmm7        ;1
+        psubw       xmm0,           xmm7        ;2
+
+        psubw       xmm6,           xmm3        ;3
+
+    ; transpose for the second pass
+        movdqa      xmm7,           xmm2        ; 103 102 101 100 003 002 001 000
+        punpcklwd   xmm2,           xmm0        ; 007 003 006 002 005 001 004 000
+        punpckhwd   xmm7,           xmm0        ; 107 103 106 102 105 101 104 100
+
+        movdqa      xmm5,           xmm4        ; 111 110 109 108 011 010 009 008
+        punpcklwd   xmm4,           xmm6        ; 015 011 014 010 013 009 012 008
+        punpckhwd   xmm5,           xmm6        ; 115 111 114 110 113 109 112 108
+
+
+        movdqa      xmm1,           xmm2        ; 007 003 006 002 005 001 004 000
+        punpckldq   xmm2,           xmm4        ; 013 009 005 001 012 008 004 000
+        punpckhdq   xmm1,           xmm4        ; 015 011 007 003 014 010 006 002
+
+        movdqa      xmm6,           xmm7        ; 107 103 106 102 105 101 104 100
+        punpckldq   xmm7,           xmm5        ; 113 109 105 101 112 108 104 100
+        punpckhdq   xmm6,           xmm5        ; 115 111 107 103 114 110 106 102
+
+
+        movdqa      xmm5,           xmm2        ; 013 009 005 001 012 008 004 000
+        punpckldq   xmm2,           xmm7        ; 112 108 012 008 104 100 004 000
+        punpckhdq   xmm5,           xmm7        ; 113 109 013 009 105 101 005 001
+
+        movdqa      xmm7,           xmm1        ; 015 011 007 003 014 010 006 002
+        punpckldq   xmm1,           xmm6        ; 114 110 014 010 106 102 006 002
+        punpckhdq   xmm7,           xmm6        ; 115 111 015 011 107 103 007 003
+
+        pshufd      xmm0,           xmm2,       11011000b
+        pshufd      xmm2,           xmm1,       11011000b
+
+        pshufd      xmm1,           xmm5,       11011000b
+        pshufd      xmm3,           xmm7,       11011000b
+
+    ; second pass
+        psubw       xmm0,           xmm2            ; b1 = 0-2
+        paddw       xmm2,           xmm2
+
+        movdqa      xmm5,           xmm1
+        paddw       xmm2,           xmm0            ; a1 = 0+2
+
+        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        paddw       xmm5,           xmm1            ; ip1 * sin(pi/8) * sqrt(2)
+
+        movdqa      xmm7,           xmm3
+        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+
+        paddw       xmm7,           xmm3            ; ip3 * cos(pi/8) * sqrt(2)
+        psubw       xmm7,           xmm5            ; c1
+
+        movdqa      xmm5,           xmm1
+        movdqa      xmm4,           xmm3
+
+        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        paddw       xmm5,           xmm1
+
+        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        paddw       xmm3,           xmm4
+
+        paddw       xmm3,           xmm5            ; d1
+        paddw       xmm0,           [fours GLOBAL]
+
+        paddw       xmm2,           [fours GLOBAL]
+        movdqa      xmm6,           xmm2            ; a1
+
+        movdqa      xmm4,           xmm0            ; b1
+        paddw       xmm2,           xmm3            ;0
+
+        paddw       xmm4,           xmm7            ;1
+        psubw       xmm0,           xmm7            ;2
+
+        psubw       xmm6,           xmm3            ;3
+        psraw       xmm2,           3
+
+        psraw       xmm0,           3
+        psraw       xmm4,           3
+
+        psraw       xmm6,           3
+
+    ; transpose to save
+        movdqa      xmm7,           xmm2        ; 103 102 101 100 003 002 001 000
+        punpcklwd   xmm2,           xmm0        ; 007 003 006 002 005 001 004 000
+        punpckhwd   xmm7,           xmm0        ; 107 103 106 102 105 101 104 100
+
+        movdqa      xmm5,           xmm4        ; 111 110 109 108 011 010 009 008
+        punpcklwd   xmm4,           xmm6        ; 015 011 014 010 013 009 012 008
+        punpckhwd   xmm5,           xmm6        ; 115 111 114 110 113 109 112 108
+
+
+        movdqa      xmm1,           xmm2        ; 007 003 006 002 005 001 004 000
+        punpckldq   xmm2,           xmm4        ; 013 009 005 001 012 008 004 000
+        punpckhdq   xmm1,           xmm4        ; 015 011 007 003 014 010 006 002
+
+        movdqa      xmm6,           xmm7        ; 107 103 106 102 105 101 104 100
+        punpckldq   xmm7,           xmm5        ; 113 109 105 101 112 108 104 100
+        punpckhdq   xmm6,           xmm5        ; 115 111 107 103 114 110 106 102
+
+
+        movdqa      xmm5,           xmm2        ; 013 009 005 001 012 008 004 000
+        punpckldq   xmm2,           xmm7        ; 112 108 012 008 104 100 004 000
+        punpckhdq   xmm5,           xmm7        ; 113 109 013 009 105 101 005 001
+
+        movdqa      xmm7,           xmm1        ; 015 011 007 003 014 010 006 002
+        punpckldq   xmm1,           xmm6        ; 114 110 014 010 106 102 006 002
+        punpckhdq   xmm7,           xmm6        ; 115 111 015 011 107 103 007 003
+
+        pshufd      xmm0,           xmm2,       11011000b
+        pshufd      xmm2,           xmm1,       11011000b
+
+        pshufd      xmm1,           xmm5,       11011000b
+        pshufd      xmm3,           xmm7,       11011000b
+
+        pxor        xmm7,           xmm7
+
+    ; Load up predict blocks
+        movq        xmm4,           [rsi]
+        movq        xmm5,           [rsi+rcx]
+
+        punpcklbw   xmm4,           xmm7
+        punpcklbw   xmm5,           xmm7
+
+        paddw       xmm0,           xmm4
+        paddw       xmm1,           xmm5
+
+        movq        xmm4,           [rsi+2*rcx]
+        lea         rcx,            [3*rcx]
+        movq        xmm5,           [rsi+rcx]
+
+        punpcklbw   xmm4,           xmm7
+        punpcklbw   xmm5,           xmm7
+
+        paddw       xmm2,           xmm4
+        paddw       xmm3,           xmm5
+
+.finish:
+
+    ; pack up before storing
+        packuswb    xmm0,           xmm7
+        packuswb    xmm1,           xmm7
+        packuswb    xmm2,           xmm7
+        packuswb    xmm3,           xmm7
+
+    ; Load destination stride before writing out,
+    ;   doesn't need to persist
+        movsxd      rdx,            dword ptr arg(4) ; dst_stride
+
+    ; store blocks back out
+        movq        [rdi],          xmm0
+        movq        [rdi + rdx],    xmm1
+
+        lea         rdi,            [rdi + 2*rdx]
+
+        movq        [rdi],          xmm2
+        movq        [rdi + rdx],    xmm3
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void idct_dequant_dc_0_2x_sse2
+; (
+;   short *qcoeff       - 0
+;   short *dequant      - 1
+;   unsigned char *pre  - 2
+;   unsigned char *dst  - 3
+;   int dst_stride      - 4
+;   short *dc           - 5
+; )
+global sym(idct_dequant_dc_0_2x_sse2)
+sym(idct_dequant_dc_0_2x_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 7
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ; special case when 2 blocks have 0 or 1 coeffs
+    ; dc is set as first coeff, so no need to load qcoeff
+        mov         rax,            arg(0) ; qcoeff
+        mov         rsi,            arg(2) ; pre
+        mov         rdi,            arg(3) ; dst
+        mov         rdx,            arg(5) ; dc
+
+    ; Zero out xmm7, for use unpacking
+        pxor        xmm7,           xmm7
+
+    ; load up 2 dc words here == 2*16 = doubleword
+        movd        xmm4,           [rdx]
+
+    ; Load up predict blocks
+        movq        xmm0,           [rsi]
+        movq        xmm1,           [rsi+16]
+        movq        xmm2,           [rsi+32]
+        movq        xmm3,           [rsi+48]
+
+    ; Duplicate and expand dc across
+        punpcklwd   xmm4,           xmm4
+        punpckldq   xmm4,           xmm4
+
+    ; Rounding to dequant and downshift
+        paddw       xmm4,           [fours GLOBAL]
+        psraw       xmm4,           3
+
+    ; Predict buffer needs to be expanded from bytes to words
+        punpcklbw   xmm0,           xmm7
+        punpcklbw   xmm1,           xmm7
+        punpcklbw   xmm2,           xmm7
+        punpcklbw   xmm3,           xmm7
+
+    ; Add to predict buffer
+        paddw       xmm0,           xmm4
+        paddw       xmm1,           xmm4
+        paddw       xmm2,           xmm4
+        paddw       xmm3,           xmm4
+
+    ; pack up before storing
+        packuswb    xmm0,           xmm7
+        packuswb    xmm1,           xmm7
+        packuswb    xmm2,           xmm7
+        packuswb    xmm3,           xmm7
+
+    ; Load destination stride before writing out,
+    ;   doesn't need to persist
+        movsxd      rdx,            dword ptr arg(4) ; dst_stride
+
+    ; store blocks back out
+        movq        [rdi],          xmm0
+        movq        [rdi + rdx],    xmm1
+
+        lea         rdi,            [rdi + 2*rdx]
+
+        movq        [rdi],          xmm2
+        movq        [rdi + rdx],    xmm3
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(idct_dequant_dc_full_2x_sse2)
+sym(idct_dequant_dc_full_2x_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 7
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ; special case when 2 blocks have 0 or 1 coeffs
+    ; dc is set as first coeff, so no need to load qcoeff
+        mov         rax,            arg(0) ; qcoeff
+        mov         rsi,            arg(2) ; pre
+        mov         rdi,            arg(3) ; dst
+
+    ; Zero out xmm7, for use unpacking
+        pxor        xmm7,           xmm7
+
+        mov         rdx,            arg(1)  ; dequant
+
+    ; note the transpose of xmm1 and xmm2, necessary for shuffle
+    ;   to spit out sensicle data
+        movdqa      xmm0,           [rax]
+        movdqa      xmm2,           [rax+16]
+        movdqa      xmm1,           [rax+32]
+        movdqa      xmm3,           [rax+48]
+
+    ; Clear out coeffs
+        movdqa      [rax],          xmm7
+        movdqa      [rax+16],       xmm7
+        movdqa      [rax+32],       xmm7
+        movdqa      [rax+48],       xmm7
+
+    ; dequantize qcoeff buffer
+        pmullw      xmm0,           [rdx]
+        pmullw      xmm2,           [rdx+16]
+        pmullw      xmm1,           [rdx]
+        pmullw      xmm3,           [rdx+16]
+
+    ; DC component
+        mov         rdx,            arg(5)
+
+    ; repack so block 0 row x and block 1 row x are together
+        movdqa      xmm4,           xmm0
+        punpckldq   xmm0,           xmm1
+        punpckhdq   xmm4,           xmm1
+
+        pshufd      xmm0,           xmm0,       11011000b
+        pshufd      xmm1,           xmm4,       11011000b
+
+        movdqa      xmm4,           xmm2
+        punpckldq   xmm2,           xmm3
+        punpckhdq   xmm4,           xmm3
+
+        pshufd      xmm2,           xmm2,       11011000b
+        pshufd      xmm3,           xmm4,       11011000b
+
+    ; insert DC component
+        pinsrw      xmm0,           [rdx],      0
+        pinsrw      xmm0,           [rdx+2],    4
+
+    ; first pass
+        psubw       xmm0,           xmm2        ; b1 = 0-2
+        paddw       xmm2,           xmm2        ;
+
+        movdqa      xmm5,           xmm1
+        paddw       xmm2,           xmm0        ; a1 = 0+2
+
+        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        paddw       xmm5,           xmm1        ; ip1 * sin(pi/8) * sqrt(2)
+
+        movdqa      xmm7,           xmm3
+        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+
+        paddw       xmm7,           xmm3        ; ip3 * cos(pi/8) * sqrt(2)
+        psubw       xmm7,           xmm5        ; c1
+
+        movdqa      xmm5,           xmm1
+        movdqa      xmm4,           xmm3
+
+        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        paddw       xmm5,           xmm1
+
+        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        paddw       xmm3,           xmm4
+
+        paddw       xmm3,           xmm5        ; d1
+        movdqa      xmm6,           xmm2        ; a1
+
+        movdqa      xmm4,           xmm0        ; b1
+        paddw       xmm2,           xmm3        ;0
+
+        paddw       xmm4,           xmm7        ;1
+        psubw       xmm0,           xmm7        ;2
+
+        psubw       xmm6,           xmm3        ;3
+
+    ; transpose for the second pass
+        movdqa      xmm7,           xmm2        ; 103 102 101 100 003 002 001 000
+        punpcklwd   xmm2,           xmm0        ; 007 003 006 002 005 001 004 000
+        punpckhwd   xmm7,           xmm0        ; 107 103 106 102 105 101 104 100
+
+        movdqa      xmm5,           xmm4        ; 111 110 109 108 011 010 009 008
+        punpcklwd   xmm4,           xmm6        ; 015 011 014 010 013 009 012 008
+        punpckhwd   xmm5,           xmm6        ; 115 111 114 110 113 109 112 108
+
+
+        movdqa      xmm1,           xmm2        ; 007 003 006 002 005 001 004 000
+        punpckldq   xmm2,           xmm4        ; 013 009 005 001 012 008 004 000
+        punpckhdq   xmm1,           xmm4        ; 015 011 007 003 014 010 006 002
+
+        movdqa      xmm6,           xmm7        ; 107 103 106 102 105 101 104 100
+        punpckldq   xmm7,           xmm5        ; 113 109 105 101 112 108 104 100
+        punpckhdq   xmm6,           xmm5        ; 115 111 107 103 114 110 106 102
+
+
+        movdqa      xmm5,           xmm2        ; 013 009 005 001 012 008 004 000
+        punpckldq   xmm2,           xmm7        ; 112 108 012 008 104 100 004 000
+        punpckhdq   xmm5,           xmm7        ; 113 109 013 009 105 101 005 001
+
+        movdqa      xmm7,           xmm1        ; 015 011 007 003 014 010 006 002
+        punpckldq   xmm1,           xmm6        ; 114 110 014 010 106 102 006 002
+        punpckhdq   xmm7,           xmm6        ; 115 111 015 011 107 103 007 003
+
+        pshufd      xmm0,           xmm2,       11011000b
+        pshufd      xmm2,           xmm1,       11011000b
+
+        pshufd      xmm1,           xmm5,       11011000b
+        pshufd      xmm3,           xmm7,       11011000b
+
+    ; second pass
+        psubw       xmm0,           xmm2            ; b1 = 0-2
+        paddw       xmm2,           xmm2
+
+        movdqa      xmm5,           xmm1
+        paddw       xmm2,           xmm0            ; a1 = 0+2
+
+        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        paddw       xmm5,           xmm1            ; ip1 * sin(pi/8) * sqrt(2)
+
+        movdqa      xmm7,           xmm3
+        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+
+        paddw       xmm7,           xmm3            ; ip3 * cos(pi/8) * sqrt(2)
+        psubw       xmm7,           xmm5            ; c1
+
+        movdqa      xmm5,           xmm1
+        movdqa      xmm4,           xmm3
+
+        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        paddw       xmm5,           xmm1
+
+        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        paddw       xmm3,           xmm4
+
+        paddw       xmm3,           xmm5            ; d1
+        paddw       xmm0,           [fours GLOBAL]
+
+        paddw       xmm2,           [fours GLOBAL]
+        movdqa      xmm6,           xmm2            ; a1
+
+        movdqa      xmm4,           xmm0            ; b1
+        paddw       xmm2,           xmm3            ;0
+
+        paddw       xmm4,           xmm7            ;1
+        psubw       xmm0,           xmm7            ;2
+
+        psubw       xmm6,           xmm3            ;3
+        psraw       xmm2,           3
+
+        psraw       xmm0,           3
+        psraw       xmm4,           3
+
+        psraw       xmm6,           3
+
+    ; transpose to save
+        movdqa      xmm7,           xmm2        ; 103 102 101 100 003 002 001 000
+        punpcklwd   xmm2,           xmm0        ; 007 003 006 002 005 001 004 000
+        punpckhwd   xmm7,           xmm0        ; 107 103 106 102 105 101 104 100
+
+        movdqa      xmm5,           xmm4        ; 111 110 109 108 011 010 009 008
+        punpcklwd   xmm4,           xmm6        ; 015 011 014 010 013 009 012 008
+        punpckhwd   xmm5,           xmm6        ; 115 111 114 110 113 109 112 108
+
+
+        movdqa      xmm1,           xmm2        ; 007 003 006 002 005 001 004 000
+        punpckldq   xmm2,           xmm4        ; 013 009 005 001 012 008 004 000
+        punpckhdq   xmm1,           xmm4        ; 015 011 007 003 014 010 006 002
+
+        movdqa      xmm6,           xmm7        ; 107 103 106 102 105 101 104 100
+        punpckldq   xmm7,           xmm5        ; 113 109 105 101 112 108 104 100
+        punpckhdq   xmm6,           xmm5        ; 115 111 107 103 114 110 106 102
+
+
+        movdqa      xmm5,           xmm2        ; 013 009 005 001 012 008 004 000
+        punpckldq   xmm2,           xmm7        ; 112 108 012 008 104 100 004 000
+        punpckhdq   xmm5,           xmm7        ; 113 109 013 009 105 101 005 001
+
+        movdqa      xmm7,           xmm1        ; 015 011 007 003 014 010 006 002
+        punpckldq   xmm1,           xmm6        ; 114 110 014 010 106 102 006 002
+        punpckhdq   xmm7,           xmm6        ; 115 111 015 011 107 103 007 003
+
+        pshufd      xmm0,           xmm2,       11011000b
+        pshufd      xmm2,           xmm1,       11011000b
+
+        pshufd      xmm1,           xmm5,       11011000b
+        pshufd      xmm3,           xmm7,       11011000b
+
+        pxor        xmm7,           xmm7
+
+    ; Load up predict blocks
+        movq        xmm4,           [rsi]
+        movq        xmm5,           [rsi+16]
+
+        punpcklbw   xmm4,           xmm7
+        punpcklbw   xmm5,           xmm7
+
+        paddw       xmm0,           xmm4
+        paddw       xmm1,           xmm5
+
+        movq        xmm4,           [rsi+32]
+        movq        xmm5,           [rsi+48]
+
+        punpcklbw   xmm4,           xmm7
+        punpcklbw   xmm5,           xmm7
+
+        paddw       xmm2,           xmm4
+        paddw       xmm3,           xmm5
+
+.finish:
+
+    ; pack up before storing
+        packuswb    xmm0,           xmm7
+        packuswb    xmm1,           xmm7
+        packuswb    xmm2,           xmm7
+        packuswb    xmm3,           xmm7
+
+    ; Load destination stride before writing out,
+    ;   doesn't need to persist
+        movsxd      rdx,            dword ptr arg(4) ; dst_stride
+
+    ; store blocks back out
+        movq        [rdi],          xmm0
+        movq        [rdi + rdx],    xmm1
+
+        lea         rdi,            [rdi + 2*rdx]
+
+        movq        [rdi],          xmm2
+        movq        [rdi + rdx],    xmm3
+
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+align 16
+fours:
+    times 8 dw 0x0004
+align 16
+x_s1sqr2:
+    times 8 dw 0x8A8C
+align 16
+x_c1sqr2less1:
+    times 8 dw 0x4E7B
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 562e590..3f0671c 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 96943df..83c97df 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -16,6 +17,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 2
+    SAVE_XMM
     push        rsi
     push        rdi
     ; end prolog
@@ -100,6 +102,7 @@
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index 6e4d2b6..0b39e62 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 5275dfa..57276b6 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -1,16 +1,293 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
 %include "vpx_ports/x86_abi_support.asm"
 
 
+%macro LFH_FILTER_MASK 1
+%if %1
+        movdqa      xmm2,                   [rdi+2*rax]       ; q3
+        movdqa      xmm1,                   [rsi+2*rax]       ; q2
+%else
+        movq        xmm0,                   [rsi + rcx*2]     ; q3
+        movq        xmm2,                   [rdi + rcx*2]
+        pslldq      xmm2,                   8
+        por         xmm2,                   xmm0
+        movq        xmm1,                   [rsi + rcx]       ; q2
+        movq        xmm3,                   [rdi + rcx]
+        pslldq      xmm3,                   8
+        por         xmm1,                   xmm3
+        movdqa      XMMWORD PTR [rsp],      xmm1              ; store q2
+%endif
+
+        movdqa      xmm6,                   xmm1              ; q2
+        psubusb     xmm1,                   xmm2              ; q2-=q3
+        psubusb     xmm2,                   xmm6              ; q3-=q2
+        por         xmm1,                   xmm2              ; abs(q3-q2)
+
+        psubusb     xmm1,                   xmm7
+
+%if %1
+        movdqa      xmm4,                   [rsi+rax]         ; q1
+%else
+        movq        xmm0,                   [rsi]             ; q1
+        movq        xmm4,                   [rdi]
+        pslldq      xmm4,                   8
+        por         xmm4,                   xmm0
+        movdqa      XMMWORD PTR [rsp + 16], xmm4              ; store q1
+%endif
+
+        movdqa      xmm3,                   xmm4              ; q1
+        psubusb     xmm4,                   xmm6              ; q1-=q2
+        psubusb     xmm6,                   xmm3              ; q2-=q1
+        por         xmm4,                   xmm6              ; abs(q2-q1)
+        psubusb     xmm4,                   xmm7
+
+        por         xmm1,                   xmm4
+
+%if %1
+        movdqa      xmm4,                   [rsi]             ; q0
+%else
+        movq        xmm4,                   [rsi + rax]       ; q0
+        movq        xmm0,                   [rdi + rax]
+        pslldq      xmm0,                   8
+        por         xmm4,                   xmm0
+%endif
+
+        movdqa      xmm0,                   xmm4              ; q0
+        psubusb     xmm4,                   xmm3              ; q0-=q1
+        psubusb     xmm3,                   xmm0              ; q1-=q0
+        por         xmm4,                   xmm3              ; abs(q0-q1)
+        movdqa      t0,                     xmm4              ; save to t0
+
+        psubusb     xmm4,                   xmm7
+        por         xmm1,                   xmm4
+
+%if %1
+        neg         rax                     ; negate pitch to deal with above border
+
+        movdqa      xmm2,                   [rsi+4*rax]       ; p3
+        movdqa      xmm4,                   [rdi+4*rax]       ; p2
+%else
+        lea         rsi,                    [rsi + rax*4]
+        lea         rdi,                    [rdi + rax*4]
+
+        movq        xmm2,                   [rsi + rax]       ; p3
+        movq        xmm3,                   [rdi + rax]
+        pslldq      xmm3,                   8
+        por         xmm2,                   xmm3
+        movq        xmm4,                   [rsi]             ; p2
+        movq        xmm5,                   [rdi]
+        pslldq      xmm5,                   8
+        por         xmm4,                   xmm5
+        movdqa      XMMWORD PTR [rsp + 32], xmm4              ; store p2
+%endif
+
+        movdqa      xmm5,                   xmm4              ; p2
+        psubusb     xmm4,                   xmm2              ; p2-=p3
+        psubusb     xmm2,                   xmm5              ; p3-=p2
+        por         xmm4,                   xmm2              ; abs(p3 - p2)
+
+        psubusb     xmm4,                   xmm7
+        por         xmm1,                   xmm4
+
+%if %1
+        movdqa      xmm4,                   [rsi+2*rax]       ; p1
+%else
+        movq        xmm4,                   [rsi + rcx]       ; p1
+        movq        xmm3,                   [rdi + rcx]
+        pslldq      xmm3,                   8
+        por         xmm4,                   xmm3
+        movdqa      XMMWORD PTR [rsp + 48], xmm4              ; store p1
+%endif
+
+        movdqa      xmm3,                   xmm4              ; p1
+        psubusb     xmm4,                   xmm5              ; p1-=p2
+        psubusb     xmm5,                   xmm3              ; p2-=p1
+        por         xmm4,                   xmm5              ; abs(p2 - p1)
+        psubusb     xmm4,                   xmm7
+
+        por         xmm1,                   xmm4
+        movdqa      xmm2,                   xmm3              ; p1
+
+%if %1
+        movdqa      xmm4,                   [rsi+rax]         ; p0
+%else
+        movq        xmm4,                   [rsi + rcx*2]     ; p0
+        movq        xmm5,                   [rdi + rcx*2]
+        pslldq      xmm5,                   8
+        por         xmm4,                   xmm5
+%endif
+
+        movdqa      xmm5,                   xmm4              ; p0
+        psubusb     xmm4,                   xmm3              ; p0-=p1
+        psubusb     xmm3,                   xmm5              ; p1-=p0
+        por         xmm4,                   xmm3              ; abs(p1 - p0)
+        movdqa        t1,                   xmm4              ; save to t1
+
+        psubusb     xmm4,                   xmm7
+        por         xmm1,                   xmm4
+
+%if %1
+        movdqa      xmm3,                   [rdi]             ; q1
+%else
+        movdqa      xmm3,                   q1                ; q1
+%endif
+
+        movdqa      xmm4,                   xmm3              ; q1
+        psubusb     xmm3,                   xmm2              ; q1-=p1
+        psubusb     xmm2,                   xmm4              ; p1-=q1
+        por         xmm2,                   xmm3              ; abs(p1-q1)
+        pand        xmm2,                   [tfe GLOBAL]      ; set lsb of each byte to zero
+        psrlw       xmm2,                   1                 ; abs(p1-q1)/2
+
+        movdqa      xmm6,                   xmm5              ; p0
+        movdqa      xmm3,                   xmm0              ; q0
+        psubusb     xmm5,                   xmm3              ; p0-=q0
+        psubusb     xmm3,                   xmm6              ; q0-=p0
+        por         xmm5,                   xmm3              ; abs(p0 - q0)
+        paddusb     xmm5,                   xmm5              ; abs(p0-q0)*2
+        paddusb     xmm5,                   xmm2              ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+        mov         rdx,                    arg(2)            ; get flimit
+        movdqa      xmm2,                   XMMWORD PTR [rdx]
+        paddb       xmm2,                   xmm2              ; flimit*2 (less than 255)
+        paddb       xmm7,                   xmm2              ; flimit * 2 + limit (less than 255)
+
+        psubusb     xmm5,                   xmm7              ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
+        por         xmm1,                   xmm5
+        pxor        xmm5,                   xmm5
+        pcmpeqb     xmm1,                   xmm5              ; mask mm1
+%endmacro
+
+%macro LFH_HEV_MASK 0
+        mov         rdx,                    arg(4)            ; get thresh
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
+
+        movdqa      xmm4,                   t0                ; get abs (q1 - q0)
+        psubusb     xmm4,                   xmm7
+        movdqa      xmm3,                   t1                ; get abs (p1 - p0)
+        psubusb     xmm3,                   xmm7
+        paddb       xmm4,                   xmm3              ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
+        pcmpeqb     xmm4,                   xmm5
+
+        pcmpeqb     xmm5,                   xmm5
+        pxor        xmm4,                   xmm5
+%endmacro
+
+%macro BH_FILTER 1
+%if %1
+        movdqa      xmm2,                   [rsi+2*rax]       ; p1
+        movdqa      xmm7,                   [rdi]             ; q1
+%else
+        movdqa      xmm2,                   p1                ; p1
+        movdqa      xmm7,                   q1                ; q1
+%endif
+
+        pxor        xmm2,                   [t80 GLOBAL]      ; p1 offset to convert to signed values
+        pxor        xmm7,                   [t80 GLOBAL]      ; q1 offset to convert to signed values
+
+        psubsb      xmm2,                   xmm7              ; p1 - q1
+        pxor        xmm6,                   [t80 GLOBAL]      ; offset to convert to signed values
+
+        pand        xmm2,                   xmm4              ; high var mask (hvm)(p1 - q1)
+        pxor        xmm0,                   [t80 GLOBAL]      ; offset to convert to signed values
+
+        movdqa      xmm3,                   xmm0              ; q0
+        psubsb      xmm0,                   xmm6              ; q0 - p0
+        paddsb      xmm2,                   xmm0              ; 1 * (q0 - p0) + hvm(p1 - q1)
+        paddsb      xmm2,                   xmm0              ; 2 * (q0 - p0) + hvm(p1 - q1)
+        paddsb      xmm2,                   xmm0              ; 3 * (q0 - p0) + hvm(p1 - q1)
+        pand        xmm1,                   xmm2              ; mask filter values we don't care about
+        movdqa      xmm2,                   xmm1
+        paddsb      xmm1,                   [t4 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+        paddsb      xmm2,                   [t3 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+
+        punpckhbw   xmm5,                   xmm2              ; axbxcxdx
+        punpcklbw   xmm2,                   xmm2              ; exfxgxhx
+
+        psraw       xmm5,                   11                ; sign extended shift right by 3
+        psraw       xmm2,                   11                ; sign extended shift right by 3
+        packsswb    xmm2,                   xmm5              ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+
+        punpcklbw   xmm0,                   xmm1              ; exfxgxhx
+        punpckhbw   xmm1,                   xmm1              ; axbxcxdx
+
+        psraw       xmm0,                   11                ; sign extended shift right by 3
+        psraw       xmm1,                   11                ; sign extended shift right by 3
+
+        movdqa      xmm5,                   xmm0              ; save results
+        packsswb    xmm0,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
+
+        paddsw      xmm5,                   [ones GLOBAL]
+        paddsw      xmm1,                   [ones GLOBAL]
+
+        psraw       xmm5,                   1                 ; partial shifted one more time for 2nd tap
+        psraw       xmm1,                   1                 ; partial shifted one more time for 2nd tap
+
+        packsswb    xmm5,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
+        pandn       xmm4,                   xmm5              ; high edge variance additive
+%endmacro
+
+%macro BH_WRITEBACK 1
+        paddsb      xmm6,                   xmm2              ; p0+= p0 add
+        pxor        xmm6,                   [t80 GLOBAL]      ; unoffset
+%if %1
+        movdqa      [rsi+rax],              xmm6              ; write back
+%else
+        lea         rsi,                    [rsi + rcx*2]
+        lea         rdi,                    [rdi + rcx*2]
+        movq        MMWORD PTR [rsi],       xmm6              ; p0
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi],       xmm6
+%endif
+
+%if %1
+        movdqa      xmm6,                   [rsi+2*rax]       ; p1
+%else
+        movdqa      xmm6,                   p1                ; p1
+%endif
+        pxor        xmm6,                   [t80 GLOBAL]      ; reoffset
+        paddsb      xmm6,                   xmm4               ; p1+= p1 add
+        pxor        xmm6,                   [t80 GLOBAL]      ; unoffset
+%if %1
+        movdqa      [rsi+2*rax],            xmm6              ; write back
+%else
+        movq        MMWORD PTR [rsi + rax], xmm6              ; p1
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi + rax], xmm6
+%endif
+
+        psubsb      xmm3,                   xmm0              ; q0-= q0 add
+        pxor        xmm3,                   [t80 GLOBAL]      ; unoffset
+%if %1
+        movdqa      [rsi],                  xmm3              ; write back
+%else
+        movq        MMWORD PTR [rsi + rcx], xmm3              ; q0
+        psrldq      xmm3,                   8
+        movq        MMWORD PTR [rdi + rcx], xmm3
+%endif
+
+        psubsb      xmm7,                   xmm4              ; q1-= q1 add
+        pxor        xmm7,                   [t80 GLOBAL]      ; unoffset
+%if %1
+        movdqa      [rdi],                  xmm7              ; write back
+%else
+        movq        MMWORD PTR [rsi + rcx*2],xmm7             ; q1
+        psrldq      xmm7,                   8
+        movq        MMWORD PTR [rdi + rcx*2],xmm7
+%endif
+%endmacro
+
+
 ;void vp8_loop_filter_horizontal_edge_sse2
 ;(
 ;    unsigned char *src_ptr,
@@ -25,185 +302,35 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
     ; end prolog
 
     ALIGN_STACK 16, rax
-    sub         rsp, 32                         ; reserve 32 bytes
+    sub         rsp, 32     ; reserve 32 bytes
     %define t0 [rsp + 0]    ;__declspec(align(16)) char t0[16];
     %define t1 [rsp + 16]   ;__declspec(align(16)) char t1[16];
 
-        mov         rsi, arg(0) ;src_ptr
-        movsxd      rax, dword ptr arg(1) ;src_pixel_step     ; destination pitch?
+        mov         rsi,                    arg(0)           ;src_ptr
+        movsxd      rax,                    dword ptr arg(1) ;src_pixel_step
 
-        mov         rdx,    arg(3) ;limit
-        movdqa      xmm7,   XMMWORD PTR [rdx]
-        mov         rdi,    rsi           ; rdi points to row +1 for indirect addressing
-        add         rdi,    rax
+        mov         rdx,                    arg(3)           ;limit
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
+
+        lea         rdi,                    [rsi+rax]        ; rdi points to row +1 for indirect addressing
 
         ; calculate breakout conditions
-        movdqu      xmm2,   [rdi+2*rax]      ; q3
-        movdqu      xmm1,   [rsi+2*rax]      ; q2
-        movdqa      xmm6,   xmm1              ; q2
-        psubusb     xmm1,   xmm2              ; q2-=q3
-        psubusb     xmm2,   xmm6              ; q3-=q2
-        por         xmm1,   xmm2              ; abs(q3-q2)
-        psubusb     xmm1,   xmm7              ;
-
-
-        movdqu      xmm4,   [rsi+rax]         ; q1
-        movdqa      xmm3,   xmm4              ; q1
-        psubusb     xmm4,   xmm6              ; q1-=q2
-        psubusb     xmm6,   xmm3              ; q2-=q1
-        por         xmm4,   xmm6              ; abs(q2-q1)
-
-        psubusb     xmm4,   xmm7
-        por         xmm1,   xmm4
-
-        movdqu      xmm4,   [rsi]             ; q0
-        movdqa      xmm0,   xmm4              ; q0
-        psubusb     xmm4,   xmm3              ; q0-=q1
-        psubusb     xmm3,   xmm0              ; q1-=q0
-        por         xmm4,   xmm3              ; abs(q0-q1)
-        movdqa        t0,       xmm4                  ; save to t0
-        psubusb     xmm4,   xmm7
-        por         xmm1,   xmm4
-
-        neg         rax                   ; negate pitch to deal with above border
-        movdqu      xmm2,   [rsi+4*rax]      ; p3
-        movdqu      xmm4,   [rdi+4*rax]      ; p2
-        movdqa      xmm5,   xmm4              ; p2
-        psubusb     xmm4,   xmm2              ; p2-=p3
-        psubusb     xmm2,   xmm5              ; p3-=p2
-        por         xmm4,   xmm2              ; abs(p3 - p2)
-        psubusb     xmm4,   xmm7
-        por         xmm1,   xmm4
-
-
-        movdqu      xmm4,   [rsi+2*rax]      ; p1
-        movdqa      xmm3,   xmm4              ; p1
-        psubusb     xmm4,   xmm5              ; p1-=p2
-        psubusb     xmm5,   xmm3              ; p2-=p1
-        por         xmm4,   xmm5              ; abs(p2 - p1)
-        psubusb     xmm4,   xmm7
-        por         xmm1,   xmm4
-
-        movdqa      xmm2,   xmm3              ; p1
-
-        movdqu      xmm4,   [rsi+rax]         ; p0
-        movdqa      xmm5,   xmm4              ; p0
-        psubusb     xmm4,   xmm3              ; p0-=p1
-        psubusb     xmm3,   xmm5              ; p1-=p0
-        por         xmm4,   xmm3              ; abs(p1 - p0)
-        movdqa      t1,     xmm4                  ; save to t1
-        psubusb     xmm4,   xmm7
-        por         xmm1,    xmm4
-
-        movdqu      xmm3,   [rdi]             ; q1
-        movdqa      xmm4,   xmm3              ; q1
-        psubusb     xmm3,   xmm2              ; q1-=p1
-        psubusb     xmm2,   xmm4              ; p1-=q1
-        por         xmm2,   xmm3              ; abs(p1-q1)
-        pand        xmm2,   [tfe GLOBAL]      ; set lsb of each byte to zero
-        psrlw       xmm2,   1                 ; abs(p1-q1)/2
-
-        movdqa      xmm6,   xmm5              ; p0
-        movdqu      xmm3,   [rsi]             ; q0
-        psubusb     xmm5,   xmm3              ; p0-=q0
-        psubusb     xmm3,   xmm6              ; q0-=p0
-        por         xmm5,   xmm3              ; abs(p0 - q0)
-        paddusb     xmm5,   xmm5              ; abs(p0-q0)*2
-        paddusb     xmm5,   xmm2              ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
-        mov         rdx,    arg(2) ;flimit            ; get flimit
-        movdqa      xmm2,   [rdx]             ;
-
-        paddb       xmm2,   xmm2              ; flimit*2 (less than 255)
-        paddb       xmm7,   xmm2              ; flimit * 2 + limit (less than 255)
-
-        psubusb     xmm5,    xmm7             ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
-        por         xmm1,    xmm5
-        pxor        xmm5,    xmm5
-        pcmpeqb     xmm1,    xmm5             ; mask mm1
-
+        LFH_FILTER_MASK 1
 
         ; calculate high edge variance
-        mov         rdx,    arg(4) ;thresh            ; get thresh
-        movdqa      xmm7,   [rdx]             ;
-        movdqa      xmm4,   t0                ; get abs (q1 - q0)
-        psubusb     xmm4,   xmm7
-        movdqa      xmm3,   t1                ; get abs (p1 - p0)
-        psubusb     xmm3,   xmm7
-        paddb       xmm4,   xmm3              ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,        xmm5
-        pcmpeqb     xmm5,        xmm5
-        pxor        xmm4,        xmm5
-
+        LFH_HEV_MASK
 
         ; start work on filters
-        movdqu      xmm2, [rsi+2*rax]     ; p1
-        movdqu      xmm7, [rdi]           ; q1
-        pxor        xmm2, [t80 GLOBAL]    ; p1 offset to convert to signed values
-        pxor        xmm7, [t80 GLOBAL]    ; q1 offset to convert to signed values
-        psubsb      xmm2, xmm7            ; p1 - q1
-        pand        xmm2, xmm4            ; high var mask (hvm)(p1 - q1)
-        pxor        xmm6, [t80 GLOBAL]    ; offset to convert to signed values
-        pxor        xmm0, [t80 GLOBAL]    ; offset to convert to signed values
-        movdqa      xmm3, xmm0            ; q0
-        psubsb      xmm0, xmm6            ; q0 - p0
-        paddsb      xmm2, xmm0            ; 1 * (q0 - p0) + hvm(p1 - q1)
-        paddsb      xmm2, xmm0            ; 2 * (q0 - p0) + hvm(p1 - q1)
-        paddsb      xmm2, xmm0            ; 3 * (q0 - p0) + hvm(p1 - q1)
-        pand        xmm1, xmm2            ; mask filter values we don't care about
-        movdqa      xmm2, xmm1
-        paddsb      xmm1, [t4 GLOBAL]         ; 3* (q0 - p0) + hvm(p1 - q1) + 4
-        paddsb      xmm2, [t3 GLOBAL]         ; 3* (q0 - p0) + hvm(p1 - q1) + 3
-
-        pxor        xmm0, xmm0           ;
-        pxor        xmm5, xmm5
-        punpcklbw   xmm0, xmm2          ;
-        punpckhbw   xmm5, xmm2          ;
-        psraw       xmm0, 11                ;
-        psraw       xmm5, 11
-        packsswb    xmm0, xmm5
-        movdqa      xmm2, xmm0          ;  (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
-
-        pxor        xmm0, xmm0            ; 0
-        movdqa      xmm5, xmm1            ; abcdefgh
-        punpcklbw   xmm0, xmm1            ; e0f0g0h0
-        psraw       xmm0, 11                  ; sign extended shift right by 3
-        pxor        xmm1, xmm1            ; 0
-        punpckhbw   xmm1, xmm5            ; a0b0c0d0
-        psraw       xmm1, 11                  ; sign extended shift right by 3
-        movdqa      xmm5, xmm0              ; save results
-
-        packsswb    xmm0, xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
-        paddsw      xmm5, [ones GLOBAL]
-        paddsw      xmm1, [ones GLOBAL]
-        psraw       xmm5, 1               ; partial shifted one more time for 2nd tap
-        psraw       xmm1, 1               ; partial shifted one more time for 2nd tap
-        packsswb    xmm5, xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
-        pandn       xmm4, xmm5            ; high edge variance additive
-
-        paddsb      xmm6, xmm2            ; p0+= p0 add
-        pxor        xmm6, [t80 GLOBAL]    ; unoffset
-        movdqu      [rsi+rax], xmm6       ; write back
-
-        movdqu      xmm6, [rsi+2*rax]     ; p1
-        pxor        xmm6, [t80 GLOBAL]    ; reoffset
-        paddsb      xmm6, xmm4            ; p1+= p1 add
-        pxor        xmm6, [t80 GLOBAL]    ; unoffset
-        movdqu      [rsi+2*rax], xmm6     ; write back
-
-        psubsb      xmm3, xmm0            ; q0-= q0 add
-        pxor        xmm3, [t80 GLOBAL]    ; unoffset
-        movdqu      [rsi], xmm3           ; write back
-
-        psubsb      xmm7, xmm4            ; q1-= q1 add
-        pxor        xmm7, [t80 GLOBAL]    ; unoffset
-        movdqu      [rdi], xmm7           ; write back
+        BH_FILTER 1
+        ; write back the result
+        BH_WRITEBACK 1
 
     add rsp, 32
     pop rsp
@@ -211,12 +338,13 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
 
 
-;void vp8_loop_filter_vertical_edge_sse2
+;void vp8_loop_filter_horizontal_edge_uv_sse2
 ;(
 ;    unsigned char *src_ptr,
 ;    int            src_pixel_step,
@@ -225,425 +353,47 @@
 ;    const char    *thresh,
 ;    int            count
 ;)
-global sym(vp8_loop_filter_vertical_edge_sse2)
-sym(vp8_loop_filter_vertical_edge_sse2):
+global sym(vp8_loop_filter_horizontal_edge_uv_sse2)
+sym(vp8_loop_filter_horizontal_edge_uv_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
     ; end prolog
 
     ALIGN_STACK 16, rax
-    sub          rsp, 96      ; reserve 96 bytes
-    %define t0   [rsp + 0]    ;__declspec(align(16)) char t0[16];
-    %define t1   [rsp + 16]   ;__declspec(align(16)) char t1[16];
-    %define srct [rsp + 32]   ;__declspec(align(16)) char srct[64];
+    sub         rsp, 96       ; reserve 96 bytes
+    %define q2  [rsp + 0]     ;__declspec(align(16)) char q2[16];
+    %define q1  [rsp + 16]    ;__declspec(align(16)) char q1[16];
+    %define p2  [rsp + 32]    ;__declspec(align(16)) char p2[16];
+    %define p1  [rsp + 48]    ;__declspec(align(16)) char p1[16];
+    %define t0  [rsp + 64]    ;__declspec(align(16)) char t0[16];
+    %define t1  [rsp + 80]    ;__declspec(align(16)) char t1[16];
 
-        mov         rsi,        arg(0) ;src_ptr
-        movsxd      rax,        dword ptr arg(1) ;src_pixel_step     ; destination pitch?
+        mov         rsi,                    arg(0)             ; u
+        mov         rdi,                    arg(5)             ; v
+        movsxd      rax,                    dword ptr arg(1)   ; src_pixel_step
+        mov         rcx,                    rax
+        neg         rax                     ; negate pitch to deal with above border
 
-        lea         rsi,        [rsi + rax*4 - 4]
-        mov         rdi,        rsi           ; rdi points to row +1 for indirect addressing
+        mov         rdx,                    arg(3)             ;limit
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
 
-        add         rdi,        rax
-        lea         rcx,        [rdi + rax *8]
+        lea         rsi,                    [rsi + rcx]
+        lea         rdi,                    [rdi + rcx]
 
-        ;transpose
-        movq        xmm7,       QWORD PTR [rsi+2*rax]                 ; 67 66 65 64 63 62 61 60
-        movq        xmm6,       QWORD PTR [rdi+2*rax]                 ; 77 76 75 74 73 72 71 70
-
-        punpcklbw   xmm7,       xmm6                        ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
-        movq        xmm5,       QWORD PTR [rsi]                       ; 47 46 45 44 43 42 41 40
-
-        movq        xmm4,       QWORD PTR [rsi+rax]                   ; 57 56 55 54 53 52 51 50
-        punpcklbw   xmm5,       xmm4                        ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
-
-        movdqa      xmm3,       xmm5                        ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
-        punpckhwd   xmm5,       xmm7                        ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
-
-        lea         rsi,        [rsi+ rax*8]
-
-        punpcklwd   xmm3,       xmm7                        ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
-        movq        xmm6,       QWORD PTR [rsi + 2*rax]               ; e7 e6 e5 e4 e3 e2 e1 e0
-
-        movq        xmm7,       QWORD PTR [rcx + 2*rax]               ; f7 f6 f5 f4 f3 f2 f1 f0
-        punpcklbw   xmm6,       xmm7                        ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
-
-        movq        xmm4,       QWORD PTR [rsi]                       ; c7 c6 c5 c4 c3 c2 c1 c0
-        movq        xmm7,       QWORD PTR [rsi + rax]                 ; d7 d6 d5 d4 d3 d2 d1 d0
-
-        punpcklbw   xmm4,       xmm7                        ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0
-        movdqa      xmm7,       xmm4                        ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0
-
-        punpckhwd   xmm7,       xmm6                        ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
-        punpcklwd   xmm4,       xmm6                        ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
-
-        ; xmm3 xmm4, xmm5 xmm7   in use
-        neg         rax
-
-        lea         rsi,        [rsi+rax*8]
-        movq        xmm6,       QWORD PTR [rsi+rax*2]                 ; 27 26 25 24 23 22 21 20
-
-        movq        xmm1,       QWORD PTR [rsi+rax  ]                 ; 37 36 35 34 33 32 31 30
-        punpcklbw   xmm6,       xmm1                        ; 37 27 36 26 35 25 34 24 33 23 32 22 31 21 30 20
-
-        movq        xmm2,       QWORD PTR [rsi+rax*4]                 ; 07 06 05 04 03 02 01 00
-        movq        xmm1,       QWORD PTR [rdi+rax*4]                 ; 17 16 15 14 13 12 11 10
-
-        punpcklbw   xmm2,       xmm1                        ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
-        movdqa      xmm0,       xmm2
-
-        punpckhwd   xmm2,       xmm6                        ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
-        punpcklwd   xmm0,       xmm6                        ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-
-        movdqa      xmm6,       xmm2
-        punpckldq   xmm2,       xmm5                        ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
-
-        punpckhdq   xmm6,       xmm5                        ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
-        ;xmm0 xmm2 xmm3 xmm4, xmm6, xmm7
-
-        movdqa      xmm5,       xmm0                        ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-        punpckhdq   xmm5,       xmm3                        ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-
-        punpckldq   xmm0,       xmm3                        ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
-        lea         rsi,        [rcx+rax]
-        ; xmm1, xmm3 free
-        movq        xmm1,       QWORD PTR [rsi+rax*2]                 ; a7 a6 a5 a4 a3 a2 a1 a0
-        movq        xmm3,       QWORD PTR [rsi+rax]                   ; b7 b6 b5 b4 b3 b2 b1 b0
-
-        punpcklbw   xmm1,       xmm3                        ;
-        lea         rdx,        srct                        ;
-
-        movdqa      [rdx+16],   xmm1                        ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
-        movq        xmm3,       QWORD PTR [rsi+rax*4]                 ; 87 86 85 84 83 82 81 80
-
-        movq        xmm1,       QWORD PTR [rcx+rax*4]
-        punpcklbw   xmm3,       xmm1                        ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-
-        movdqa      [rdx],      xmm3                        ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-
-        punpckhwd   xmm3,       [rdx+16]                    ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
-        movdqa      xmm1,       xmm3                        ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
-
-        punpckhdq   xmm1,       xmm7                        ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
-        punpckldq   xmm3,       xmm7                        ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
-
-        movdqa      xmm7,       xmm2                        ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
-        punpcklqdq  xmm7,       xmm3                        ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-
-        punpckhqdq  xmm2,       xmm3                        ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
-        movdqa      [rdx+32],   xmm7                        ; save 4s
-
-        movdqa      [rdx+48],   xmm2                        ; save 5s
-        movdqa      xmm7,       xmm6                        ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
-
-        punpckhqdq  xmm7,       xmm1                        ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 = q3
-        punpcklqdq  xmm6,       xmm1                        ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 = q2
-
-        ; free 1, 3   xmm7-7s xmm6-6s, xmm2-5s
-        movq        xmm1,       QWORD PTR [rdx]                       ; 93 83 92 82 91 81 90 80
-        movq        xmm3,       QWORD PTR [rdx+16]                    ; b3 a3 b2 a2 b1 a1 b0 a0
-
-        punpcklwd   xmm1,       xmm3                        ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
-        movdqa      xmm3,       xmm1                        ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
-
-        punpckhdq   xmm3,       xmm4                        ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
-        punpckldq   xmm1,       xmm4                        ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
-
-        movdqa      xmm4,       xmm5                        ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-        punpcklqdq  xmm5,       xmm3                        ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-
-        punpckhqdq  xmm4,       xmm3                        ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
-        movdqa      [rdx],      xmm5                        ; save 2s
-
-        movdqa      [rdx+16],   xmm4                        ; save 3s
-
-        movdqa      xmm3,       xmm6                        ;
-        psubusb     xmm3,       xmm7                        ; q3 - q2
-
-        psubusb     xmm7,       xmm6                        ; q2 - q3
-        por         xmm7,       xmm3                        ; abs(q3-q2)
-
-        movdqa      xmm3,       xmm2                        ; q1
-        psubusb     xmm3,       xmm6                        ; q1 - q2
-
-        psubusb     xmm6,       xmm2                        ; q2 - q1
-        por         xmm6,       xmm3                        ; abs(q2-q1)
-
-
-        movdqa      xmm3,       xmm0                        ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
-        punpcklqdq  xmm0,       xmm1                        ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
-
-        punpckhqdq  xmm3,       xmm1                        ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
-        movdqa      xmm1,       xmm3
-
-        psubusb     xmm3,       xmm0                        ; p2-p3
-        psubusb     xmm0,       xmm1                        ; p3-p2
-
-        por         xmm0,       xmm3                        ; abs(p3-p2)
-        movdqa      xmm3,       xmm5                        ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-
-        psubusb     xmm3,       xmm1                        ; p1-p2
-        psubusb     xmm1,       xmm5                        ; p2-p1
-
-        por         xmm1,       xmm3                        ; abs(p1-p2)
-        mov         rdx,        arg(3) ;limit
-
-        movdqa      xmm3,       [rdx]                       ; limit
-
-        psubusb     xmm7,       xmm3
-        psubusb     xmm0,       xmm3
-
-        psubusb     xmm1,       xmm3
-        psubusb     xmm6,       xmm3
-
-        por         xmm7,       xmm6
-        por         xmm0,       xmm1
-
-        por         xmm0,       xmm7                         ;   abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
-
-        movdqa      xmm1,       xmm5                         ; p1
-
-        movdqa      xmm7,       xmm4                        ; xmm4 xmm7 = p0
-
-        psubusb     xmm7,       xmm5                        ; p0 - p1
-        psubusb     xmm5,       xmm4                        ; p1 - p0
-
-        por         xmm5,       xmm7                        ; abs(p1-p0)
-        movdqa        t0,       xmm5                        ; save abs(p1-p0)
-
-        lea         rdx,        srct
-        psubusb     xmm5,       xmm3
-
-        por         xmm0,       xmm5                        ; xmm0=mask
-        movdqa      xmm5,       [rdx+32]                    ; xmm5=q0
-
-        movdqa      xmm7,       [rdx+48]                    ; xmm7=q1
-        movdqa      xmm6,       xmm5                        ; mm6=q0
-
-        movdqa      xmm2,       xmm7                        ; q1
-
-        psubusb     xmm5,       xmm7                        ; q0-q1
-        psubusb     xmm7,       xmm6                        ; q1-q0
-
-        por         xmm7,       xmm5                        ; abs(q1-q0)
-        movdqa        t1,       xmm7                        ; save abs(q1-q0)
-
-        psubusb     xmm7,       xmm3
-        por         xmm0,       xmm7                        ; mask
-
-        movdqa      xmm5,       xmm2                        ; q1
-        psubusb     xmm5,       xmm1                        ; q1-=p1
-        psubusb     xmm1,       xmm2                        ; p1-=q1
-        por         xmm5,       xmm1                        ; abs(p1-q1)
-        pand        xmm5,       [tfe GLOBAL]                ; set lsb of each byte to zero
-        psrlw       xmm5,       1                           ; abs(p1-q1)/2
-
-        mov         rdx,        arg(2) ;flimit                      ;
-        movdqa        xmm2,       [rdx]                       ;flimit  xmm2
-
-        movdqa      xmm1,       xmm4                        ; xmm1=xmm4=p0
-
-        movdqa      xmm7,       xmm6                        ; xmm7=xmm6=q0
-        psubusb     xmm1,       xmm7                        ; p0-q0
-
-        psubusb     xmm7,       xmm4                        ; q0-p0
-        por         xmm1,       xmm7                        ; abs(q0-p0)
-        paddusb     xmm1,       xmm1                        ; abs(q0-p0)*2
-        paddusb     xmm1,       xmm5                        ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
-        paddb       xmm2,       xmm2                        ; flimit*2 (less than 255)
-        paddb       xmm3,       xmm2                        ; flimit * 2 + limit (less than 255)
-
-        psubusb     xmm1,       xmm3                         ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
-
-        por         xmm1,       xmm0;                       ; mask
-
-        pxor        xmm0,       xmm0
-        pcmpeqb     xmm1,       xmm0
+        ; calculate breakout conditions
+        LFH_FILTER_MASK 0
         ; calculate high edge variance
-        mov         rdx,        arg(4) ;thresh            ; get thresh
-        movdqa      xmm7,       [rdx]
-
-        ;
-        movdqa      xmm4,       t0              ; get abs (q1 - q0)
-        psubusb     xmm4,       xmm7
-
-        movdqa      xmm3,       t1              ; get abs (p1 - p0)
-        psubusb     xmm3,       xmm7
-
-        por         xmm4,       xmm3            ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,       xmm0
-
-        pcmpeqb     xmm0,       xmm0
-        pxor        xmm4,       xmm0
+        LFH_HEV_MASK
 
         ; start work on filters
-        lea         rdx,        srct
-
-        movdqa      xmm2,       [rdx]           ; p1
-        movdqa      xmm7,       [rdx+48]        ; q1
-
-        movdqa      xmm6,       [rdx+16]        ; p0
-        movdqa      xmm0,       [rdx+32]        ; q0
-
-        pxor        xmm2,       [t80 GLOBAL]    ; p1 offset to convert to signed values
-        pxor        xmm7,       [t80 GLOBAL]    ; q1 offset to convert to signed values
-
-        psubsb      xmm2,       xmm7            ; p1 - q1
-        pand        xmm2,       xmm4            ; high var mask (hvm)(p1 - q1)
-
-        pxor        xmm6,       [t80 GLOBAL]    ; offset to convert to signed values
-        pxor        xmm0,       [t80 GLOBAL]    ; offset to convert to signed values
-
-        movdqa      xmm3,       xmm0            ; q0
-        psubsb      xmm0,       xmm6            ; q0 - p0
-
-        paddsb      xmm2,       xmm0            ; 1 * (q0 - p0) + hvm(p1 - q1)
-        paddsb      xmm2,       xmm0            ; 2 * (q0 - p0) + hvm(p1 - q1)
-
-        paddsb      xmm2,       xmm0            ; 3 * (q0 - p0) + hvm(p1 - q1)
-        pand        xmm1,       xmm2            ; mask filter values we don't care about
-
-        movdqa      xmm2,       xmm1
-        paddsb      xmm1,       [t4 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 4
-
-        paddsb      xmm2,       [t3 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 3
-        pxor        xmm0,       xmm0             ;
-
-        pxor        xmm5,       xmm5
-        punpcklbw   xmm0,       xmm2            ;
-
-        punpckhbw   xmm5,       xmm2            ;
-        psraw       xmm0,       11              ;
-
-        psraw       xmm5,       11
-        packsswb    xmm0,       xmm5
-
-        movdqa      xmm2,       xmm0            ;  (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
-
-        pxor        xmm0,       xmm0              ; 0
-        movdqa      xmm5,       xmm1              ; abcdefgh
-
-        punpcklbw   xmm0,       xmm1              ; e0f0g0h0
-        psraw       xmm0,       11                ; sign extended shift right by 3
-
-        pxor        xmm1,       xmm1              ; 0
-        punpckhbw   xmm1,       xmm5              ; a0b0c0d0
-
-        psraw       xmm1,       11                ; sign extended shift right by 3
-        movdqa      xmm5,       xmm0              ; save results
-
-        packsswb    xmm0,       xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
-        paddsw      xmm5,       [ones GLOBAL]
-
-        paddsw      xmm1,       [ones GLOBAL]
-        psraw       xmm5,       1                 ; partial shifted one more time for 2nd tap
-
-        psraw       xmm1,       1                 ; partial shifted one more time for 2nd tap
-        packsswb    xmm5,       xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
-
-        pandn       xmm4,       xmm5            ; high edge variance additive
-
-        paddsb      xmm6,       xmm2            ; p0+= p0 add
-        pxor        xmm6,       [t80 GLOBAL]    ; unoffset
-
-        ; mm6=p0                               ;
-        movdqa      xmm1,       [rdx]           ; p1
-        pxor        xmm1,       [t80 GLOBAL]    ; reoffset
-
-        paddsb      xmm1,       xmm4            ; p1+= p1 add
-        pxor        xmm1,       [t80 GLOBAL]    ; unoffset
-        ; mm6 = p0 mm1 = p1
-
-        psubsb      xmm3,       xmm0            ; q0-= q0 add
-        pxor        xmm3,       [t80 GLOBAL]    ; unoffset
-
-        ; mm3 = q0
-        psubsb      xmm7,       xmm4            ; q1-= q1 add
-        pxor        xmm7,       [t80 GLOBAL]    ; unoffset
-        ; mm7 = q1
-
-        ; tranpose and write back
-        ; xmm1 =    f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-        ; xmm6 =    f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
-        ; xmm3 =    f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-        ; xmm7 =    f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
-        movdqa      xmm2,       xmm1            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-        punpcklbw   xmm2,       xmm6            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
-
-        movdqa      xmm4,       xmm3            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-        punpckhbw   xmm1,       xmm6            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
-
-        punpcklbw   xmm4,       xmm7            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
-        punpckhbw   xmm3,       xmm7            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
-
-        movdqa      xmm6,       xmm2            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
-        punpcklwd   xmm2,       xmm4            ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
-
-        punpckhwd   xmm6,       xmm4            ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
-        movdqa      xmm5,       xmm1            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
-
-        punpcklwd   xmm1,       xmm3            ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
-        punpckhwd   xmm5,       xmm3            ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
-
-        ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
-        ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
-        ; xmm5 = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
-        ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
-        lea         rsi,           [rsi+rax*8]
-
-        movd        [rsi+rax*4+2], xmm2
-        psrldq      xmm2,   4
-
-        movd        [rdi+rax*4+2], xmm2
-        psrldq      xmm2,   4
-
-        movd        [rsi+rax*2+2], xmm2
-        psrldq      xmm2,   4
-
-        movd        [rdi+rax*2+2], xmm2
-        movd        [rsi+2],       xmm6
-
-        psrldq      xmm6,   4
-        movd        [rdi+2],       xmm6
-
-        psrldq      xmm6,   4
-        neg         rax
-
-        movd        [rdi+rax+2],    xmm6
-        psrldq      xmm6,   4
-
-        movd        [rdi+rax*2+2],  xmm6
-        lea         rsi,       [rsi+rax*8]
-
-        neg         rax
-        ;;;;;;;;;;;;;;;;;;;;/
-        movd        [rsi+rax*4+2], xmm1
-        psrldq      xmm1,   4
-
-        movd        [rcx+rax*4+2], xmm1
-        psrldq      xmm1,   4
-
-        movd        [rsi+rax*2+2], xmm1
-        psrldq      xmm1,   4
-
-        movd        [rcx+rax*2+2], xmm1
-        psrldq      xmm1,   4
-
-        movd        [rsi+2],       xmm5
-        psrldq      xmm5,   4
-
-        movd        [rcx+2],        xmm5
-        psrldq      xmm5,   4
-
-        neg         rax
-        movd        [rcx+rax+2],    xmm5
-
-        psrldq      xmm5,   4
-        movd        [rcx+rax*2+2],  xmm5
+        BH_FILTER 0
+        ; write back the result
+        BH_WRITEBACK 0
 
     add rsp, 96
     pop rsp
@@ -651,243 +401,67 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
 
 
-;void vp8_mbloop_filter_horizontal_edge_sse2
-;(
-;    unsigned char *src_ptr,
-;    int            src_pixel_step,
-;    const char    *flimit,
-;    const char    *limit,
-;    const char    *thresh,
-;    int            count
-;)
-global sym(vp8_mbloop_filter_horizontal_edge_sse2)
-sym(vp8_mbloop_filter_horizontal_edge_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    ALIGN_STACK 16, rax
-    sub         rsp, 32                         ; reserve 32 bytes
-    %define t0  [rsp + 0]    ;__declspec(align(16)) char t0[8];
-    %define t1  [rsp + 16]   ;__declspec(align(16)) char t1[8];
-
-        mov         rsi,                    arg(0) ;src_ptr
-        movsxd      rax,                    dword ptr arg(1) ;src_pixel_step     ; destination pitch?
-
-        mov         rdx,                    arg(3) ;limit
-        movdqa      xmm7,                   XMMWORD PTR [rdx]
-
-        mov         rdi,                    rsi           ; rdi points to row +1 for indirect addressing
-        add         rdi,                    rax
-
-        ; calculate breakout conditions
-        movdqa      xmm2,                   XMMWORD PTR [rdi+2*rax]      ; q3
-        movdqa      xmm1,                   XMMWORD PTR [rsi+2*rax]      ; q2
-
-        movdqa      xmm6,                   xmm1              ; q2
-        psubusb     xmm1,                   xmm2              ; q2-=q3
-
-
-        psubusb     xmm2,                   xmm6              ; q3-=q2
-        por         xmm1,                   xmm2              ; abs(q3-q2)
-
-        psubusb     xmm1,                   xmm7
-
-        ; mm1 = abs(q3-q2), mm6 =q2, mm7 = limit
-        movdqa      xmm4,                   XMMWORD PTR [rsi+rax]         ; q1
-        movdqa      xmm3,                   xmm4              ; q1
-
-        psubusb     xmm4,                   xmm6              ; q1-=q2
-        psubusb     xmm6,                   xmm3              ; q2-=q1
-
-        por         xmm4,                   xmm6              ; abs(q2-q1)
-        psubusb     xmm4,                   xmm7
-
-        por         xmm1,                   xmm4
-        ; mm1 = mask,      mm3=q1, mm7 = limit
-
-        movdqa      xmm4,                   XMMWORD PTR [rsi]             ; q0
-        movdqa      xmm0,                   xmm4              ; q0
-
-        psubusb     xmm4,                   xmm3              ; q0-=q1
-        psubusb     xmm3,                   xmm0              ; q1-=q0
-
-        por         xmm4,                   xmm3              ; abs(q0-q1)
-        movdqa      t0,                     xmm4                  ; save to t0
-
-        psubusb     xmm4,                   xmm7
-        por         xmm1,                   xmm4
-
-        ; mm1 = mask, mm0=q0,  mm7 = limit, t0 = abs(q0-q1)
-        neg         rax                   ; negate pitch to deal with above border
-
-        movdqa      xmm2,                   XMMWORD PTR [rsi+4*rax]      ; p3
-        movdqa      xmm4,                   XMMWORD PTR [rdi+4*rax]      ; p2
-
-        movdqa      xmm5,                   xmm4              ; p2
-        psubusb     xmm4,                   xmm2              ; p2-=p3
-
-        psubusb     xmm2,                   xmm5              ; p3-=p2
-        por         xmm4,                   xmm2              ; abs(p3 - p2)
-
-        psubusb     xmm4,                   xmm7
-        por         xmm1,                   xmm4
-
-        ; mm1 = mask, mm0=q0,  mm7 = limit, t0 = abs(q0-q1)
-        movdqa      xmm4,                   XMMWORD PTR [rsi+2*rax]      ; p1
-        movdqa      xmm3,                   xmm4              ; p1
-
-        psubusb     xmm4,                   xmm5              ; p1-=p2
-        psubusb     xmm5,                   xmm3              ; p2-=p1
-
-        por         xmm4,                   xmm5              ; abs(p2 - p1)
-        psubusb     xmm4,                   xmm7
-
-        por         xmm1,                   xmm4
-
-        movdqa      xmm2,                   xmm3              ; p1
-
-        ; mm1 = mask, mm0=q0,  mm7 = limit, t0 = abs(q0-q1)
-        movdqa      xmm4,                   XMMWORD PTR [rsi+rax]         ; p0
-        movdqa      xmm5,                   xmm4              ; p0
-
-        psubusb     xmm4,                   xmm3              ; p0-=p1
-        psubusb     xmm3,                   xmm5              ; p1-=p0
-
-        por         xmm4,                   xmm3              ; abs(p1 - p0)
-        movdqa        t1,                   xmm4                  ; save to t1
-
-        psubusb     xmm4,                   xmm7
-        por         xmm1,                   xmm4
-
-        ; mm1 = mask, mm0=q0,  mm7 = limit, t0 = abs(q0-q1) t1 = abs(p1-p0)
-        ; mm5 = p0
-        movdqa      xmm3,                   XMMWORD PTR [rdi] ; q1
-        movdqa      xmm4,                   xmm3              ; q1
-        psubusb     xmm3,                   xmm2              ; q1-=p1
-        psubusb     xmm2,                   xmm4              ; p1-=q1
-        por         xmm2,                   xmm3              ; abs(p1-q1)
-        pand        xmm2,                   [tfe GLOBAL]      ; set lsb of each byte to zero
-        psrlw       xmm2,                   1                 ; abs(p1-q1)/2
-
-        movdqa      xmm6,                   xmm5              ; p0
-        movdqa      xmm3,                   xmm0              ; q0
-
-        psubusb     xmm5,                   xmm3              ; p0-=q0
-        psubusb     xmm3,                   xmm6              ; q0-=p0
-
-        por         xmm5,                   xmm3              ; abs(p0 - q0)
-        paddusb     xmm5,                   xmm5              ; abs(p0-q0)*2
-        paddusb     xmm5,                   xmm2              ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
-        mov         rdx,                    arg(2) ;flimit            ; get flimit
-        movdqa      xmm2,                   XMMWORD PTR [rdx]             ;
-        paddb       xmm2,                   xmm2              ; flimit*2 (less than 255)
-        paddb       xmm7,                   xmm2              ; flimit * 2 + limit (less than 255)
-
-        psubusb     xmm5,                   xmm7              ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
-        por         xmm1,                   xmm5
-        pxor        xmm5,                   xmm5
-        pcmpeqb     xmm1,                   xmm5               ; mask mm1
-        ; mm1 = mask, mm0=q0,  mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
-        ; mm6 = p0,
-
-        ; calculate high edge variance
-        mov         rdx,                    arg(4) ;thresh            ; get thresh
-        movdqa      xmm7,                   XMMWORD PTR [rdx]             ;
-
-        movdqa      xmm4,                   t0                ; get abs (q1 - q0)
-        psubusb     xmm4,                   xmm7
-
-        movdqa      xmm3,                   t1                ; get abs (p1 - p0)
-        psubusb     xmm3,                   xmm7
-
-        paddb       xmm4,                   xmm3              ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,                   xmm5
-
-        pcmpeqb     xmm5,                   xmm5
-        pxor        xmm4,                   xmm5
-        ; mm1 = mask, mm0=q0,  mm7 = thresh, t0 = abs(q0-q1) t1 = abs(p1-p0)
-        ; mm6 = p0, mm4=hev
-        ; start work on filters
-        movdqa      xmm2,                   XMMWORD PTR [rsi+2*rax]   ; p1
-        movdqa      xmm7,                   XMMWORD PTR [rdi]             ; q1
-
-        pxor        xmm2,                   [t80 GLOBAL]  ; p1 offset to convert to signed values
-        pxor        xmm7,                   [t80 GLOBAL]  ; q1 offset to convert to signed values
+%macro MBH_FILTER 1
+%if %1
+        movdqa      xmm2,                   [rsi+2*rax]       ; p1
+        movdqa      xmm7,                   [rdi]             ; q1
+%else
+        movdqa      xmm2,                   p1                ; p1
+        movdqa      xmm7,                   q1                ; q1
+%endif
+        pxor        xmm2,                   [t80 GLOBAL]      ; p1 offset to convert to signed values
+        pxor        xmm7,                   [t80 GLOBAL]      ; q1 offset to convert to signed values
 
         psubsb      xmm2,                   xmm7              ; p1 - q1
-        pxor        xmm6,                   [t80 GLOBAL]  ; offset to convert to signed values
-
-        pxor        xmm0,                   [t80 GLOBAL]  ; offset to convert to signed values
+        pxor        xmm6,                   [t80 GLOBAL]      ; offset to convert to signed values
+        pxor        xmm0,                   [t80 GLOBAL]      ; offset to convert to signed values
         movdqa      xmm3,                   xmm0              ; q0
-
         psubsb      xmm0,                   xmm6              ; q0 - p0
         paddsb      xmm2,                   xmm0              ; 1 * (q0 - p0) + (p1 - q1)
-
         paddsb      xmm2,                   xmm0              ; 2 * (q0 - p0)
         paddsb      xmm2,                   xmm0              ; 3 * (q0 - p0) + (p1 - q1)
 
         pand        xmm1,                   xmm2              ; mask filter values we don't care about
-        ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0
         movdqa      xmm2,                   xmm1              ; vp8_filter
         pand        xmm2,                   xmm4;             ; Filter2 = vp8_filter & hev
 
+        movdqa      xmm5,                   xmm2
+        paddsb      xmm5,                   [t3 GLOBAL]       ; vp8_signed_char_clamp(Filter2 + 3)
 
-        movdqa      xmm5,                   xmm2          ;
-        paddsb      xmm5,                   [t3 GLOBAL];
-
-        pxor        xmm0,                   xmm0              ; 0
-        pxor        xmm7,                   xmm7              ; 0
-
-        punpcklbw   xmm0,                   xmm5              ; e0f0g0h0
-        psraw       xmm0,                   11                ; sign extended shift right by 3
-
-        punpckhbw   xmm7,                   xmm5              ; a0b0c0d0
-        psraw       xmm7,                   11                ; sign extended shift right by 3
-
-        packsswb    xmm0,                   xmm7              ; Filter2 >>=3;
-        movdqa      xmm5,                   xmm0              ; Filter2
-
-        paddsb      xmm2,                   [t4 GLOBAL]      ; vp8_signed_char_clamp(Filter2 + 4)
-        pxor        xmm0,                   xmm0              ; 0
-
-        pxor        xmm7,                   xmm7              ; 0
-        punpcklbw   xmm0,                   xmm2              ; e0f0g0h0
-
-        psraw       xmm0,                   11                ; sign extended shift right by 3
-        punpckhbw   xmm7,                   xmm2              ; a0b0c0d0
+        punpckhbw   xmm7,                   xmm5              ; axbxcxdx
+        punpcklbw   xmm5,                   xmm5              ; exfxgxhx
 
         psraw       xmm7,                   11                ; sign extended shift right by 3
-        packsswb    xmm0,                   xmm7              ; Filter2 >>=3;
+        psraw       xmm5,                   11                ; sign extended shift right by 3
 
-        ; mm0= filter2 mm1 = vp8_filter,  mm3 =qs0 mm5=s mm4 =hev mm6=ps0
-        psubsb      xmm3,                   xmm0              ; qs0 =qs0 - filter1
+        packsswb    xmm5,                   xmm7              ; Filter2 >>=3;
+        paddsb      xmm2,                   [t4 GLOBAL]       ; vp8_signed_char_clamp(Filter2 + 4)
+
+        punpckhbw   xmm7,                   xmm2              ; axbxcxdx
+        punpcklbw   xmm0,                   xmm2              ; exfxgxhx
+
+        psraw       xmm7,                   11                ; sign extended shift right by 3
+        psraw       xmm0,                   11                ; sign extended shift right by 3
+
+        packsswb    xmm0,                   xmm7              ; Filter2 >>=3;
         paddsb      xmm6,                   xmm5              ; ps0 =ps0 + Fitler2
 
-        ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0
-        ; vp8_filter &= ~hev;
-        ; Filter2 = vp8_filter;
+        psubsb      xmm3,                   xmm0              ; qs0 =qs0 - filter1
         pandn       xmm4,                   xmm1              ; vp8_filter&=~hev
+%endmacro
 
-
-        ; mm3=qs0, mm4=filter2, mm6=ps0
-
+%macro MBH_WRITEBACK 1
         ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
         ; s = vp8_signed_char_clamp(qs0 - u);
         ; *oq0 = s^0x80;
         ; s = vp8_signed_char_clamp(ps0 + u);
         ; *op0 = s^0x80;
-        pxor        xmm0,                   xmm0
         pxor        xmm1,                   xmm1
 
         pxor        xmm2,                   xmm2
@@ -911,8 +485,20 @@
         pxor        xmm3,                   [t80 GLOBAL]
         pxor        xmm6,                   [t80 GLOBAL]
 
+%if %1
         movdqa      XMMWORD PTR [rsi+rax],  xmm6
         movdqa      XMMWORD PTR [rsi],      xmm3
+%else
+        lea         rsi,                    [rsi + rcx*2]
+        lea         rdi,                    [rdi + rcx*2]
+
+        movq        MMWORD PTR [rsi],       xmm6              ; p0
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi],       xmm6
+        movq        MMWORD PTR [rsi + rcx], xmm3              ; q0
+        psrldq      xmm3,                   8
+        movq        MMWORD PTR [rdi + rcx], xmm3
+%endif
 
         ; roughly 2/7th difference across boundary
         ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
@@ -937,8 +523,13 @@
 
         packsswb    xmm1,                   xmm2
 
+%if %1
         movdqa      xmm3,                   XMMWORD PTR [rdi]
-        movdqa      xmm6,                   XMMWORD PTR [rsi+rax*2]       ; p1
+        movdqa      xmm6,                   XMMWORD PTR [rsi+rax*2] ; p1
+%else
+        movdqa      xmm3,                   q1                ; q1
+        movdqa      xmm6,                   p1                ; p1
+%endif
 
         pxor        xmm3,                   [t80 GLOBAL]
         pxor        xmm6,                   [t80 GLOBAL]
@@ -949,9 +540,18 @@
         pxor        xmm6,                   [t80 GLOBAL]
         pxor        xmm3,                   [t80 GLOBAL]
 
+%if %1
         movdqa      XMMWORD PTR [rdi],      xmm3
         movdqa      XMMWORD PTR [rsi+rax*2],xmm6
+%else
+        movq        MMWORD PTR [rsi + rcx*2],xmm3             ; q1
+        psrldq      xmm3,                   8
+        movq        MMWORD PTR [rdi + rcx*2],xmm3
 
+        movq        MMWORD PTR [rsi + rax], xmm6              ; p1
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi + rax], xmm6
+%endif
         ; roughly 1/7th difference across boundary
         ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
         ; s = vp8_signed_char_clamp(qs2 - u);
@@ -975,11 +575,15 @@
 
         packsswb    xmm1,                   xmm2
 
-
+%if %1
         movdqa      xmm6,                   XMMWORD PTR [rdi+rax*4]
         neg         rax
 
-        movdqa      xmm3,                   XMMWORD PTR [rdi+rax  ]
+        movdqa      xmm3,                   XMMWORD PTR [rdi+rax]
+%else
+        movdqa      xmm6,                   p2                ; p2
+        movdqa      xmm3,                   q2                ; q2
+%endif
 
         pxor        xmm6,                   [t80 GLOBAL]
         pxor        xmm3,                   [t80 GLOBAL]
@@ -989,11 +593,68 @@
 
         pxor        xmm6,                   [t80 GLOBAL]
         pxor        xmm3,                   [t80 GLOBAL]
-
-        movdqa      XMMWORD PTR [rdi+rax  ], xmm3
+%if %1
+        movdqa      XMMWORD PTR [rdi+rax  ],xmm3
         neg         rax
 
-        movdqa      XMMWORD PTR [rdi+rax*4], xmm6
+        movdqa      XMMWORD PTR [rdi+rax*4],xmm6
+%else
+        movq        MMWORD PTR [rsi+rax*2], xmm6              ; p2
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi+rax*2], xmm6
+
+        lea         rsi,                    [rsi + rcx]
+        lea         rdi,                    [rdi + rcx]
+        movq        MMWORD PTR [rsi+rcx*2  ],xmm3             ; q2
+        psrldq      xmm3,                   8
+        movq        MMWORD PTR [rdi+rcx*2  ],xmm3
+%endif
+%endmacro
+
+
+;void vp8_mbloop_filter_horizontal_edge_sse2
+;(
+;    unsigned char *src_ptr,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    int            count
+;)
+global sym(vp8_mbloop_filter_horizontal_edge_sse2)
+sym(vp8_mbloop_filter_horizontal_edge_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub         rsp, 32     ; reserve 32 bytes
+    %define t0 [rsp + 0]    ;__declspec(align(16)) char t0[16];
+    %define t1 [rsp + 16]   ;__declspec(align(16)) char t1[16];
+
+        mov         rsi,                    arg(0)            ;src_ptr
+        movsxd      rax,                    dword ptr arg(1)  ;src_pixel_step
+
+        mov         rdx,                    arg(3)            ;limit
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
+
+        lea         rdi,                    [rsi+rax]         ; rdi points to row +1 for indirect addressing
+
+        ; calculate breakout conditions
+        LFH_FILTER_MASK 1
+
+        ; calculate high edge variance
+        LFH_HEV_MASK
+
+        ; start work on filters
+        MBH_FILTER 1
+        ; write back the result
+        MBH_WRITEBACK 1
 
     add rsp, 32
     pop rsp
@@ -1001,11 +662,876 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
 
 
+;void vp8_mbloop_filter_horizontal_edge_uv_sse2
+;(
+;    unsigned char *u,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    unsigned char *v
+;)
+global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2)
+sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub         rsp, 96       ; reserve 96 bytes
+    %define q2  [rsp + 0]     ;__declspec(align(16)) char q2[16];
+    %define q1  [rsp + 16]    ;__declspec(align(16)) char q1[16];
+    %define p2  [rsp + 32]    ;__declspec(align(16)) char p2[16];
+    %define p1  [rsp + 48]    ;__declspec(align(16)) char p1[16];
+    %define t0  [rsp + 64]    ;__declspec(align(16)) char t0[16];
+    %define t1  [rsp + 80]    ;__declspec(align(16)) char t1[16];
+
+        mov         rsi,                    arg(0)             ; u
+        mov         rdi,                    arg(5)             ; v
+        movsxd      rax,                    dword ptr arg(1)   ; src_pixel_step
+        mov         rcx,                    rax
+        neg         rax                     ; negate pitch to deal with above border
+
+        mov         rdx,                    arg(3)             ;limit
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
+
+        lea         rsi,                    [rsi + rcx]
+        lea         rdi,                    [rdi + rcx]
+
+        ; calculate breakout conditions
+        LFH_FILTER_MASK 0
+
+        ; calculate high edge variance
+        LFH_HEV_MASK
+
+        ; start work on filters
+        MBH_FILTER 0
+        ; write back the result
+        MBH_WRITEBACK 0
+
+    add rsp, 96
+    pop rsp
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+%macro TRANSPOSE_16X8_1 0
+        movq        xmm4,               QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
+        movq        xmm7,               QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
+
+        punpcklbw   xmm4,               xmm7            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+        movq        xmm0,               QWORD PTR [rsi+2*rax]  ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
+
+        movdqa      xmm3,               xmm4            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+
+        movq        xmm7,               QWORD PTR [rdi+2*rax]  ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
+        punpcklbw   xmm0,               xmm7            ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20
+
+        movq        xmm5,               QWORD PTR [rsi+4*rax]  ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40
+        movq        xmm2,               QWORD PTR [rdi+4*rax]  ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50
+
+        punpcklbw   xmm5,               xmm2            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+        movq        xmm7,               QWORD PTR [rsi+2*rcx]  ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+
+        movq        xmm1,               QWORD PTR [rdi+2*rcx]  ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
+        movdqa      xmm6,               xmm5            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+
+        punpcklbw   xmm7,               xmm1            ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
+        punpcklwd   xmm5,               xmm7            ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+
+        punpckhwd   xmm6,               xmm7            ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
+
+        punpcklwd   xmm3,               xmm0            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+        punpckhwd   xmm4,               xmm0            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
+
+        movdqa      xmm7,               xmm4            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
+        movdqa      xmm2,               xmm3            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+
+        punpckhdq   xmm7,               xmm6            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
+        punpckldq   xmm4,               xmm6            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+
+        punpckhdq   xmm3,               xmm5            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+        punpckldq   xmm2,               xmm5            ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+
+        movdqa      t0,                 xmm2            ; save to free XMM2
+%endmacro
+
+%macro TRANSPOSE_16X8_2 1
+        movq        xmm2,               QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
+        movq        xmm5,               QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
+
+        punpcklbw   xmm2,               xmm5            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+        movq        xmm0,               QWORD PTR [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
+
+        movq        xmm5,               QWORD PTR [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
+        punpcklbw   xmm0,               xmm5                  ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
+
+        movq        xmm1,               QWORD PTR [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
+        movq        xmm6,               QWORD PTR [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
+
+        punpcklbw   xmm1,               xmm6            ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0
+        movq        xmm5,               QWORD PTR [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
+
+        movq        xmm6,               QWORD PTR [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
+        punpcklbw   xmm5,               xmm6            ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
+
+        movdqa      xmm6,               xmm1            ;
+        punpckhwd   xmm6,               xmm5            ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
+
+        punpcklwd   xmm1,               xmm5            ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
+        movdqa      xmm5,               xmm2            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+
+        punpcklwd   xmm5,               xmm0            ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
+        punpckhwd   xmm2,               xmm0            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
+
+        movdqa      xmm0,               xmm5
+        punpckldq   xmm0,               xmm1            ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+
+
+        punpckhdq   xmm5,               xmm1            ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+        movdqa      xmm1,               xmm2            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
+
+        punpckldq   xmm1,               xmm6            ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
+        punpckhdq   xmm2,               xmm6            ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
+
+        movdqa      xmm6,               xmm7            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
+        punpcklqdq  xmm6,               xmm2            ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
+
+        punpckhqdq  xmm7,               xmm2            ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07
+%if %1
+        movdqa      xmm2,               xmm3            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+
+        punpcklqdq  xmm2,               xmm5            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+
+        punpckhqdq  xmm3,               xmm5            ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+        movdqa      [rdx],              xmm2            ; save 2
+
+        movdqa      xmm5,               xmm4            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+        punpcklqdq  xmm4,               xmm1            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+
+        movdqa      [rdx+16],           xmm3            ; save 3
+        punpckhqdq  xmm5,               xmm1            ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+
+        movdqa      [rdx+32],           xmm4            ; save 4
+        movdqa      [rdx+48],           xmm5            ; save 5
+
+        movdqa      xmm1,               t0              ; get
+        movdqa      xmm2,               xmm1            ;
+
+        punpckhqdq  xmm1,               xmm0            ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+        punpcklqdq  xmm2,               xmm0            ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+%else
+        movdqa      [rdx+112],          xmm7            ; save 7
+        movdqa      xmm2,               xmm3            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+
+        movdqa      [rdx+96],           xmm6            ; save 6
+        punpcklqdq  xmm2,               xmm5            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+
+        punpckhqdq  xmm3,               xmm5            ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+        movdqa      [rdx+32],           xmm2            ; save 2
+
+        movdqa      xmm5,               xmm4            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+        punpcklqdq  xmm4,               xmm1            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+
+        movdqa      [rdx+48],           xmm3            ; save 3
+        punpckhqdq  xmm5,               xmm1            ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+
+        movdqa      [rdx+64],           xmm4            ; save 4
+        movdqa      [rdx+80],           xmm5            ; save 5
+
+        movdqa      xmm1,               t0              ; get
+        movdqa      xmm2,               xmm1
+
+        punpckhqdq  xmm1,               xmm0            ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+        punpcklqdq  xmm2,               xmm0            ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+
+        movdqa      [rdx+16],           xmm1
+        movdqa      [rdx],              xmm2
+%endif
+%endmacro
+
+%macro LFV_FILTER_MASK 1
+        movdqa      xmm0,               xmm6            ; q2
+        psubusb     xmm0,               xmm7            ; q2-q3
+
+        psubusb     xmm7,               xmm6            ; q3-q2
+        por         xmm7,               xmm0            ; abs (q3-q2)
+
+        movdqa      xmm4,               xmm5            ; q1
+        psubusb     xmm4,               xmm6            ; q1-q2
+
+        psubusb     xmm6,               xmm5            ; q2-q1
+        por         xmm6,               xmm4            ; abs (q2-q1)
+
+        movdqa      xmm0,               xmm1
+
+        psubusb     xmm0,               xmm2            ; p2 - p3;
+        psubusb     xmm2,               xmm1            ; p3 - p2;
+
+        por         xmm0,               xmm2            ; abs(p2-p3)
+%if %1
+        movdqa      xmm2,               [rdx]           ; p1
+%else
+        movdqa      xmm2,               [rdx+32]        ; p1
+%endif
+        movdqa      xmm5,               xmm2            ; p1
+
+        psubusb     xmm5,               xmm1            ; p1-p2
+        psubusb     xmm1,               xmm2            ; p2-p1
+
+        por         xmm1,               xmm5            ; abs(p2-p1)
+
+        mov         rdx,                arg(3)          ; limit
+        movdqa      xmm4,               [rdx]           ; limit
+
+        psubusb     xmm7,               xmm4
+
+        psubusb     xmm0,               xmm4            ; abs(p3-p2) > limit
+        psubusb     xmm1,               xmm4            ; abs(p2-p1) > limit
+
+        psubusb     xmm6,               xmm4            ; abs(q2-q1) > limit
+        por         xmm7,               xmm6            ; or
+
+        por         xmm0,               xmm1
+        por         xmm0,               xmm7            ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
+
+        movdqa      xmm1,               xmm2            ; p1
+
+        movdqa      xmm7,               xmm3            ; p0
+        psubusb     xmm7,               xmm2            ; p0-p1
+
+        psubusb     xmm2,               xmm3            ; p1-p0
+        por         xmm2,               xmm7            ; abs(p1-p0)
+
+        movdqa      t0,                 xmm2            ; save abs(p1-p0)
+        lea         rdx,                srct
+
+        psubusb     xmm2,               xmm4            ; abs(p1-p0)>limit
+        por         xmm0,               xmm2            ; mask
+%if %1
+        movdqa      xmm5,               [rdx+32]        ; q0
+        movdqa      xmm7,               [rdx+48]        ; q1
+%else
+        movdqa      xmm5,               [rdx+64]        ; q0
+        movdqa      xmm7,               [rdx+80]        ; q1
+%endif
+        movdqa      xmm6,               xmm5            ; q0
+        movdqa      xmm2,               xmm7            ; q1
+        psubusb     xmm5,               xmm7            ; q0-q1
+
+        psubusb     xmm7,               xmm6            ; q1-q0
+        por         xmm7,               xmm5            ; abs(q1-q0)
+
+        movdqa      t1,                 xmm7            ; save abs(q1-q0)
+        psubusb     xmm7,               xmm4            ; abs(q1-q0)> limit
+
+        por         xmm0,               xmm7            ; mask
+
+        movdqa      xmm5,               xmm2            ; q1
+        psubusb     xmm5,               xmm1            ; q1-=p1
+        psubusb     xmm1,               xmm2            ; p1-=q1
+        por         xmm5,               xmm1            ; abs(p1-q1)
+        pand        xmm5,               [tfe GLOBAL]    ; set lsb of each byte to zero
+        psrlw       xmm5,               1               ; abs(p1-q1)/2
+
+        mov         rdx,                arg(2)          ; flimit
+        movdqa      xmm2,               [rdx]           ; flimit
+
+        movdqa      xmm1,               xmm3            ; p0
+        movdqa      xmm7,               xmm6            ; q0
+        psubusb     xmm1,               xmm7            ; p0-q0
+        psubusb     xmm7,               xmm3            ; q0-p0
+        por         xmm1,               xmm7            ; abs(q0-p0)
+        paddusb     xmm1,               xmm1            ; abs(q0-p0)*2
+        paddusb     xmm1,               xmm5            ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+        paddb       xmm2,               xmm2            ; flimit*2 (less than 255)
+        paddb       xmm4,               xmm2            ; flimit * 2 + limit (less than 255)
+
+        psubusb     xmm1,               xmm4            ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
+        por         xmm1,               xmm0;           ; mask
+        pxor        xmm0,               xmm0
+        pcmpeqb     xmm1,               xmm0
+%endmacro
+
+%macro LFV_HEV_MASK 0
+        mov         rdx,                arg(4)          ; get thresh
+        movdqa      xmm7,               XMMWORD PTR [rdx]
+
+        movdqa      xmm4,               t0              ; get abs (q1 - q0)
+        psubusb     xmm4,               xmm7            ; abs(q1 - q0) > thresh
+
+        movdqa      xmm3,               t1              ; get abs (p1 - p0)
+        psubusb     xmm3,               xmm7            ; abs(p1 - p0)> thresh
+
+        por         xmm4,               xmm3            ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
+        pcmpeqb     xmm4,               xmm0
+
+        pcmpeqb     xmm0,               xmm0
+        pxor        xmm4,               xmm0
+%endmacro
+
+%macro BV_FILTER 0
+        lea         rdx,                srct
+
+        movdqa      xmm2,               [rdx]           ; p1        lea         rsi,       [rsi+rcx*8]
+        movdqa      xmm7,               [rdx+48]        ; q1
+        movdqa      xmm6,               [rdx+16]        ; p0
+        movdqa      xmm0,               [rdx+32]        ; q0
+
+        pxor        xmm2,               [t80 GLOBAL]    ; p1 offset to convert to signed values
+        pxor        xmm7,               [t80 GLOBAL]    ; q1 offset to convert to signed values
+
+        psubsb      xmm2,               xmm7            ; p1 - q1
+        pand        xmm2,               xmm4            ; high var mask (hvm)(p1 - q1)
+
+        pxor        xmm6,               [t80 GLOBAL]    ; offset to convert to signed values
+        pxor        xmm0,               [t80 GLOBAL]    ; offset to convert to signed values
+
+        movdqa      xmm3,               xmm0            ; q0
+        psubsb      xmm0,               xmm6            ; q0 - p0
+
+        paddsb      xmm2,               xmm0            ; 1 * (q0 - p0) + hvm(p1 - q1)
+        paddsb      xmm2,               xmm0            ; 2 * (q0 - p0) + hvm(p1 - q1)
+
+        paddsb      xmm2,               xmm0            ; 3 * (q0 - p0) + hvm(p1 - q1)
+        pand        xmm1,               xmm2            ; mask filter values we don't care about
+
+        movdqa      xmm2,               xmm1
+        paddsb      xmm1,               [t4 GLOBAL]     ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+
+        paddsb      xmm2,               [t3 GLOBAL]     ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+
+        punpckhbw   xmm5,               xmm2
+        punpcklbw   xmm2,               xmm2
+
+        psraw       xmm5,               11
+        psraw       xmm2,               11
+
+        packsswb    xmm2,               xmm5            ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+        punpcklbw   xmm0,               xmm1            ; exfxgxhx
+
+        punpckhbw   xmm1,               xmm1            ; axbxcxdx
+        psraw       xmm0,               11              ; sign extended shift right by 3
+
+        psraw       xmm1,               11              ; sign extended shift right by 3
+        movdqa      xmm5,               xmm0            ; save results
+
+        packsswb    xmm0,               xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
+        paddsw      xmm5,               [ones GLOBAL]
+
+        paddsw      xmm1,               [ones GLOBAL]
+        psraw       xmm5,               1               ; partial shifted one more time for 2nd tap
+
+        psraw       xmm1,               1               ; partial shifted one more time for 2nd tap
+        packsswb    xmm5,               xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
+
+        pandn       xmm4,               xmm5            ; high edge variance additive
+
+        paddsb      xmm6,               xmm2            ; p0+= p0 add
+        pxor        xmm6,               [t80 GLOBAL]    ; unoffset
+
+        movdqa      xmm1,               [rdx]           ; p1
+        pxor        xmm1,               [t80 GLOBAL]    ; reoffset
+
+        paddsb      xmm1,               xmm4            ; p1+= p1 add
+        pxor        xmm1,               [t80 GLOBAL]    ; unoffset
+
+        psubsb      xmm3,               xmm0            ; q0-= q0 add
+        pxor        xmm3,               [t80 GLOBAL]    ; unoffset
+
+        psubsb      xmm7,               xmm4            ; q1-= q1 add
+        pxor        xmm7,               [t80 GLOBAL]    ; unoffset
+%endmacro
+
+%macro BV_TRANSPOSE 0
+        ; xmm1 =    f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+        ; xmm6 =    f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+        ; xmm3 =    f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+        ; xmm7 =    f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+        movdqa      xmm2,               xmm1            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+        punpcklbw   xmm2,               xmm6            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+
+        movdqa      xmm4,               xmm3            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+        punpckhbw   xmm1,               xmm6            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+
+        punpcklbw   xmm4,               xmm7            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+        punpckhbw   xmm3,               xmm7            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
+
+        movdqa      xmm6,               xmm2            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+        punpcklwd   xmm2,               xmm4            ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
+
+        punpckhwd   xmm6,               xmm4            ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
+        movdqa      xmm5,               xmm1            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+
+        punpcklwd   xmm1,               xmm3            ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
+        punpckhwd   xmm5,               xmm3            ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
+        ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
+        ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
+        ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
+        ; xmm5 = f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
+%endmacro
+
+%macro BV_WRITEBACK 2
+        movd        [rsi+2],            %1
+        psrldq      %1,                 4
+
+        movd        [rdi+2],            %1
+        psrldq      %1,                 4
+
+        movd        [rsi+2*rax+2],      %1
+        psrldq      %1,                 4
+
+        movd        [rdi+2*rax+2],      %1
+
+        movd        [rsi+4*rax+2],      %2
+        psrldq      %2,                 4
+
+        movd        [rdi+4*rax+2],      %2
+        psrldq      %2,                 4
+
+        movd        [rsi+2*rcx+2],      %2
+        psrldq      %2,                 4
+
+        movd        [rdi+2*rcx+2],      %2
+%endmacro
+
+
+;void vp8_loop_filter_vertical_edge_sse2
+;(
+;    unsigned char *src_ptr,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    int            count
+;)
+global sym(vp8_loop_filter_vertical_edge_sse2)
+sym(vp8_loop_filter_vertical_edge_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub             rsp, 96      ; reserve 96 bytes
+    %define t0      [rsp + 0]    ;__declspec(align(16)) char t0[16];
+    %define t1      [rsp + 16]   ;__declspec(align(16)) char t1[16];
+    %define srct    [rsp + 32]   ;__declspec(align(16)) char srct[64];
+
+        mov         rsi,        arg(0)                  ; src_ptr
+        movsxd      rax,        dword ptr arg(1)        ; src_pixel_step
+
+        lea         rsi,        [rsi - 4]
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
+        lea         rcx,        [rax*2+rax]
+
+        ;transpose 16x8 to 8x16, and store the 8-line result on stack.
+        TRANSPOSE_16X8_1
+
+        lea         rsi,        [rsi+rax*8]
+        lea         rdi,        [rdi+rax*8]
+        lea         rdx,        srct
+        TRANSPOSE_16X8_2 1
+
+        ; calculate filter mask
+        LFV_FILTER_MASK 1
+        ; calculate high edge variance
+        LFV_HEV_MASK
+
+        ; start work on filters
+        BV_FILTER
+
+        ; tranpose and write back - only work on q1, q0, p0, p1
+        BV_TRANSPOSE
+        ; store 16-line result
+
+        lea         rdx,        [rax]
+        neg         rdx
+
+        BV_WRITEBACK xmm1, xmm5
+
+        lea         rsi,        [rsi+rdx*8]
+        lea         rdi,        [rdi+rdx*8]
+        BV_WRITEBACK xmm2, xmm6
+
+    add rsp, 96
+    pop rsp
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_loop_filter_vertical_edge_uv_sse2
+;(
+;    unsigned char *u,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    unsigned char *v
+;)
+global sym(vp8_loop_filter_vertical_edge_uv_sse2)
+sym(vp8_loop_filter_vertical_edge_uv_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub             rsp, 96      ; reserve 96 bytes
+    %define t0      [rsp + 0]    ;__declspec(align(16)) char t0[16];
+    %define t1      [rsp + 16]   ;__declspec(align(16)) char t1[16];
+    %define srct    [rsp + 32]   ;__declspec(align(16)) char srct[64];
+
+        mov         rsi,        arg(0)                  ; u_ptr
+        movsxd      rax,        dword ptr arg(1)        ; src_pixel_step
+
+        lea         rsi,        [rsi - 4]
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
+        lea         rcx,        [rax+2*rax]
+
+        ;transpose 16x8 to 8x16, and store the 8-line result on stack.
+        TRANSPOSE_16X8_1
+
+        mov         rsi,        arg(5)                  ; v_ptr
+        lea         rsi,        [rsi - 4]
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
+
+        lea         rdx,        srct
+        TRANSPOSE_16X8_2 1
+
+        ; calculate filter mask
+        LFV_FILTER_MASK 1
+        ; calculate high edge variance
+        LFV_HEV_MASK
+
+        ; start work on filters
+        BV_FILTER
+
+        ; tranpose and write back - only work on q1, q0, p0, p1
+        BV_TRANSPOSE
+
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
+
+        ; store 16-line result
+        BV_WRITEBACK xmm1, xmm5
+
+        mov         rsi,        arg(0)                  ; u_ptr
+        lea         rsi,        [rsi - 4]
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
+        BV_WRITEBACK xmm2, xmm6
+
+    add rsp, 96
+    pop rsp
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+%macro MBV_FILTER 0
+        lea         rdx,                srct
+
+        movdqa      xmm2,               [rdx+32]        ; p1
+        movdqa      xmm7,               [rdx+80]        ; q1
+        movdqa      xmm6,               [rdx+48]        ; p0
+        movdqa      xmm0,               [rdx+64]        ; q0
+
+        pxor        xmm2,               [t80 GLOBAL]    ; p1 offset to convert to signed values
+        pxor        xmm7,               [t80 GLOBAL]    ; q1 offset to convert to signed values
+        pxor        xmm6,               [t80 GLOBAL]    ; offset to convert to signed values
+        pxor        xmm0,               [t80 GLOBAL]    ; offset to convert to signed values
+
+        psubsb      xmm2,               xmm7            ; p1 - q1
+
+        movdqa      xmm3,               xmm0            ; q0
+
+        psubsb      xmm0,               xmm6            ; q0 - p0
+        paddsb      xmm2,               xmm0            ; 1 * (q0 - p0) + (p1 - q1)
+
+        paddsb      xmm2,               xmm0            ; 2 * (q0 - p0)
+        paddsb      xmm2,               xmm0            ; 3 * (q0 - p0)+ (p1 - q1)
+
+        pand        xmm1,               xmm2            ; mask filter values we don't care about
+
+        movdqa      xmm2,               xmm1            ; vp8_filter
+        pand        xmm2,               xmm4;           ; Filter2 = vp8_filter & hev
+
+        movdqa      xmm5,               xmm2
+        paddsb      xmm5,               [t3 GLOBAL]
+
+        punpckhbw   xmm7,               xmm5            ; axbxcxdx
+        punpcklbw   xmm5,               xmm5            ; exfxgxhx
+
+        psraw       xmm7,               11              ; sign extended shift right by 3
+        psraw       xmm5,               11              ; sign extended shift right by 3
+
+        packsswb    xmm5,               xmm7            ; Filter2 >>=3;
+
+        paddsb      xmm2,               [t4 GLOBAL]     ; vp8_signed_char_clamp(Filter2 + 4)
+
+        punpcklbw   xmm0,               xmm2            ; exfxgxhx
+        punpckhbw   xmm7,               xmm2            ; axbxcxdx
+
+        psraw       xmm0,               11              ; sign extended shift right by 3
+        psraw       xmm7,               11              ; sign extended shift right by 3
+
+        packsswb    xmm0,               xmm7            ; Filter2 >>=3;
+
+        psubsb      xmm3,               xmm0            ; qs0 =qs0 - filter1
+        paddsb      xmm6,               xmm5            ; ps0 =ps0 + Fitler2
+
+        ; vp8_filter &= ~hev;
+        ; Filter2 = vp8_filter;
+        pandn       xmm4,               xmm1            ; vp8_filter&=~hev
+
+        ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
+        ; s = vp8_signed_char_clamp(qs0 - u);
+        ; *oq0 = s^0x80;
+        ; s = vp8_signed_char_clamp(ps0 + u);
+        ; *op0 = s^0x80;
+        pxor        xmm1,               xmm1
+
+        pxor        xmm2,               xmm2
+        punpcklbw   xmm1,               xmm4
+
+        punpckhbw   xmm2,               xmm4
+        pmulhw      xmm1,               [s27 GLOBAL]
+
+        pmulhw      xmm2,               [s27 GLOBAL]
+        paddw       xmm1,               [s63 GLOBAL]
+
+        paddw       xmm2,               [s63 GLOBAL]
+        psraw       xmm1,               7
+
+        psraw       xmm2,               7
+        packsswb    xmm1,               xmm2
+
+        psubsb      xmm3,               xmm1
+        paddsb      xmm6,               xmm1
+
+        pxor        xmm3,               [t80 GLOBAL]
+        pxor        xmm6,               [t80 GLOBAL]
+
+        movdqa      [rdx+48],           xmm6
+        movdqa      [rdx+64],           xmm3
+
+        ; roughly 2/7th difference across boundary
+        ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
+        ; s = vp8_signed_char_clamp(qs1 - u);
+        ; *oq1 = s^0x80;
+        ; s = vp8_signed_char_clamp(ps1 + u);
+        ; *op1 = s^0x80;
+        pxor        xmm1,               xmm1
+        pxor        xmm2,               xmm2
+
+        punpcklbw   xmm1,               xmm4
+        punpckhbw   xmm2,               xmm4
+
+        pmulhw      xmm1,               [s18 GLOBAL]
+        pmulhw      xmm2,               [s18 GLOBAL]
+
+        paddw       xmm1,               [s63 GLOBAL]
+        paddw       xmm2,               [s63 GLOBAL]
+
+        psraw       xmm1,               7
+        psraw       xmm2,               7
+
+        packsswb    xmm1,               xmm2
+
+        movdqa      xmm3,               [rdx + 80]              ; q1
+        movdqa      xmm6,               [rdx + 32]              ; p1
+
+        pxor        xmm3,               [t80 GLOBAL]
+        pxor        xmm6,               [t80 GLOBAL]
+
+        paddsb      xmm6,               xmm1
+        psubsb      xmm3,               xmm1
+
+        pxor        xmm6,               [t80 GLOBAL]
+        pxor        xmm3,               [t80 GLOBAL]
+
+        movdqa      [rdx + 80],         xmm3
+        movdqa      [rdx + 32],         xmm6
+
+        ; roughly 1/7th difference across boundary
+        ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
+        ; s = vp8_signed_char_clamp(qs2 - u);
+        ; *oq2 = s^0x80;
+        ; s = vp8_signed_char_clamp(ps2 + u);
+        ; *op2 = s^0x80;
+        pxor        xmm1,               xmm1
+        pxor        xmm2,               xmm2
+
+        punpcklbw   xmm1,               xmm4
+        punpckhbw   xmm2,               xmm4
+
+        pmulhw      xmm1,               [s9 GLOBAL]
+        pmulhw      xmm2,               [s9 GLOBAL]
+
+        paddw       xmm1,               [s63 GLOBAL]
+        paddw       xmm2,               [s63 GLOBAL]
+
+        psraw       xmm1,               7
+        psraw       xmm2,               7
+
+        packsswb    xmm1,               xmm2
+
+        movdqa      xmm6,               [rdx+16]
+        movdqa      xmm3,               [rdx+96]
+
+        pxor        xmm6,               [t80 GLOBAL]
+        pxor        xmm3,               [t80 GLOBAL]
+
+        paddsb      xmm6,               xmm1
+        psubsb      xmm3,               xmm1
+
+        pxor        xmm6,               [t80 GLOBAL]        ; xmm6 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+        pxor        xmm3,               [t80 GLOBAL]        ; xmm3 = f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 15 06
+%endmacro
+
+%macro MBV_TRANSPOSE 0
+        movdqa      xmm0,               [rdx]               ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+        movdqa      xmm1,               xmm0                ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+
+        punpcklbw   xmm0,               xmm6                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+        punpckhbw   xmm1,               xmm6                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+
+        movdqa      xmm2,               [rdx+32]            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+        movdqa      xmm6,               xmm2                ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+
+        punpcklbw   xmm2,               [rdx+48]            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+        punpckhbw   xmm6,               [rdx+48]            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+
+        movdqa      xmm5,               xmm0                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+        punpcklwd   xmm0,               xmm2                ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+
+        punpckhwd   xmm5,               xmm2                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+        movdqa      xmm4,               xmm1                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+
+        punpcklwd   xmm1,               xmm6                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+        punpckhwd   xmm4,               xmm6                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+
+        movdqa      xmm2,               [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+        punpcklbw   xmm2,               [rdx+80]            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+
+        movdqa      xmm6,               xmm3                ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
+        punpcklbw   xmm6,               [rdx+112]           ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06
+
+        movdqa      xmm7,               xmm2                ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+        punpcklwd   xmm2,               xmm6                ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04
+
+        punpckhwd   xmm7,               xmm6                ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44
+        movdqa      xmm6,               xmm0                ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+
+        punpckldq   xmm0,               xmm2                ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00
+        punpckhdq   xmm6,               xmm2                ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20
+%endmacro
+
+%macro MBV_WRITEBACK_1 0
+        movq        QWORD PTR [rsi],    xmm0
+        psrldq      xmm0,               8
+
+        movq        QWORD PTR [rdi],    xmm0
+
+        movq        QWORD PTR [rsi+2*rax], xmm6
+        psrldq      xmm6,               8
+
+        movq        QWORD PTR [rdi+2*rax], xmm6
+
+        movdqa      xmm0,               xmm5                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+        punpckldq   xmm0,               xmm7                ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
+
+        punpckhdq   xmm5,               xmm7                ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
+
+        movq        QWORD PTR [rsi+4*rax], xmm0
+        psrldq      xmm0,               8
+
+        movq        QWORD PTR [rdi+4*rax], xmm0
+
+        movq        QWORD PTR [rsi+2*rcx], xmm5
+        psrldq      xmm5,               8
+
+        movq        QWORD PTR [rdi+2*rcx], xmm5
+
+        movdqa      xmm2,               [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+        punpckhbw   xmm2,               [rdx+80]            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
+
+        punpckhbw   xmm3,               [rdx+112]           ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86
+        movdqa      xmm0,               xmm2
+
+        punpcklwd   xmm0,               xmm3                ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84
+        punpckhwd   xmm2,               xmm3                ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4
+
+        movdqa      xmm3,               xmm1                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+        punpckldq   xmm1,               xmm0                ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80
+
+        punpckhdq   xmm3,               xmm0                ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0
+%endmacro
+
+%macro MBV_WRITEBACK_2 0
+        movq        QWORD PTR [rsi], xmm1
+        psrldq      xmm1,               8
+
+        movq        QWORD PTR [rdi], xmm1
+
+        movq        QWORD PTR [rsi+2*rax], xmm3
+        psrldq      xmm3,               8
+
+        movq        QWORD PTR [rdi+2*rax], xmm3
+
+        movdqa      xmm1,               xmm4                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+        punpckldq   xmm1,               xmm2                ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
+
+        punpckhdq   xmm4,               xmm2                ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
+        movq        QWORD PTR [rsi+4*rax], xmm1
+
+        psrldq      xmm1,               8
+
+        movq        QWORD PTR [rdi+4*rax], xmm1
+
+        movq        QWORD PTR [rsi+2*rcx], xmm4
+        psrldq      xmm4,               8
+
+        movq        QWORD PTR [rdi+2*rcx], xmm4
+%endmacro
+
+
 ;void vp8_mbloop_filter_vertical_edge_sse2
 ;(
 ;    unsigned char *src_ptr,
@@ -1020,6 +1546,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1031,531 +1558,43 @@
     %define t1   [rsp + 16]   ;__declspec(align(16)) char t1[16];
     %define srct [rsp + 32]   ;__declspec(align(16)) char srct[128];
 
+        mov         rsi,                arg(0)              ; src_ptr
+        movsxd      rax,                dword ptr arg(1)    ; src_pixel_step
 
-        mov         rsi,                arg(0) ;src_ptr
-        movsxd      rax,                dword ptr arg(1) ;src_pixel_step                    ; destination pitch?
-
-        lea         rsi,                [rsi + rax*4 - 4]
-        lea         rdi,                [rsi + rax]                     ; rdi points to row +1 for indirect addressing
-
-        mov         rcx,                rax
-        neg         rcx
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]         ; rdi points to row +1 for indirect addressing
+        lea         rcx,                [rax*2+rax]
 
         ; Transpose
-        movq        xmm0,               QWORD PTR [rdi+rax*2]           ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
-        movq        xmm7,               QWORD PTR [rsi+rax*2]           ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+        TRANSPOSE_16X8_1
 
-        punpcklbw   xmm7,               xmm0                            ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
-        movq        xmm0,               QWORD PTR [rsi+rax]             ;
+        lea         rsi,        [rsi+rax*8]
+        lea         rdi,        [rdi+rax*8]
+        lea         rdx,        srct
+        TRANSPOSE_16X8_2 0
 
-        movq        xmm5,               QWORD PTR [rsi]                 ;
-        punpcklbw   xmm5,               xmm0                            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+        ; calculate filter mask
+        LFV_FILTER_MASK 0
+        ; calculate high edge variance
+        LFV_HEV_MASK
 
-        movdqa      xmm6,               xmm5                            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
-        punpcklwd   xmm5,               xmm7                            ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+        neg         rax
+        ; start work on filters
+        MBV_FILTER
 
-        punpckhwd   xmm6,               xmm7                            ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
-        movq        xmm7,               QWORD PTR [rsi + rcx]           ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
-
-        movq        xmm0,               QWORD PTR [rsi + rcx*2]         ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
-        punpcklbw   xmm0,               xmm7                            ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20
-
-        movq        xmm4,               QWORD PTR [rsi + rcx*4]         ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
-        movq        xmm7,               QWORD PTR [rdi + rcx*4]         ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
-
-        punpcklbw   xmm4,               xmm7                            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
-        movdqa      xmm3,               xmm4                            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
-
-        punpcklwd   xmm3,               xmm0                            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-        punpckhwd   xmm4,               xmm0                            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
-
-        movdqa      xmm7,               xmm4                            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
-        movdqa      xmm2,               xmm3                            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-
-        punpckhdq   xmm7,               xmm6                            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
-        punpckldq   xmm4,               xmm6                            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
-
-        punpckhdq   xmm3,               xmm5                            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-        punpckldq   xmm2,               xmm5                            ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
-
-        movdqa      t0,                 xmm2                            ; save to free XMM2
-        ;movdqa        t1,                 xmm3
-
-        ; XMM3 XMM4 XMM7 in use
         lea         rsi,                [rsi+rax*8]
         lea         rdi,                [rdi+rax*8]
 
-        movq        xmm6,               QWORD PTR [rdi+rax*2]           ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
-        movq        xmm5,               QWORD PTR [rsi+rax*2]           ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
-
-        punpcklbw   xmm5,               xmm6                            ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
-        movq        xmm6,               QWORD PTR [rsi+rax]             ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
-
-        movq        xmm1,               QWORD PTR [rsi]                 ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
-        punpcklbw   xmm1,               xmm6                            ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0
-
-        movdqa      xmm6,               xmm1                            ;
-        punpckhwd   xmm6,               xmm5                            ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
-
-        punpcklwd   xmm1,               xmm5                            ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
-        movq        xmm5,               QWORD PTR [rsi+rcx]             ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
-
-        movq        xmm0,               QWORD PTR [rsi+rcx*2]           ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
-        punpcklbw   xmm0,               xmm5                            ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
-
-        movq        xmm2,               QWORD PTR [rsi+rcx*4]           ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
-        movq        xmm5,               QWORD PTR [rdi+rcx*4]           ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
-
-        punpcklbw   xmm2,               xmm5                            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-        movdqa      xmm5,               xmm2                            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-
-        punpcklwd   xmm5,               xmm0                            ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
-        punpckhwd   xmm2,               xmm0                            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
-
-        movdqa      xmm0,               xmm5
-        punpckldq   xmm0,               xmm1                            ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
-
-
-        punpckhdq   xmm5,               xmm1                            ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
-        movdqa      xmm1,               xmm2                            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
-
-        punpckldq   xmm1,               xmm6                            ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
-        punpckhdq   xmm2,               xmm6                            ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
-
-        movdqa      xmm6,               xmm7                            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
-        punpcklqdq  xmm6,               xmm2                            ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
-
-
-        lea         rdx,                srct
-        punpckhqdq  xmm7,               xmm2                            ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07
-
-        movdqa      [rdx+112],          xmm7                            ; save 7
-        movdqa      xmm2,               xmm3                            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-
-        movdqa      [rdx+96],           xmm6                            ; save 6
-        punpcklqdq  xmm2,               xmm5                            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-
-        punpckhqdq  xmm3,               xmm5                            ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
-        movdqa      [rdx+32],           xmm2                            ; save 2
-
-        movdqa      xmm5,               xmm4                            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
-        punpcklqdq  xmm4,               xmm1                            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-
-        movdqa      [rdx+48],           xmm3                            ; save 3
-        punpckhqdq  xmm5,               xmm1                            ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
-
-        movdqa      [rdx+64],           xmm4                            ; save 4
-        movdqa      [rdx+80],           xmm5                            ; save 5
-
-        movdqa      xmm1,               t0                              ; get
-        movdqa      xmm2,               xmm1                            ;
-
-        punpckhqdq  xmm1,               xmm0                            ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
-        punpcklqdq  xmm2,               xmm0                            ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
-
-        movdqa      [rdx+16],           xmm1
-        movdqa      [rdx],              xmm2
-
-        movdqa      xmm0,               xmm6                            ; q2
-        psubusb     xmm0,               xmm7                            ; q2-q3
-
-        psubusb     xmm7,               xmm6                            ; q3-q2
-        por         xmm7,               xmm0                            ; abs (q3-q2)
-
-        movdqa      xmm1,               xmm5                            ; q1
-        psubusb     xmm1,               xmm6                            ; q1-q2
-
-        psubusb     xmm6,               xmm5                            ; q2-q1
-        por         xmm6,               xmm1                            ; abs (q2-q1)
-
-        ;/*
-        ;movdqa      xmm0,               xmm4                            ; q0
-        ;psubusb     xmm0                xmm5                            ; q0-q1
-        ;
-        ;pusbusb     xmm5,               xmm4                            ; q1-q0
-        ;por         xmm5,               xmm0                            ; abs (q1-q0)
-        ;*/
-
-        movdqa      xmm1,               [rdx+16]                        ; p2
-        movdqa      xmm0,               xmm1
-
-        psubusb     xmm0,               xmm2                            ; p2 - p3;
-        psubusb     xmm2,               xmm1                            ; p3 - p2;
-
-        por         xmm0,               xmm2                            ; abs(p2-p3)
-
-        movdqa      xmm2,               [rdx+32]                        ; p1
-        movdqa      xmm5,               xmm2                            ; p1
-
-        psubusb     xmm5,               xmm1                            ; p1-p2
-        psubusb     xmm1,               xmm2                            ; p2-p1
-
-        por         xmm1,               xmm5                            ; abs(p2-p1)
-        mov         rdx,                arg(3) ;limit
-
-        movdqa      xmm4,               [rdx]                           ; limit
-        psubusb     xmm7,               xmm4                            ;
-
-
-        psubusb     xmm0,               xmm4                            ; abs(p3-p2) > limit
-        psubusb     xmm1,               xmm4                            ; abs(p2-p1) > limit
-
-        psubusb     xmm6,               xmm4                            ; abs(q2-q1) > limit
-        por         xmm7,               xmm6                            ; or
-
-        por         xmm0,               xmm1                            ;
-        por         xmm0,               xmm7                            ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
-
-        movdqa      xmm1,               xmm2                            ; p1
-
-        movdqa      xmm7,               xmm3                            ; p0
-        psubusb     xmm7,               xmm2                            ; p0-p1
-
-        psubusb     xmm2,               xmm3                            ; p1-p0
-        por         xmm2,               xmm7                            ; abs(p1-p0)
-
-        movdqa      t0,                 xmm2                            ; save abs(p1-p0)
-        lea         rdx,                srct
-
-        psubusb     xmm2,               xmm4                            ; abs(p1-p0)>limit
-        por         xmm0,               xmm2                            ; mask
-
-        movdqa      xmm5,               [rdx+64]                        ; q0
-        movdqa      xmm7,               [rdx+80]                        ; q1
-
-        movdqa      xmm6,               xmm5                            ; q0
-        movdqa      xmm2,               xmm7                            ; q1
-        psubusb     xmm5,               xmm7                            ; q0-q1
-
-        psubusb     xmm7,               xmm6                            ; q1-q0
-        por         xmm7,               xmm5                            ; abs(q1-q0)
-
-        movdqa      t1,                 xmm7                            ; save abs(q1-q0)
-        psubusb     xmm7,               xmm4                            ; abs(q1-q0)> limit
-
-        por         xmm0,               xmm7                            ; mask
-
-        movdqa      xmm5,                xmm2                           ; q1
-        psubusb     xmm5,                xmm1                           ; q1-=p1
-        psubusb     xmm1,                xmm2                           ; p1-=q1
-        por         xmm5,                xmm1                           ; abs(p1-q1)
-        pand        xmm5,                [tfe GLOBAL]                   ; set lsb of each byte to zero
-        psrlw       xmm5,                1                              ; abs(p1-q1)/2
-
-        mov         rdx,                arg(2) ;flimit                          ;
-        movdqa      xmm2,               [rdx]                           ; flimit
-
-        movdqa      xmm1,               xmm3                            ; p0
-        movdqa      xmm7,               xmm6                            ; q0
-        psubusb     xmm1,               xmm7                            ; p0-q0
-        psubusb     xmm7,               xmm3                            ; q0-p0
-        por         xmm1,               xmm7                            ; abs(q0-p0)
-        paddusb     xmm1,               xmm1                            ; abs(q0-p0)*2
-        paddusb     xmm1,               xmm5                            ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
-        paddb       xmm2,               xmm2                            ; flimit*2 (less than 255)
-        paddb       xmm4,               xmm2                            ; flimit * 2 + limit (less than 255)
-
-        psubusb     xmm1,               xmm4                            ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
-        por         xmm1,               xmm0;                           ; mask
-        pxor        xmm0,               xmm0
-        pcmpeqb     xmm1,               xmm0
-
-        ; calculate high edge variance
-        mov         rdx,                arg(4) ;thresh                          ; get thresh
-        movdqa      xmm7,               [rdx]
-
-        movdqa      xmm4,               t0                              ; get abs (q1 - q0)
-        psubusb     xmm4,               xmm7                            ; abs(q1 - q0) > thresh
-
-        movdqa      xmm3,               t1                              ; get abs (p1 - p0)
-        psubusb     xmm3,               xmm7                            ; abs(p1 - p0)> thresh
-
-        por         xmm4,               xmm3                            ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,               xmm0
-
-        pcmpeqb     xmm0,               xmm0
-        pxor        xmm4,               xmm0
-
-
-        ; start work on filters
-        lea         rdx,                srct
-
-        ; start work on filters
-        movdqa      xmm2,               [rdx+32]                        ; p1
-        movdqa      xmm7,               [rdx+80]                        ; q1
-
-        pxor        xmm2,               [t80 GLOBAL]                    ; p1 offset to convert to signed values
-        pxor        xmm7,               [t80 GLOBAL]                    ; q1 offset to convert to signed values
-
-        psubsb      xmm2,               xmm7                            ; p1 - q1
-        movdqa      xmm6,               [rdx+48]                        ; p0
-
-        movdqa      xmm0,               [rdx+64]                        ; q0
-        pxor        xmm6,               [t80 GLOBAL]                    ; offset to convert to signed values
-
-        pxor        xmm0,               [t80 GLOBAL]                    ; offset to convert to signed values
-        movdqa      xmm3,               xmm0                            ; q0
-
-        psubsb      xmm0,               xmm6                            ; q0 - p0
-        paddsb      xmm2,               xmm0                            ; 1 * (q0 - p0) + (p1 - q1)
-
-        paddsb      xmm2,               xmm0                            ; 2 * (q0 - p0)
-        paddsb      xmm2,               xmm0                            ; 3 * (q0 - p0)+ (p1 - q1)
-
-        pand        xmm1,               xmm2                            ; mask filter values we don't care about
-
-        ; xmm1 = vp8_filter, xmm4=hev, xmm6=ps0, xmm3=qs0
-        movdqa      xmm2,               xmm1                            ; vp8_filter
-        pand        xmm2,               xmm4;                           ; Filter2 = vp8_filter & hev
-
-        movdqa      xmm5,               xmm2
-        paddsb      xmm5,               [t3 GLOBAL]
-
-        pxor        xmm0,               xmm0                            ; 0
-        pxor        xmm7,               xmm7                            ; 0
-
-        punpcklbw   xmm0,               xmm5                            ; e0f0g0h0
-        psraw       xmm0,               11                              ; sign extended shift right by 3
-
-        punpckhbw   xmm7,               xmm5                            ; a0b0c0d0
-        psraw       xmm7,               11                              ; sign extended shift right by 3
-
-        packsswb    xmm0,               xmm7                            ; Filter2 >>=3;
-        movdqa      xmm5,               xmm0                            ; Filter2
-
-        paddsb      xmm2,               [t4 GLOBAL]                     ; vp8_signed_char_clamp(Filter2 + 4)
-        pxor        xmm0,               xmm0                            ; 0
-
-        pxor        xmm7,               xmm7                            ; 0
-        punpcklbw   xmm0,               xmm2                            ; e0f0g0h0
-
-        psraw       xmm0,               11                              ; sign extended shift right by 3
-        punpckhbw   xmm7,               xmm2                            ; a0b0c0d0
-
-        psraw       xmm7,               11                              ; sign extended shift right by 3
-        packsswb    xmm0,               xmm7                            ; Filter2 >>=3;
-
-        ; xmm0= filter2 xmm1 = vp8_filter,  xmm3 =qs0 xmm5=s xmm4 =hev xmm6=ps0
-        psubsb      xmm3,               xmm0                            ; qs0 =qs0 - filter1
-        paddsb      xmm6,               xmm5                            ; ps0 =ps0 + Fitler2
-
-
-        ; xmm1=vp8_filter, xmm3=qs0, xmm4 =hev xmm6=ps0
-        ; vp8_filter &= ~hev;
-        ; Filter2 = vp8_filter;
-        pandn       xmm4,                   xmm1                        ; vp8_filter&=~hev
-
-        ; xmm3=qs0, xmm4=filter2, xmm6=ps0
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
-        ; s = vp8_signed_char_clamp(qs0 - u);
-        ; *oq0 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps0 + u);
-        ; *op0 = s^0x80;
-        pxor        xmm0,                   xmm0
-        pxor        xmm1,                   xmm1
-
-        pxor        xmm2,                   xmm2
-        punpcklbw   xmm1,                   xmm4
-
-        punpckhbw   xmm2,                   xmm4
-        pmulhw      xmm1,                   [s27 GLOBAL]
-
-        pmulhw      xmm2,                   [s27 GLOBAL]
-        paddw       xmm1,                   [s63 GLOBAL]
-
-        paddw       xmm2,                   [s63 GLOBAL]
-        psraw       xmm1,                   7
-
-        psraw       xmm2,                   7
-        packsswb    xmm1,                   xmm2
-
-        psubsb      xmm3,                   xmm1
-        paddsb      xmm6,                   xmm1
-
-        pxor        xmm3,                   [t80 GLOBAL]
-        pxor        xmm6,                   [t80 GLOBAL]
-
-        movdqa      [rdx+48],               xmm6
-        movdqa      [rdx+64],               xmm3
-
-        ; roughly 2/7th difference across boundary
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
-        ; s = vp8_signed_char_clamp(qs1 - u);
-        ; *oq1 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps1 + u);
-        ; *op1 = s^0x80;
-        pxor        xmm1,                       xmm1
-        pxor        xmm2,                       xmm2
-
-        punpcklbw   xmm1,                       xmm4
-        punpckhbw   xmm2,                       xmm4
-
-        pmulhw      xmm1,                       [s18 GLOBAL]
-        pmulhw      xmm2,                       [s18 GLOBAL]
-
-        paddw       xmm1,                       [s63 GLOBAL]
-        paddw       xmm2,                       [s63 GLOBAL]
-
-        psraw       xmm1,                       7
-        psraw       xmm2,                       7
-
-        packsswb    xmm1,                       xmm2
-
-        movdqa      xmm3,                       [rdx + 80]              ;/q1
-        movdqa      xmm6,                       [rdx + 32]              ; p1
-
-        pxor        xmm3,                       [t80 GLOBAL]
-        pxor        xmm6,                       [t80 GLOBAL]
-
-        paddsb      xmm6,                       xmm1
-        psubsb      xmm3,                       xmm1
-
-        pxor        xmm6,                       [t80 GLOBAL]
-        pxor        xmm3,                       [t80 GLOBAL]
-
-        movdqa      [rdx + 80],                 xmm3
-        movdqa      [rdx + 32],                 xmm6
-
-
-        ; roughly 1/7th difference across boundary
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
-        ; s = vp8_signed_char_clamp(qs2 - u);
-        ; *oq2 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps2 + u);
-        ; *op2 = s^0x80;
-        pxor        xmm1,                       xmm1
-        pxor        xmm2,                       xmm2
-
-        punpcklbw   xmm1,                       xmm4
-        punpckhbw   xmm2,                       xmm4
-
-        pmulhw      xmm1,                       [s9 GLOBAL]
-        pmulhw      xmm2,                       [s9 GLOBAL]
-
-        paddw       xmm1,                       [s63 GLOBAL]
-        paddw       xmm2,                       [s63 GLOBAL]
-
-        psraw       xmm1,                       7
-        psraw       xmm2,                       7
-
-        packsswb    xmm1,                       xmm2
-
-        movdqa      xmm6,                       [rdx+16]
-        movdqa      xmm3,                       [rdx+96]
-
-        pxor        xmm6,                       [t80 GLOBAL]
-        pxor        xmm3,                       [t80 GLOBAL]
-
-        paddsb      xmm6,                       xmm1
-        psubsb      xmm3,                       xmm1
-
-        pxor        xmm6,                       [t80 GLOBAL]        ; xmm6 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
-        pxor        xmm3,                       [t80 GLOBAL]        ; xmm3 = f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 15 06
-
-
         ; transpose and write back
-        movdqa      xmm0,                       [rdx]               ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
-        movdqa      xmm1,                       xmm0                ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+        MBV_TRANSPOSE
 
-        punpcklbw   xmm0,                       xmm6                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
-        punpckhbw   xmm1,                       xmm6                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+        neg         rax
 
-        movdqa      xmm2,                       [rdx+32]            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-        movdqa      xmm6,                       xmm2                ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+        MBV_WRITEBACK_1
 
-        punpcklbw   xmm2,                       [rdx+48]            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
-        punpckhbw   xmm6,                       [rdx+48]            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
-
-        movdqa      xmm5,                       xmm0                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
-        punpcklwd   xmm0,                       xmm2                ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
-
-        punpckhwd   xmm5,                       xmm2                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
-        movdqa      xmm4,                       xmm1                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
-
-        punpcklwd   xmm1,                       xmm6                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
-        punpckhwd   xmm4,                       xmm6                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
-
-        movdqa      xmm2,                       [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-        punpcklbw   xmm2,                       [rdx+80]            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
-
-        movdqa      xmm6,                       xmm3                ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
-        punpcklbw   xmm6,                       [rdx+112]           ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06
-
-        movdqa      xmm7,                       xmm2                ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
-        punpcklwd   xmm2,                       xmm6                ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04
-
-        punpckhwd   xmm7,                       xmm6                ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44
-        movdqa      xmm6,                       xmm0                ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
-
-        punpckldq   xmm0,                       xmm2                ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00
-        punpckhdq   xmm6,                       xmm2                ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20
-
-        lea         rsi,                        [rsi+rcx*8]
-        lea         rdi,                        [rdi+rcx*8]
-
-        movq        QWORD PTR [rsi+rcx*4],      xmm0
-        psrldq      xmm0,                       8
-
-        movq        QWORD PTR [rsi+rcx*2],      xmm6
-        psrldq      xmm6,                       8
-
-        movq        QWORD PTR [rdi+rcx*4],      xmm0
-        movq        QWORD PTR [rsi+rcx],        xmm6
-
-        movdqa      xmm0,                       xmm5                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
-        punpckldq   xmm0,                       xmm7                ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
-
-        punpckhdq   xmm5,                       xmm7                ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
-
-        movq        QWORD PTR [rsi],            xmm0
-        psrldq      xmm0,                       8
-
-        movq        QWORD PTR [rsi+rax*2],      xmm5
-        psrldq      xmm5,                       8
-
-        movq        QWORD PTR [rsi+rax],        xmm0
-        movq        QWORD PTR [rdi+rax*2],      xmm5
-
-        movdqa      xmm2,                       [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-        punpckhbw   xmm2,                       [rdx+80]            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
-
-        punpckhbw   xmm3,                       [rdx+112]           ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86
-        movdqa      xmm0,                       xmm2
-
-        punpcklwd   xmm0,                       xmm3                ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84
-        punpckhwd   xmm2,                       xmm3                ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4
-
-        movdqa      xmm3,                       xmm1                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
-        punpckldq   xmm1,                       xmm0                ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80
-
-        punpckhdq   xmm3,                       xmm0                ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0
-
-        lea         rsi,                        [rsi+rax*8]
-        lea         rdi,                        [rdi+rax*8]
-
-        movq        QWORD PTR [rsi+rcx*4],      xmm1
-        psrldq      xmm1,                       8
-
-        movq        QWORD PTR [rsi+rcx*2],      xmm3
-        psrldq      xmm3,                       8
-
-        movq        QWORD PTR [rdi+rcx*4],      xmm1
-        movq        QWORD PTR [rsi+rcx],        xmm3
-
-        movdqa      xmm1,                       xmm4                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
-        punpckldq   xmm1,                       xmm2                ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
-
-        punpckhdq   xmm4,                       xmm2                ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
-        movq        QWORD PTR [rsi],            xmm1
-
-        psrldq      xmm1,                       8
-
-        movq        QWORD PTR [rsi+rax*2],      xmm4
-        psrldq      xmm4,                       8
-
-        movq        QWORD PTR [rsi+rax],        xmm1
-        movq        QWORD PTR [rdi+rax*2],      xmm4
+        lea         rsi,                [rsi+rax*8]
+        lea         rdi,                [rdi+rax*8]
+        MBV_WRITEBACK_2
 
     add rsp, 160
     pop rsp
@@ -1563,6 +1602,82 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_mbloop_filter_vertical_edge_uv_sse2
+;(
+;    unsigned char *u,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    unsigned char *v
+;)
+global sym(vp8_mbloop_filter_vertical_edge_uv_sse2)
+sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub          rsp, 160     ; reserve 160 bytes
+    %define t0   [rsp + 0]    ;__declspec(align(16)) char t0[16];
+    %define t1   [rsp + 16]   ;__declspec(align(16)) char t1[16];
+    %define srct [rsp + 32]   ;__declspec(align(16)) char srct[128];
+
+        mov         rsi,                arg(0)              ; u_ptr
+        movsxd      rax,                dword ptr arg(1)    ; src_pixel_step
+
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]         ; rdi points to row +1 for indirect addressing
+        lea         rcx,                [rax+2*rax]
+
+        ; Transpose
+        TRANSPOSE_16X8_1
+
+        ; XMM3 XMM4 XMM7 in use
+        mov         rsi,                arg(5)              ; v_ptr
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]
+        lea         rdx,                srct
+        TRANSPOSE_16X8_2 0
+
+        ; calculate filter mask
+        LFV_FILTER_MASK 0
+        ; calculate high edge variance
+        LFV_HEV_MASK
+
+        ; start work on filters
+        MBV_FILTER
+
+        ; transpose and write back
+        MBV_TRANSPOSE
+
+        mov         rsi,                arg(0)             ;u_ptr
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]
+        MBV_WRITEBACK_1
+        mov         rsi,                arg(5)             ;v_ptr
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]
+        MBV_WRITEBACK_2
+
+    add rsp, 160
+    pop rsp
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1582,6 +1697,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1678,6 +1794,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1697,6 +1814,7 @@
     push        rbp         ; save old base pointer value.
     mov         rbp, rsp    ; set new base pointer value.
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx         ; save callee-saved reg
     push        rsi
     push        rdi
@@ -1941,6 +2059,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index 143ee74..3ff8c4e 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -33,6 +34,11 @@
 prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
 prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
 
+extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
+extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
+extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
+extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
+
 #if HAVE_MMX
 // Horizontal MB filtering
 void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
@@ -156,10 +162,7 @@
     vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 
     if (u_ptr)
-        vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
-
-    if (v_ptr)
-        vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
+        vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
 }
 
 
@@ -182,10 +185,7 @@
     vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 
     if (u_ptr)
-        vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
-
-    if (v_ptr)
-        vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
+        vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
 }
 
 
@@ -210,10 +210,7 @@
     vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 
     if (u_ptr)
-        vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
-
-    if (v_ptr)
-        vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
+        vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
 }
 
 
@@ -240,10 +237,7 @@
     vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 
     if (u_ptr)
-        vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
-
-    if (v_ptr)
-        vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
+        vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
 }
 
 
diff --git a/vp8/common/x86/loopfilter_x86.h b/vp8/common/x86/loopfilter_x86.h
index c87f38a..80dbebc 100644
--- a/vp8/common/x86/loopfilter_x86.h
+++ b/vp8/common/x86/loopfilter_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm
index 721c8d6..349ac0d 100644
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/postproc_mmx.c b/vp8/common/x86/postproc_mmx.c
index 095797b..6b6321a 100644
--- a/vp8/common/x86/postproc_mmx.c
+++ b/vp8/common/x86/postproc_mmx.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index bfa36fa..276f208 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -25,6 +26,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -239,6 +241,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -253,6 +256,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -438,6 +442,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -451,6 +456,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -611,6 +617,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/common/x86/postproc_x86.h b/vp8/common/x86/postproc_x86.h
index 49a1907..899dd2f 100644
--- a/vp8/common/x86/postproc_x86.h
+++ b/vp8/common/x86/postproc_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm
index ba60c5d..e7211fc 100644
--- a/vp8/common/x86/recon_mmx.asm
+++ b/vp8/common/x86/recon_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index f2685a7..4ad3973 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -66,6 +67,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 4
+    SAVE_XMM
     push        rsi
     push        rdi
     ; end prolog
@@ -118,6 +120,7 @@
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h
index c469778..40ee65a 100644
--- a/vp8/common/x86/recon_x86.h
+++ b/vp8/common/x86/recon_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index c502118..06db0c6 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -730,7 +731,7 @@
     times 4 dw 0x40
 
 align 16
-global sym(vp8_six_tap_mmx)
+global HIDDEN_DATA(sym(vp8_six_tap_mmx))
 sym(vp8_six_tap_mmx):
     times 8 dw 0
     times 8 dw 0
@@ -790,7 +791,7 @@
 
 
 align 16
-global sym(vp8_bilinear_filters_mmx)
+global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
 sym(vp8_bilinear_filters_mmx):
     times 8 dw 128
     times 8 dw 0
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index dee04f2..2385abf 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -36,6 +37,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -128,6 +130,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -154,6 +157,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -303,6 +307,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -328,6 +333,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 8
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -396,6 +402,494 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_filter_block1d16_v6_sse2
+;(
+;    unsigned short *src_ptr,
+;    unsigned char *output_ptr,
+;    int dst_ptich,
+;    unsigned int pixels_per_line,
+;    unsigned int pixel_step,
+;    unsigned int output_height,
+;    unsigned int output_width,
+;    const short    *vp8_filter
+;)
+;/************************************************************************************
+; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
+; input pixel array has output_height rows.
+;*************************************************************************************/
+global sym(vp8_filter_block1d16_v6_sse2)
+sym(vp8_filter_block1d16_v6_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 8
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov         rax,        arg(7) ;vp8_filter
+        movsxd      rdx,        dword ptr arg(3) ;pixels_per_line
+
+        mov         rdi,        arg(1) ;output_ptr
+        mov         rsi,        arg(0) ;src_ptr
+
+        sub         rsi,        rdx
+        sub         rsi,        rdx
+
+        movsxd      rcx,        DWORD PTR arg(5) ;[output_height]
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(2) ; dst_ptich
+%endif
+
+vp8_filter_block1d16_v6_sse2_loop:
+; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order.
+        movdqa      xmm1,       XMMWORD PTR [rsi + rdx]       ; line 2
+        movdqa      xmm2,       XMMWORD PTR [rsi + rdx + 16]
+        pmullw      xmm1,       [rax + 16]
+        pmullw      xmm2,       [rax + 16]
+
+        movdqa      xmm3,       XMMWORD PTR [rsi + rdx * 4]       ; line 5
+        movdqa      xmm4,       XMMWORD PTR [rsi + rdx * 4 + 16]
+        pmullw      xmm3,       [rax + 64]
+        pmullw      xmm4,       [rax + 64]
+
+        movdqa      xmm5,       XMMWORD PTR [rsi + rdx * 2]       ; line 3
+        movdqa      xmm6,       XMMWORD PTR [rsi + rdx * 2 + 16]
+        pmullw      xmm5,       [rax + 32]
+        pmullw      xmm6,       [rax + 32]
+
+        movdqa      xmm7,       XMMWORD PTR [rsi]       ; line 1
+        movdqa      xmm0,       XMMWORD PTR [rsi + 16]
+        pmullw      xmm7,       [rax]
+        pmullw      xmm0,       [rax]
+
+        paddsw      xmm1,       xmm3
+        paddsw      xmm2,       xmm4
+        paddsw      xmm1,       xmm5
+        paddsw      xmm2,       xmm6
+        paddsw      xmm1,       xmm7
+        paddsw      xmm2,       xmm0
+
+        add         rsi,        rdx
+
+        movdqa      xmm3,       XMMWORD PTR [rsi + rdx * 2]       ; line 4
+        movdqa      xmm4,       XMMWORD PTR [rsi + rdx * 2 + 16]
+        pmullw      xmm3,       [rax + 48]
+        pmullw      xmm4,       [rax + 48]
+
+        movdqa      xmm5,       XMMWORD PTR [rsi + rdx * 4]       ; line 6
+        movdqa      xmm6,       XMMWORD PTR [rsi + rdx * 4 + 16]
+        pmullw      xmm5,       [rax + 80]
+        pmullw      xmm6,       [rax + 80]
+
+        movdqa      xmm7,       XMMWORD PTR [rd GLOBAL]
+        pxor        xmm0,       xmm0                        ; clear xmm0
+
+        paddsw      xmm1,       xmm3
+        paddsw      xmm2,       xmm4
+        paddsw      xmm1,       xmm5
+        paddsw      xmm2,       xmm6
+
+        paddsw      xmm1,       xmm7
+        paddsw      xmm2,       xmm7
+
+        psraw       xmm1,       7
+        psraw       xmm2,       7
+
+        packuswb    xmm1,       xmm2              ; pack and saturate
+        movdqa      XMMWORD PTR [rdi], xmm1       ; store the results in the destination
+%if ABI_IS_32BIT
+        add         rdi,        DWORD PTR arg(2) ;[dst_ptich]
+%else
+        add         rdi,        r8
+%endif
+        dec         rcx         ; decrement count
+        jnz         vp8_filter_block1d16_v6_sse2_loop               ; next row
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_filter_block1d8_h6_only_sse2
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    int dst_ptich,
+;    unsigned int    output_height,
+;    const short    *vp8_filter
+;)
+; First-pass filter only when yoffset==0
+global sym(vp8_filter_block1d8_h6_only_sse2)
+sym(vp8_filter_block1d8_h6_only_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov         rdx,        arg(5) ;vp8_filter
+        mov         rsi,        arg(0) ;src_ptr
+
+        mov         rdi,        arg(2) ;output_ptr
+
+        movsxd      rcx,        dword ptr arg(4) ;output_height
+        movsxd      rax,        dword ptr arg(1) ;src_pixels_per_line            ; Pitch for Source
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(3) ;dst_ptich
+%endif
+        pxor        xmm0,       xmm0                        ; clear xmm0 for unpack
+
+filter_block1d8_h6_only_rowloop:
+        movq        xmm3,       MMWORD PTR [rsi - 2]
+        movq        xmm1,       MMWORD PTR [rsi + 6]
+
+        prefetcht2  [rsi+rax-2]
+
+        pslldq      xmm1,       8
+        por         xmm1,       xmm3
+
+        movdqa      xmm4,       xmm1
+        movdqa      xmm5,       xmm1
+
+        movdqa      xmm6,       xmm1
+        movdqa      xmm7,       xmm1
+
+        punpcklbw   xmm3,       xmm0                        ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
+        psrldq      xmm4,       1                           ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+        pmullw      xmm3,       XMMWORD PTR [rdx]           ; x[-2] * H[-2]; Tap 1
+        punpcklbw   xmm4,       xmm0                        ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+        psrldq      xmm5,       2                           ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+        pmullw      xmm4,       XMMWORD PTR [rdx+16]        ; x[-1] * H[-1]; Tap 2
+
+
+        punpcklbw   xmm5,       xmm0                        ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+        psrldq      xmm6,       3                           ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+        pmullw      xmm5,       [rdx+32]                    ; x[ 0] * H[ 0]; Tap 3
+
+        punpcklbw   xmm6,       xmm0                        ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+        psrldq      xmm7,       4                           ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+        pmullw      xmm6,       [rdx+48]                    ; x[ 1] * h[ 1] ; Tap 4
+
+        punpcklbw   xmm7,       xmm0                        ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+        psrldq      xmm1,       5                           ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+
+        pmullw      xmm7,       [rdx+64]                    ; x[ 2] * h[ 2] ; Tap 5
+
+        punpcklbw   xmm1,       xmm0                        ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+        pmullw      xmm1,       [rdx+80]                    ; x[ 3] * h[ 3] ; Tap 6
+
+
+        paddsw      xmm4,       xmm7
+        paddsw      xmm4,       xmm5
+
+        paddsw      xmm4,       xmm3
+        paddsw      xmm4,       xmm6
+
+        paddsw      xmm4,       xmm1
+        paddsw      xmm4,       [rd GLOBAL]
+
+        psraw       xmm4,       7
+
+        packuswb    xmm4,       xmm0
+
+        movq        QWORD PTR [rdi],   xmm4       ; store the results in the destination
+        lea         rsi,        [rsi + rax]
+
+%if ABI_IS_32BIT
+        add         rdi,        DWORD Ptr arg(3) ;dst_ptich
+%else
+        add         rdi,        r8
+%endif
+        dec         rcx
+
+        jnz         filter_block1d8_h6_only_rowloop                ; next row
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_filter_block1d16_h6_only_sse2
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    int dst_ptich,
+;    unsigned int    output_height,
+;    const short    *vp8_filter
+;)
+; First-pass filter only when yoffset==0
+global sym(vp8_filter_block1d16_h6_only_sse2)
+sym(vp8_filter_block1d16_h6_only_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov         rdx,        arg(5) ;vp8_filter
+        mov         rsi,        arg(0) ;src_ptr
+
+        mov         rdi,        arg(2) ;output_ptr
+
+        movsxd      rcx,        dword ptr arg(4) ;output_height
+        movsxd      rax,        dword ptr arg(1) ;src_pixels_per_line            ; Pitch for Source
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(3) ;dst_ptich
+%endif
+
+        pxor        xmm0,       xmm0                        ; clear xmm0 for unpack
+
+filter_block1d16_h6_only_sse2_rowloop:
+        movq        xmm3,       MMWORD PTR [rsi - 2]
+        movq        xmm1,       MMWORD PTR [rsi + 6]
+
+        movq        xmm2,       MMWORD PTR [rsi +14]
+        pslldq      xmm2,       8
+
+        por         xmm2,       xmm1
+        prefetcht2  [rsi+rax-2]
+
+        pslldq      xmm1,       8
+        por         xmm1,       xmm3
+
+        movdqa      xmm4,       xmm1
+        movdqa      xmm5,       xmm1
+
+        movdqa      xmm6,       xmm1
+        movdqa      xmm7,       xmm1
+
+        punpcklbw   xmm3,       xmm0                        ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
+        psrldq      xmm4,       1                           ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+        pmullw      xmm3,       XMMWORD PTR [rdx]           ; x[-2] * H[-2]; Tap 1
+        punpcklbw   xmm4,       xmm0                        ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+        psrldq      xmm5,       2                           ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+        pmullw      xmm4,       XMMWORD PTR [rdx+16]        ; x[-1] * H[-1]; Tap 2
+
+        punpcklbw   xmm5,       xmm0                        ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+        psrldq      xmm6,       3                           ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+        pmullw      xmm5,       [rdx+32]                    ; x[ 0] * H[ 0]; Tap 3
+
+        punpcklbw   xmm6,       xmm0                        ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+        psrldq      xmm7,       4                           ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+        pmullw      xmm6,       [rdx+48]                    ; x[ 1] * h[ 1] ; Tap 4
+
+        punpcklbw   xmm7,       xmm0                        ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+        psrldq      xmm1,       5                           ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+        pmullw      xmm7,       [rdx+64]                    ; x[ 2] * h[ 2] ; Tap 5
+
+        punpcklbw   xmm1,       xmm0                        ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+        pmullw      xmm1,       [rdx+80]                    ; x[ 3] * h[ 3] ; Tap 6
+
+        paddsw      xmm4,       xmm7
+        paddsw      xmm4,       xmm5
+
+        paddsw      xmm4,       xmm3
+        paddsw      xmm4,       xmm6
+
+        paddsw      xmm4,       xmm1
+        paddsw      xmm4,       [rd GLOBAL]
+
+        psraw       xmm4,       7
+
+        packuswb    xmm4,       xmm0                        ; lower 8 bytes
+
+        movq        QWORD Ptr [rdi],         xmm4           ; store the results in the destination
+
+        movdqa      xmm3,       xmm2
+        movdqa      xmm4,       xmm2
+
+        movdqa      xmm5,       xmm2
+        movdqa      xmm6,       xmm2
+
+        movdqa      xmm7,       xmm2
+
+        punpcklbw   xmm3,       xmm0                        ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
+        psrldq      xmm4,       1                           ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+        pmullw      xmm3,       XMMWORD PTR [rdx]           ; x[-2] * H[-2]; Tap 1
+        punpcklbw   xmm4,       xmm0                        ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+        psrldq      xmm5,       2                           ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+        pmullw      xmm4,       XMMWORD PTR [rdx+16]        ; x[-1] * H[-1]; Tap 2
+
+        punpcklbw   xmm5,       xmm0                        ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+        psrldq      xmm6,       3                           ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+        pmullw      xmm5,       [rdx+32]                    ; x[ 0] * H[ 0]; Tap 3
+
+        punpcklbw   xmm6,       xmm0                        ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+        psrldq      xmm7,       4                           ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+        pmullw      xmm6,       [rdx+48]                    ; x[ 1] * h[ 1] ; Tap 4
+
+        punpcklbw   xmm7,       xmm0                        ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+        psrldq      xmm2,       5                           ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+        pmullw      xmm7,       [rdx+64]                    ; x[ 2] * h[ 2] ; Tap 5
+
+        punpcklbw   xmm2,       xmm0                        ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+        pmullw      xmm2,       [rdx+80]                    ; x[ 3] * h[ 3] ; Tap 6
+
+        paddsw      xmm4,       xmm7
+        paddsw      xmm4,       xmm5
+
+        paddsw      xmm4,       xmm3
+        paddsw      xmm4,       xmm6
+
+        paddsw      xmm4,       xmm2
+        paddsw      xmm4,       [rd GLOBAL]
+
+        psraw       xmm4,       7
+
+        packuswb    xmm4,       xmm0                        ; higher 8 bytes
+
+        movq        QWORD Ptr [rdi+8],      xmm4            ; store the results in the destination
+
+        lea         rsi,        [rsi + rax]
+%if ABI_IS_32BIT
+        add         rdi,        DWORD Ptr arg(3) ;dst_ptich
+%else
+        add         rdi,        r8
+%endif
+
+        dec         rcx
+        jnz         filter_block1d16_h6_only_sse2_rowloop                ; next row
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_filter_block1d8_v6_only_sse2
+;(
+;    unsigned char *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char *output_ptr,
+;    int dst_ptich,
+;    unsigned int output_height,
+;    const short    *vp8_filter
+;)
+; Second-pass filter only when xoffset==0
+global sym(vp8_filter_block1d8_v6_only_sse2)
+sym(vp8_filter_block1d8_v6_only_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov         rsi,        arg(0) ;src_ptr
+        mov         rdi,        arg(2) ;output_ptr
+
+        movsxd      rcx,        dword ptr arg(4) ;output_height
+        movsxd      rdx,        dword ptr arg(1) ;src_pixels_per_line
+
+        mov         rax,        arg(5) ;vp8_filter
+
+        pxor        xmm0,       xmm0                        ; clear xmm0
+
+        movdqa      xmm7,       XMMWORD PTR [rd GLOBAL]
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(3) ; dst_ptich
+%endif
+
+vp8_filter_block1d8_v6_only_sse2_loop:
+        movq        xmm1,       MMWORD PTR [rsi]
+        movq        xmm2,       MMWORD PTR [rsi + rdx]
+        movq        xmm3,       MMWORD PTR [rsi + rdx * 2]
+        movq        xmm5,       MMWORD PTR [rsi + rdx * 4]
+        add         rsi,        rdx
+        movq        xmm4,       MMWORD PTR [rsi + rdx * 2]
+        movq        xmm6,       MMWORD PTR [rsi + rdx * 4]
+
+        punpcklbw   xmm1,       xmm0
+        pmullw      xmm1,       [rax]
+
+        punpcklbw   xmm2,       xmm0
+        pmullw      xmm2,       [rax + 16]
+
+        punpcklbw   xmm3,       xmm0
+        pmullw      xmm3,       [rax + 32]
+
+        punpcklbw   xmm5,       xmm0
+        pmullw      xmm5,       [rax + 64]
+
+        punpcklbw   xmm4,       xmm0
+        pmullw      xmm4,       [rax + 48]
+
+        punpcklbw   xmm6,       xmm0
+        pmullw      xmm6,       [rax + 80]
+
+        paddsw      xmm2,       xmm5
+        paddsw      xmm2,       xmm3
+
+        paddsw      xmm2,       xmm1
+        paddsw      xmm2,       xmm4
+
+        paddsw      xmm2,       xmm6
+        paddsw      xmm2,       xmm7
+
+        psraw       xmm2,       7
+        packuswb    xmm2,       xmm0              ; pack and saturate
+
+        movq        QWORD PTR [rdi], xmm2         ; store the results in the destination
+%if ABI_IS_32BIT
+        add         rdi,        DWORD PTR arg(3) ;[dst_ptich]
+%else
+        add         rdi,        r8
+%endif
+        dec         rcx         ; decrement count
+        jnz         vp8_filter_block1d8_v6_only_sse2_loop               ; next row
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -414,6 +908,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    ;SAVE_XMM                          ;xmm6, xmm7 are not used here.
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -453,164 +948,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
-;void vp8_unpack_block1d8_h6_sse2
-;(
-;    unsigned char  *src_ptr,
-;    unsigned short *output_ptr,
-;    unsigned int    src_pixels_per_line,
-;    unsigned int    output_height,
-;    unsigned int    output_width
-;)
-global sym(vp8_unpack_block1d8_h6_sse2)
-sym(vp8_unpack_block1d8_h6_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-        mov         rsi,        arg(0) ;src_ptr
-        mov         rdi,        arg(1) ;output_ptr
-
-        movsxd      rcx,        dword ptr arg(3) ;output_height
-        movsxd      rax,        dword ptr arg(2) ;src_pixels_per_line            ; Pitch for Source
-
-        pxor        xmm0,       xmm0                        ; clear xmm0 for unpack
-%if ABI_IS_32BIT=0
-        movsxd      r8,         dword ptr arg(4) ;output_width            ; Pitch for Source
-%endif
-
-unpack_block1d8_h6_sse2_rowloop:
-        movq        xmm1,       MMWORD PTR [rsi]            ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2
-        lea         rsi,        [rsi + rax]
-
-        punpcklbw   xmm1,       xmm0
-        movdqa      XMMWORD Ptr [rdi],         xmm1
-
-%if ABI_IS_32BIT
-        add         rdi,        DWORD Ptr arg(4) ;[output_width]
-%else
-        add         rdi,        r8
-%endif
-        dec         rcx
-        jnz         unpack_block1d8_h6_sse2_rowloop                ; next row
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
-;void vp8_pack_block1d8_v6_sse2
-;(
-;    short *src_ptr,
-;    unsigned char *output_ptr,
-;    int dst_ptich,
-;    unsigned int pixels_per_line,
-;    unsigned int output_height,
-;    unsigned int output_width
-;)
-global sym(vp8_pack_block1d8_v6_sse2)
-sym(vp8_pack_block1d8_v6_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-        movsxd      rdx,        dword ptr arg(3) ;pixels_per_line
-        mov         rdi,        arg(1) ;output_ptr
-
-        mov         rsi,        arg(0) ;src_ptr
-        movsxd      rcx,        DWORD PTR arg(4) ;[output_height]
-%if ABI_IS_32BIT=0
-        movsxd      r8,         dword ptr arg(5) ;output_width            ; Pitch for Source
-%endif
-
-pack_block1d8_v6_sse2_loop:
-        movdqa      xmm0,       XMMWORD PTR [rsi]
-        packuswb    xmm0,       xmm0
-
-        movq        QWORD PTR [rdi], xmm0         ; store the results in the destination
-        lea         rsi,        [rsi+rdx]
-
-%if ABI_IS_32BIT
-        add         rdi,        DWORD Ptr arg(5) ;[output_width]
-%else
-        add         rdi,        r8
-%endif
-        dec         rcx         ; decrement count
-        jnz         pack_block1d8_v6_sse2_loop               ; next row
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
-;void vp8_pack_block1d16_v6_sse2
-;(
-;    short *src_ptr,
-;    unsigned char *output_ptr,
-;    int dst_ptich,
-;    unsigned int pixels_per_line,
-;    unsigned int output_height,
-;    unsigned int output_width
-;)
-global sym(vp8_pack_block1d16_v6_sse2)
-sym(vp8_pack_block1d16_v6_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-        movsxd      rdx,        dword ptr arg(3) ;pixels_per_line
-        mov         rdi,        arg(1) ;output_ptr
-
-        mov         rsi,        arg(0) ;src_ptr
-        movsxd      rcx,        DWORD PTR arg(4) ;[output_height]
-%if ABI_IS_32BIT=0
-        movsxd      r8,         dword ptr arg(2) ;dst_pitch
-%endif
-
-pack_block1d16_v6_sse2_loop:
-        movdqa      xmm0,       XMMWORD PTR [rsi]
-        movdqa      xmm1,       XMMWORD PTR [rsi+16]
-
-        packuswb    xmm0,       xmm1
-        movdqa      XMMWORD PTR [rdi], xmm0         ; store the results in the destination
-
-        add         rsi,        rdx
-%if ABI_IS_32BIT
-        add         rdi,        DWORD Ptr arg(2) ;dst_pitch
-%else
-        add         rdi,        r8
-%endif
-        dec         rcx         ; decrement count
-        jnz         pack_block1d16_v6_sse2_loop               ; next row
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
+    ;RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -631,6 +969,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -878,6 +1217,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -898,6 +1238,7 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1021,6 +1362,7 @@
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
new file mode 100644
index 0000000..f7209cc
--- /dev/null
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -0,0 +1,1508 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define BLOCK_HEIGHT_WIDTH 4
+%define VP8_FILTER_WEIGHT 128
+%define VP8_FILTER_SHIFT  7
+
+
+;/************************************************************************************
+; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
+; input pixel array has output_height rows. This routine assumes that output_height is an
+; even number. This function handles 8 pixels in horizontal direction, calculating ONE
+; rows each iteration to take advantage of the 128 bits operations.
+;
+; This is an implementation of some of the SSE optimizations first seen in ffvp8
+;
+;*************************************************************************************/
+;void vp8_filter_block1d8_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char *output_ptr,
+;    unsigned int    output_pitch,
+;    unsigned int    output_height,
+;    unsigned int    vp8_filter_index
+;)
+global sym(vp8_filter_block1d8_h6_ssse3)
+sym(vp8_filter_block1d8_h6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4
+
+    movdqa      xmm7, [rd GLOBAL]
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+    mov         rdi, arg(2)             ;output_ptr
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d8_h4_ssse3
+
+    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+    sub         rdi, rdx
+;xmm3 free
+filter_block1d8_h6_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, [shuf1b GLOBAL]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pmaddubsw   xmm0, xmm4
+    pmaddubsw   xmm1, xmm5
+
+    pshufb      xmm2, [shuf3b GLOBAL]
+    add         rdi, rdx
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+    dec         rcx
+    paddsw      xmm0, xmm1
+    paddsw      xmm0, xmm7
+    paddsw      xmm0, xmm2
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+    movq        MMWORD Ptr [rdi], xmm0
+    jnz         filter_block1d8_h6_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d8_h4_ssse3:
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    movdqa      xmm3, XMMWORD PTR [shuf2b GLOBAL]
+    movdqa      xmm4, XMMWORD PTR [shuf3b GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+    sub         rdi, rdx
+;xmm3 free
+filter_block1d8_h4_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm2, xmm0
+    pshufb      xmm0, xmm3 ;[shuf2b GLOBAL]
+    pshufb      xmm2, xmm4 ;[shuf3b GLOBAL]
+
+    pmaddubsw   xmm0, xmm5
+    add         rdi, rdx
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+    dec         rcx
+    paddsw      xmm0, xmm7
+    paddsw      xmm0, xmm2
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+    movq        MMWORD Ptr [rdi], xmm0
+
+    jnz         filter_block1d8_h4_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+;void vp8_filter_block1d16_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    unsigned int    output_pitch,
+;    unsigned int    output_height,
+;    unsigned int    vp8_filter_index
+;)
+global sym(vp8_filter_block1d16_h6_ssse3)
+sym(vp8_filter_block1d16_h6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    mov         rdi, arg(2)             ;output_ptr
+    movdqa      xmm7, [rd GLOBAL]
+
+;;
+;;    cmp         esi, DWORD PTR [rax]
+;;    je          vp8_filter_block1d16_h4_ssse3
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+filter_block1d16_h6_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, [shuf1b GLOBAL]
+    movdqa      xmm2, xmm1
+    pmaddubsw   xmm0, xmm4
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+    movdqu      xmm3,   XMMWORD PTR [rsi + 6]
+
+    pmaddubsw   xmm2, xmm6
+    paddsw      xmm0, xmm1
+    movdqa      xmm1, xmm3
+    pshufb      xmm3, [shuf1b GLOBAL]
+    paddsw      xmm0, xmm7
+    pmaddubsw   xmm3, xmm4
+    paddsw      xmm0, xmm2
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+    pmaddubsw   xmm2, xmm6
+
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+    lea         rsi,    [rsi + rax]
+    paddsw      xmm3, xmm1
+    paddsw      xmm3, xmm7
+    paddsw      xmm3, xmm2
+    psraw       xmm3, 7
+    packuswb    xmm3, xmm3
+
+    punpcklqdq  xmm0, xmm3
+
+    movdqa      XMMWORD Ptr [rdi], xmm0
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d16_h6_rowloop_ssse3
+
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d16_h4_ssse3:
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+filter_block1d16_h4_rowloop_ssse3:
+    movdqu      xmm1,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+    movdqu      xmm3,   XMMWORD PTR [rsi + 6]
+
+    pmaddubsw   xmm2, xmm6
+    movdqa      xmm0, xmm3
+    pshufb      xmm3, [shuf3b GLOBAL]
+    pshufb      xmm0, [shuf2b GLOBAL]
+
+    paddsw      xmm1, xmm7
+    paddsw      xmm1, xmm2
+
+    pmaddubsw   xmm0, xmm5
+    pmaddubsw   xmm3, xmm6
+
+    psraw       xmm1, 7
+    packuswb    xmm1, xmm1
+    lea         rsi,    [rsi + rax]
+    paddsw      xmm3, xmm0
+    paddsw      xmm3, xmm7
+    psraw       xmm3, 7
+    packuswb    xmm3, xmm3
+
+    punpcklqdq  xmm1, xmm3
+
+    movdqa      XMMWORD Ptr [rdi], xmm1
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d16_h4_rowloop_ssse3
+
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_filter_block1d4_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    unsigned int    output_pitch,
+;    unsigned int    output_height,
+;    unsigned int    vp8_filter_index
+;)
+global sym(vp8_filter_block1d4_h6_ssse3)
+sym(vp8_filter_block1d4_h6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+    movdqa      xmm7, [rd GLOBAL]
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d4_h4_ssse3
+
+    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    mov         rdi, arg(2)             ;output_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+;xmm3 free
+filter_block1d4_h6_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, [shuf1b GLOBAL]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pmaddubsw   xmm0, xmm4
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+;--
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+;--
+    paddsw      xmm0, xmm1
+    paddsw      xmm0, xmm7
+    pxor        xmm1, xmm1
+    paddsw      xmm0, xmm2
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+    movd        DWORD PTR [rdi], xmm0
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d4_h6_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d4_h4_ssse3:
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+    movdqa      xmm0, XMMWORD PTR [shuf2b GLOBAL]
+    movdqa      xmm3, XMMWORD PTR [shuf3b GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+    mov         rdi, arg(2)             ;output_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+filter_block1d4_h4_rowloop_ssse3:
+    movdqu      xmm1,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, xmm0 ;;[shuf2b GLOBAL]
+    pshufb      xmm2, xmm3 ;;[shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+;--
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+;--
+    paddsw      xmm1, xmm7
+    paddsw      xmm1, xmm2
+    psraw       xmm1, 7
+    packuswb    xmm1, xmm1
+
+    movd        DWORD PTR [rdi], xmm1
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d4_h4_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+
+;void vp8_filter_block1d16_v6_ssse3
+;(
+;    unsigned char *src_ptr,
+;    unsigned int   src_pitch,
+;    unsigned char *output_ptr,
+;    unsigned int   out_pitch,
+;    unsigned int   output_height,
+;    unsigned int   vp8_filter_index
+;)
+global sym(vp8_filter_block1d16_v6_ssse3)
+sym(vp8_filter_block1d16_v6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d16_v4_ssse3
+
+    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
+%endif
+    mov         rax, rsi
+    movsxd      rcx, DWORD PTR arg(4)   ;output_height
+    add         rax, rdx
+
+
+vp8_filter_block1d16_v6_ssse3_loop:
+    movq        xmm1, MMWORD PTR [rsi]                  ;A
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
+
+    pmaddubsw   xmm3, xmm6
+    punpcklbw   xmm1, xmm0                  ;A F
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm1, xmm5
+
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm1
+    paddsw      xmm2, [rd GLOBAL]
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi], xmm2          ;store the results
+
+    movq        xmm1, MMWORD PTR [rsi + 8]                  ;A
+    movq        xmm2, MMWORD PTR [rsi + rdx + 8]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    movq        xmm0, MMWORD PTR [rax + rdx * 4 + 8]        ;F
+    pmaddubsw   xmm3, xmm6
+    punpcklbw   xmm1, xmm0                  ;A F
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm1, xmm5
+
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm1
+    paddsw      xmm2, [rd GLOBAL]
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi+8], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;out_pitch
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d16_v6_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d16_v4_ssse3:
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
+%endif
+    mov         rax, rsi
+    movsxd      rcx, DWORD PTR arg(4)   ;output_height
+    add         rax, rdx
+
+vp8_filter_block1d16_v4_ssse3_loop:
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    pmaddubsw   xmm3, xmm6
+    pmaddubsw   xmm2, xmm7
+    movq        xmm5, MMWORD PTR [rsi + rdx + 8]            ;B
+    movq        xmm1, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E
+
+    paddsw      xmm2, [rd GLOBAL]
+    paddsw      xmm2, xmm3
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    punpcklbw   xmm5, xmm4                  ;B D
+    punpcklbw   xmm1, xmm0                  ;C E
+
+    pmaddubsw   xmm1, xmm6
+    pmaddubsw   xmm5, xmm7
+
+    movdqa      xmm4, [rd GLOBAL]
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm5, xmm1
+    paddsw      xmm5, xmm4
+    psraw       xmm5, 7
+    packuswb    xmm5, xmm5
+
+    punpcklqdq  xmm2, xmm5
+
+    movdqa       XMMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;out_pitch
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d16_v4_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_filter_block1d8_v6_ssse3
+;(
+;    unsigned char *src_ptr,
+;    unsigned int   src_pitch,
+;    unsigned char *output_ptr,
+;    unsigned int   out_pitch,
+;    unsigned int   output_height,
+;    unsigned int   vp8_filter_index
+;)
+global sym(vp8_filter_block1d8_v6_ssse3)
+sym(vp8_filter_block1d8_v6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
+%endif
+    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d8_v4_ssse3
+
+    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d8_v6_ssse3_loop:
+    movq        xmm1, MMWORD PTR [rsi]                  ;A
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
+    movdqa      xmm4, [rd GLOBAL]
+
+    pmaddubsw   xmm3, xmm6
+    punpcklbw   xmm1, xmm0                  ;A F
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm1, xmm5
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm1
+    paddsw      xmm2, xmm4
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d8_v6_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d8_v4_ssse3:
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+    movdqa      xmm5, [rd GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d8_v4_ssse3_loop:
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    pmaddubsw   xmm3, xmm6
+    pmaddubsw   xmm2, xmm7
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm5
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d8_v4_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+;void vp8_filter_block1d4_v6_ssse3
+;(
+;    unsigned char *src_ptr,
+;    unsigned int   src_pitch,
+;    unsigned char *output_ptr,
+;    unsigned int   out_pitch,
+;    unsigned int   output_height,
+;    unsigned int   vp8_filter_index
+;)
+global sym(vp8_filter_block1d4_v6_ssse3)
+sym(vp8_filter_block1d4_v6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
+%endif
+    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d4_v4_ssse3
+
+    movq        mm5, MMWORD PTR [rax]         ;k0_k5
+    movq        mm6, MMWORD PTR [rax+256]     ;k2_k4
+    movq        mm7, MMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d4_v6_ssse3_loop:
+    movd        mm1, DWORD PTR [rsi]                  ;A
+    movd        mm2, DWORD PTR [rsi + rdx]            ;B
+    movd        mm3, DWORD PTR [rsi + rdx * 2]        ;C
+    movd        mm4, DWORD PTR [rax + rdx * 2]        ;D
+    movd        mm0, DWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   mm2, mm4                  ;B D
+    punpcklbw   mm3, mm0                  ;C E
+
+    movd        mm0, DWORD PTR [rax + rdx * 4]        ;F
+
+    movq        mm4, [rd GLOBAL]
+
+    pmaddubsw   mm3, mm6
+    punpcklbw   mm1, mm0                  ;A F
+    pmaddubsw   mm2, mm7
+    pmaddubsw   mm1, mm5
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      mm2, mm3
+    paddsw      mm2, mm1
+    paddsw      mm2, mm4
+    psraw       mm2, 7
+    packuswb    mm2, mm2
+
+    movd        DWORD PTR [rdi], mm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d4_v6_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d4_v4_ssse3:
+    movq        mm6, MMWORD PTR [rax+256]     ;k2_k4
+    movq        mm7, MMWORD PTR [rax+128]     ;k1_k3
+    movq        mm5, MMWORD PTR [rd GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d4_v4_ssse3_loop:
+    movd        mm2, DWORD PTR [rsi + rdx]            ;B
+    movd        mm3, DWORD PTR [rsi + rdx * 2]        ;C
+    movd        mm4, DWORD PTR [rax + rdx * 2]        ;D
+    movd        mm0, DWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   mm2, mm4                  ;B D
+    punpcklbw   mm3, mm0                  ;C E
+
+    pmaddubsw   mm3, mm6
+    pmaddubsw   mm2, mm7
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      mm2, mm3
+    paddsw      mm2, mm5
+    psraw       mm2, 7
+    packuswb    mm2, mm2
+
+    movd        DWORD PTR [rdi], mm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d4_v4_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_bilinear_predict16x16_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    int   src_pixels_per_line,
+;    int  xoffset,
+;    int  yoffset,
+;    unsigned char *dst_ptr,
+;    int dst_pitch
+;)
+global sym(vp8_bilinear_predict16x16_ssse3)
+sym(vp8_bilinear_predict16x16_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        lea         rcx,        [vp8_bilinear_filters_ssse3 GLOBAL]
+        movsxd      rax,        dword ptr arg(2)    ; xoffset
+
+        cmp         rax,        0                   ; skip first_pass filter if xoffset=0
+        je          b16x16_sp_only
+
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; HFilter
+
+        mov         rdi,        arg(4)              ; dst_ptr
+        mov         rsi,        arg(0)              ; src_ptr
+        movsxd      rdx,        dword ptr arg(5)    ; dst_pitch
+
+        movdqa      xmm1,       [rax]
+
+        movsxd      rax,        dword ptr arg(3)    ; yoffset
+
+        cmp         rax,        0                   ; skip second_pass filter if yoffset=0
+        je          b16x16_fp_only
+
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; VFilter
+
+        lea         rcx,        [rdi+rdx*8]
+        lea         rcx,        [rcx+rdx*8]
+        movsxd      rdx,        dword ptr arg(1)    ; src_pixels_per_line
+
+        movdqa      xmm2,       [rax]
+
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(5)    ; dst_pitch
+%endif
+        movdqu      xmm3,       [rsi]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+        movdqa      xmm4,       xmm3
+
+        movdqu      xmm5,       [rsi+1]             ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16
+        lea         rsi,        [rsi + rdx]         ; next line
+
+        punpcklbw   xmm3,       xmm5                ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
+        pmaddubsw   xmm3,       xmm1                ; 00 02 04 06 08 10 12 14
+
+        punpckhbw   xmm4,       xmm5                ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16
+        pmaddubsw   xmm4,       xmm1                ; 01 03 05 07 09 11 13 15
+
+        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        psraw       xmm3,       VP8_FILTER_SHIFT    ; xmm3 /= 128
+
+        paddw       xmm4,       [rd GLOBAL]         ; xmm4 += round value
+        psraw       xmm4,       VP8_FILTER_SHIFT    ; xmm4 /= 128
+
+        movdqa      xmm7,       xmm3
+        packuswb    xmm7,       xmm4                ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+.next_row:
+        movdqu      xmm6,       [rsi]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+        movdqa      xmm4,       xmm6
+
+        movdqu      xmm5,       [rsi+1]             ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16
+        lea         rsi,        [rsi + rdx]         ; next line
+
+        punpcklbw   xmm6,       xmm5
+        pmaddubsw   xmm6,       xmm1
+
+        punpckhbw   xmm4,       xmm5
+        pmaddubsw   xmm4,       xmm1
+
+        paddw       xmm6,       [rd GLOBAL]         ; xmm6 += round value
+        psraw       xmm6,       VP8_FILTER_SHIFT    ; xmm6 /= 128
+
+        paddw       xmm4,       [rd GLOBAL]         ; xmm4 += round value
+        psraw       xmm4,       VP8_FILTER_SHIFT    ; xmm4 /= 128
+
+        packuswb    xmm6,       xmm4
+        movdqa      xmm5,       xmm7
+
+        punpcklbw   xmm5,       xmm6
+        pmaddubsw   xmm5,       xmm2
+
+        punpckhbw   xmm7,       xmm6
+        pmaddubsw   xmm7,       xmm2
+
+        paddw       xmm5,       [rd GLOBAL]         ; xmm5 += round value
+        psraw       xmm5,       VP8_FILTER_SHIFT    ; xmm5 /= 128
+
+        paddw       xmm7,       [rd GLOBAL]         ; xmm7 += round value
+        psraw       xmm7,       VP8_FILTER_SHIFT    ; xmm7 /= 128
+
+        packuswb    xmm5,       xmm7
+        movdqa      xmm7,       xmm6
+
+        movdqa      [rdi],      xmm5                ; store the results in the destination
+%if ABI_IS_32BIT
+        add         rdi,        DWORD PTR arg(5)    ; dst_pitch
+%else
+        add         rdi,        r8
+%endif
+
+        cmp         rdi,        rcx
+        jne         .next_row
+
+        jmp         done
+
+b16x16_sp_only:
+        movsxd      rax,        dword ptr arg(3)    ; yoffset
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; VFilter
+
+        mov         rdi,        arg(4)              ; dst_ptr
+        mov         rsi,        arg(0)              ; src_ptr
+        movsxd      rdx,        dword ptr arg(5)    ; dst_pitch
+
+        movdqa      xmm1,       [rax]               ; VFilter
+
+        lea         rcx,        [rdi+rdx*8]
+        lea         rcx,        [rcx+rdx*8]
+        movsxd      rax,        dword ptr arg(1)    ; src_pixels_per_line
+
+        ; get the first horizontal line done
+        movdqu      xmm2,       [rsi]               ; load row 0
+
+        lea         rsi,        [rsi + rax]         ; next line
+.next_row:
+        movdqu      xmm3,       [rsi]               ; load row + 1
+
+        movdqu      xmm4,       xmm2
+        punpcklbw   xmm4,       xmm3
+
+        pmaddubsw   xmm4,       xmm1
+        movdqu      xmm7,       [rsi + rax]         ; load row + 2
+
+        punpckhbw   xmm2,       xmm3
+        movdqu      xmm6,       xmm3
+
+        pmaddubsw   xmm2,       xmm1
+        punpcklbw   xmm6,       xmm7
+
+        paddw       xmm4,       [rd GLOBAL]
+        pmaddubsw   xmm6,       xmm1
+
+        psraw       xmm4,       VP8_FILTER_SHIFT
+        punpckhbw   xmm3,       xmm7
+
+        paddw       xmm2,       [rd GLOBAL]
+        pmaddubsw   xmm3,       xmm1
+
+        psraw       xmm2,       VP8_FILTER_SHIFT
+        paddw       xmm6,       [rd GLOBAL]
+
+        packuswb    xmm4,       xmm2
+        psraw       xmm6,       VP8_FILTER_SHIFT
+
+        movdqa      [rdi],      xmm4                ; store row 0
+        paddw       xmm3,       [rd GLOBAL]
+
+        psraw       xmm3,       VP8_FILTER_SHIFT
+        lea         rsi,        [rsi + 2*rax]
+
+        packuswb    xmm6,       xmm3
+        movdqa      xmm2,       xmm7
+
+        movdqa      [rdi + rdx],xmm6                ; store row 1
+        lea         rdi,        [rdi + 2*rdx]
+
+        cmp         rdi,        rcx
+        jne         .next_row
+
+        jmp         done
+
+b16x16_fp_only:
+        lea         rcx,        [rdi+rdx*8]
+        lea         rcx,        [rcx+rdx*8]
+        movsxd      rax,        dword ptr arg(1)    ; src_pixels_per_line
+
+.next_row:
+        movdqu      xmm2,       [rsi]               ; row 0
+        movdqa      xmm3,       xmm2
+
+        movdqu      xmm4,       [rsi + 1]           ; row 0 + 1
+        lea         rsi,        [rsi + rax]         ; next line
+
+        punpcklbw   xmm2,       xmm4
+        movdqu      xmm5,       [rsi]               ; row 1
+
+        pmaddubsw   xmm2,       xmm1
+        movdqa      xmm6,       xmm5
+
+        punpckhbw   xmm3,       xmm4
+        movdqu      xmm7,       [rsi + 1]           ; row 1 + 1
+
+        pmaddubsw   xmm3,       xmm1
+        paddw       xmm2,       [rd GLOBAL]
+
+        psraw       xmm2,       VP8_FILTER_SHIFT
+        punpcklbw   xmm5,       xmm7
+
+        paddw       xmm3,       [rd GLOBAL]
+        pmaddubsw   xmm5,       xmm1
+
+        psraw       xmm3,       VP8_FILTER_SHIFT
+        punpckhbw   xmm6,       xmm7
+
+        packuswb    xmm2,       xmm3
+        pmaddubsw   xmm6,       xmm1
+
+        movdqa      [rdi],      xmm2                ; store the results in the destination
+        paddw       xmm5,       [rd GLOBAL]
+
+        lea         rdi,        [rdi + rdx]         ; dst_pitch
+        psraw       xmm5,       VP8_FILTER_SHIFT
+
+        paddw       xmm6,       [rd GLOBAL]
+        psraw       xmm6,       VP8_FILTER_SHIFT
+
+        packuswb    xmm5,       xmm6
+        lea         rsi,        [rsi + rax]         ; next line
+
+        movdqa      [rdi],      xmm5                ; store the results in the destination
+        lea         rdi,        [rdi + rdx]         ; dst_pitch
+
+        cmp         rdi,        rcx
+
+        jne         .next_row
+
+done:
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_bilinear_predict8x8_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    int   src_pixels_per_line,
+;    int  xoffset,
+;    int  yoffset,
+;    unsigned char *dst_ptr,
+;    int dst_pitch
+;)
+global sym(vp8_bilinear_predict8x8_ssse3)
+sym(vp8_bilinear_predict8x8_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub         rsp, 144                         ; reserve 144 bytes
+
+        lea         rcx,        [vp8_bilinear_filters_ssse3 GLOBAL]
+
+        mov         rsi,        arg(0) ;src_ptr
+        movsxd      rdx,        dword ptr arg(1) ;src_pixels_per_line
+
+    ;Read 9-line unaligned data in and put them on stack. This gives a big
+    ;performance boost.
+        movdqu      xmm0,       [rsi]
+        lea         rax,        [rdx + rdx*2]
+        movdqu      xmm1,       [rsi+rdx]
+        movdqu      xmm2,       [rsi+rdx*2]
+        add         rsi,        rax
+        movdqu      xmm3,       [rsi]
+        movdqu      xmm4,       [rsi+rdx]
+        movdqu      xmm5,       [rsi+rdx*2]
+        add         rsi,        rax
+        movdqu      xmm6,       [rsi]
+        movdqu      xmm7,       [rsi+rdx]
+
+        movdqa      XMMWORD PTR [rsp],            xmm0
+
+        movdqu      xmm0,       [rsi+rdx*2]
+
+        movdqa      XMMWORD PTR [rsp+16],         xmm1
+        movdqa      XMMWORD PTR [rsp+32],         xmm2
+        movdqa      XMMWORD PTR [rsp+48],         xmm3
+        movdqa      XMMWORD PTR [rsp+64],         xmm4
+        movdqa      XMMWORD PTR [rsp+80],         xmm5
+        movdqa      XMMWORD PTR [rsp+96],         xmm6
+        movdqa      XMMWORD PTR [rsp+112],        xmm7
+        movdqa      XMMWORD PTR [rsp+128],        xmm0
+
+        movsxd      rax,        dword ptr arg(2)    ; xoffset
+        cmp         rax,        0                   ; skip first_pass filter if xoffset=0
+        je          b8x8_sp_only
+
+        shl         rax,        4
+        add         rax,        rcx                 ; HFilter
+
+        mov         rdi,        arg(4)              ; dst_ptr
+        movsxd      rdx,        dword ptr arg(5)    ; dst_pitch
+
+        movdqa      xmm0,       [rax]
+
+        movsxd      rax,        dword ptr arg(3)    ; yoffset
+        cmp         rax,        0                   ; skip second_pass filter if yoffset=0
+        je          b8x8_fp_only
+
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; VFilter
+
+        lea         rcx,        [rdi+rdx*8]
+
+        movdqa      xmm1,       [rax]
+
+        ; get the first horizontal line done
+        movdqa      xmm3,       [rsp]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+        movdqa      xmm5,       xmm3                ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx
+
+        psrldq      xmm5,       1
+        lea         rsp,        [rsp + 16]          ; next line
+
+        punpcklbw   xmm3,       xmm5                ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
+        pmaddubsw   xmm3,       xmm0                ; 00 02 04 06 08 10 12 14
+
+        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        psraw       xmm3,       VP8_FILTER_SHIFT    ; xmm3 /= 128
+
+        movdqa      xmm7,       xmm3
+        packuswb    xmm7,       xmm7                ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+.next_row:
+        movdqa      xmm6,       [rsp]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+        lea         rsp,        [rsp + 16]          ; next line
+
+        movdqa      xmm5,       xmm6
+
+        psrldq      xmm5,       1
+
+        punpcklbw   xmm6,       xmm5
+        pmaddubsw   xmm6,       xmm0
+
+        paddw       xmm6,       [rd GLOBAL]         ; xmm6 += round value
+        psraw       xmm6,       VP8_FILTER_SHIFT    ; xmm6 /= 128
+
+        packuswb    xmm6,       xmm6
+
+        punpcklbw   xmm7,       xmm6
+        pmaddubsw   xmm7,       xmm1
+
+        paddw       xmm7,       [rd GLOBAL]         ; xmm7 += round value
+        psraw       xmm7,       VP8_FILTER_SHIFT    ; xmm7 /= 128
+
+        packuswb    xmm7,       xmm7
+
+        movq        [rdi],      xmm7                ; store the results in the destination
+        lea         rdi,        [rdi + rdx]
+
+        movdqa      xmm7,       xmm6
+
+        cmp         rdi,        rcx
+        jne         .next_row
+
+        jmp         done8x8
+
+b8x8_sp_only:
+        movsxd      rax,        dword ptr arg(3)    ; yoffset
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; VFilter
+
+        mov         rdi,        arg(4) ;dst_ptr
+        movsxd      rdx,        dword ptr arg(5)    ; dst_pitch
+
+        movdqa      xmm0,       [rax]               ; VFilter
+
+        movq        xmm1,       XMMWORD PTR [rsp]
+        movq        xmm2,       XMMWORD PTR [rsp+16]
+
+        movq        xmm3,       XMMWORD PTR [rsp+32]
+        punpcklbw   xmm1,       xmm2
+
+        movq        xmm4,       XMMWORD PTR [rsp+48]
+        punpcklbw   xmm2,       xmm3
+
+        movq        xmm5,       XMMWORD PTR [rsp+64]
+        punpcklbw   xmm3,       xmm4
+
+        movq        xmm6,       XMMWORD PTR [rsp+80]
+        punpcklbw   xmm4,       xmm5
+
+        movq        xmm7,       XMMWORD PTR [rsp+96]
+        punpcklbw   xmm5,       xmm6
+
+        pmaddubsw   xmm1,       xmm0
+        pmaddubsw   xmm2,       xmm0
+
+        pmaddubsw   xmm3,       xmm0
+        pmaddubsw   xmm4,       xmm0
+
+        pmaddubsw   xmm5,       xmm0
+        punpcklbw   xmm6,       xmm7
+
+        pmaddubsw   xmm6,       xmm0
+        paddw       xmm1,       [rd GLOBAL]
+
+        paddw       xmm2,       [rd GLOBAL]
+        psraw       xmm1,       VP8_FILTER_SHIFT
+
+        paddw       xmm3,       [rd GLOBAL]
+        psraw       xmm2,       VP8_FILTER_SHIFT
+
+        paddw       xmm4,       [rd GLOBAL]
+        psraw       xmm3,       VP8_FILTER_SHIFT
+
+        paddw       xmm5,       [rd GLOBAL]
+        psraw       xmm4,       VP8_FILTER_SHIFT
+
+        paddw       xmm6,       [rd GLOBAL]
+        psraw       xmm5,       VP8_FILTER_SHIFT
+
+        psraw       xmm6,       VP8_FILTER_SHIFT
+        packuswb    xmm1,       xmm1
+
+        packuswb    xmm2,       xmm2
+        movq        [rdi],      xmm1
+
+        packuswb    xmm3,       xmm3
+        movq        [rdi+rdx],  xmm2
+
+        packuswb    xmm4,       xmm4
+        movq        xmm1,       XMMWORD PTR [rsp+112]
+
+        lea         rdi,        [rdi + 2*rdx]
+        movq        xmm2,       XMMWORD PTR [rsp+128]
+
+        packuswb    xmm5,       xmm5
+        movq        [rdi],      xmm3
+
+        packuswb    xmm6,       xmm6
+        movq        [rdi+rdx],  xmm4
+
+        lea         rdi,        [rdi + 2*rdx]
+        punpcklbw   xmm7,       xmm1
+
+        movq        [rdi],      xmm5
+        pmaddubsw   xmm7,       xmm0
+
+        movq        [rdi+rdx],  xmm6
+        punpcklbw   xmm1,       xmm2
+
+        pmaddubsw   xmm1,       xmm0
+        paddw       xmm7,       [rd GLOBAL]
+
+        psraw       xmm7,       VP8_FILTER_SHIFT
+        paddw       xmm1,       [rd GLOBAL]
+
+        psraw       xmm1,       VP8_FILTER_SHIFT
+        packuswb    xmm7,       xmm7
+
+        packuswb    xmm1,       xmm1
+        lea         rdi,        [rdi + 2*rdx]
+
+        movq        [rdi],      xmm7
+
+        movq        [rdi+rdx],  xmm1
+        lea         rsp,        [rsp + 144]
+
+        jmp         done8x8
+
+b8x8_fp_only:
+        lea         rcx,        [rdi+rdx*8]
+
+.next_row:
+        movdqa      xmm1,       XMMWORD PTR [rsp]
+        movdqa      xmm3,       XMMWORD PTR [rsp+16]
+
+        movdqa      xmm2,       xmm1
+        movdqa      xmm5,       XMMWORD PTR [rsp+32]
+
+        psrldq      xmm2,       1
+        movdqa      xmm7,       XMMWORD PTR [rsp+48]
+
+        movdqa      xmm4,       xmm3
+        psrldq      xmm4,       1
+
+        movdqa      xmm6,       xmm5
+        psrldq      xmm6,       1
+
+        punpcklbw   xmm1,       xmm2
+        pmaddubsw   xmm1,       xmm0
+
+        punpcklbw   xmm3,       xmm4
+        pmaddubsw   xmm3,       xmm0
+
+        punpcklbw   xmm5,       xmm6
+        pmaddubsw   xmm5,       xmm0
+
+        movdqa      xmm2,       xmm7
+        psrldq      xmm2,       1
+
+        punpcklbw   xmm7,       xmm2
+        pmaddubsw   xmm7,       xmm0
+
+        paddw       xmm1,       [rd GLOBAL]
+        psraw       xmm1,       VP8_FILTER_SHIFT
+
+        paddw       xmm3,       [rd GLOBAL]
+        psraw       xmm3,       VP8_FILTER_SHIFT
+
+        paddw       xmm5,       [rd GLOBAL]
+        psraw       xmm5,       VP8_FILTER_SHIFT
+
+        paddw       xmm7,       [rd GLOBAL]
+        psraw       xmm7,       VP8_FILTER_SHIFT
+
+        packuswb    xmm1,       xmm1
+        packuswb    xmm3,       xmm3
+
+        packuswb    xmm5,       xmm5
+        movq        [rdi],      xmm1
+
+        packuswb    xmm7,       xmm7
+        movq        [rdi+rdx],  xmm3
+
+        lea         rdi,        [rdi + 2*rdx]
+        movq        [rdi],      xmm5
+
+        lea         rsp,        [rsp + 4*16]
+        movq        [rdi+rdx],  xmm7
+
+        lea         rdi,        [rdi + 2*rdx]
+        cmp         rdi,        rcx
+
+        jne         .next_row
+
+        lea         rsp,        [rsp + 16]
+
+done8x8:
+    ;add rsp, 144
+    pop         rsp
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+align 16
+shuf1b:
+    db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
+shuf2b:
+    db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
+shuf3b:
+    db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
+
+align 16
+rd:
+    times 8 dw 0x40
+
+align 16
+k0_k5:
+    times 8 db 0, 0             ;placeholder
+    times 8 db 0, 0
+    times 8 db 2, 1
+    times 8 db 0, 0
+    times 8 db 3, 3
+    times 8 db 0, 0
+    times 8 db 1, 2
+    times 8 db 0, 0
+k1_k3:
+    times 8 db  0,    0         ;placeholder
+    times 8 db  -6,  12
+    times 8 db -11,  36
+    times 8 db  -9,  50
+    times 8 db -16,  77
+    times 8 db  -6,  93
+    times 8 db  -8, 108
+    times 8 db  -1, 123
+k2_k4:
+    times 8 db 128,    0        ;placeholder
+    times 8 db 123,   -1
+    times 8 db 108,   -8
+    times 8 db  93,   -6
+    times 8 db  77,  -16
+    times 8 db  50,   -9
+    times 8 db  36,  -11
+    times 8 db  12,   -6
+align 16
+vp8_bilinear_filters_ssse3:
+    times 8 db 128, 0
+    times 8 db 112, 16
+    times 8 db 96,  32
+    times 8 db 80,  48
+    times 8 db 64,  64
+    times 8 db 48,  80
+    times 8 db 32,  96
+    times 8 db 16,  112
+
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index efa7b2e..75991cc 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -85,4 +86,37 @@
 #endif
 #endif
 
+#if HAVE_SSSE3
+extern prototype_subpixel_predict(vp8_sixtap_predict16x16_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict8x8_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict8x4_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
+extern prototype_subpixel_predict(vp8_bilinear_predict16x16_ssse3);
+extern prototype_subpixel_predict(vp8_bilinear_predict8x8_ssse3);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef  vp8_subpix_sixtap16x16
+#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_ssse3
+
+#undef  vp8_subpix_sixtap8x8
+#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_ssse3
+
+#undef  vp8_subpix_sixtap8x4
+#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_ssse3
+
+#undef  vp8_subpix_sixtap4x4
+#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
+
+
+#undef  vp8_subpix_bilinear16x16
+#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_ssse3
+
+#undef  vp8_subpix_bilinear8x8
+#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_ssse3
+
+#endif
+#endif
+
+
+
 #endif
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 68454f7..950d962 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -67,6 +68,17 @@
     unsigned int output_width,
     const short    *vp8_filter
 );
+extern void vp8_filter_block1d16_v6_sse2
+(
+    unsigned short *src_ptr,
+    unsigned char *output_ptr,
+    int dst_ptich,
+    unsigned int pixels_per_line,
+    unsigned int pixel_step,
+    unsigned int output_height,
+    unsigned int output_width,
+    const short    *vp8_filter
+);
 extern void vp8_unpack_block1d16_h6_sse2
 (
     unsigned char  *src_ptr,
@@ -75,31 +87,32 @@
     unsigned int    output_height,
     unsigned int    output_width
 );
-extern void vp8_unpack_block1d8_h6_sse2
+extern void vp8_filter_block1d8_h6_only_sse2
 (
     unsigned char  *src_ptr,
-    unsigned short *output_ptr,
     unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    int dst_ptich,
     unsigned int    output_height,
-    unsigned int    output_width
+    const short    *vp8_filter
 );
-extern void vp8_pack_block1d8_v6_sse2
+extern void vp8_filter_block1d16_h6_only_sse2
 (
-    unsigned short *src_ptr,
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    int dst_ptich,
+    unsigned int    output_height,
+    const short    *vp8_filter
+);
+extern void vp8_filter_block1d8_v6_only_sse2
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pixels_per_line,
     unsigned char *output_ptr,
     int dst_ptich,
-    unsigned int pixels_per_line,
-    unsigned int output_height,
-    unsigned int output_width
-);
-extern void vp8_pack_block1d16_v6_sse2
-(
-    unsigned short *src_ptr,
-    unsigned char *output_ptr,
-    int dst_ptich,
-    unsigned int pixels_per_line,
-    unsigned int output_height,
-    unsigned int output_width
+    unsigned int   output_height,
+    const short   *vp8_filter
 );
 extern prototype_subpixel_predict(vp8_bilinear_predict8x8_mmx);
 
@@ -246,23 +259,26 @@
 
     if (xoffset)
     {
-        HFilter = vp8_six_tap_mmx[xoffset];
-        vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 21, 32, HFilter);
+        if (yoffset)
+        {
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 21, 32, HFilter);
+            VFilter = vp8_six_tap_mmx[yoffset];
+            vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
+        }
+        else
+        {
+            // First-pass only
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
+        }
     }
     else
     {
-        vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 21, 32);
-    }
-
-    if (yoffset)
-    {
+        // Second-pass only
         VFilter = vp8_six_tap_mmx[yoffset];
-        vp8_filter_block1d8_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, 16, VFilter);
-        vp8_filter_block1d8_v6_sse2(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
-    }
-    else
-    {
-        vp8_pack_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32,  16, 16);
+        vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 21, 32);
+        vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
     }
 }
 
@@ -282,25 +298,26 @@
 
     if (xoffset)
     {
-        HFilter = vp8_six_tap_mmx[xoffset];
-        vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 13, 16, HFilter);
+        if (yoffset)
+        {
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 13, 16, HFilter);
+            VFilter = vp8_six_tap_mmx[yoffset];
+            vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
+        }
+        else
+        {
+            // First-pass only
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
+        }
     }
     else
     {
-        vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 13, 16);
-    }
-
-    if (yoffset)
-    {
+        // Second-pass only
         VFilter = vp8_six_tap_mmx[yoffset];
-        vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
+        vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
     }
-    else
-    {
-        vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16,  8, dst_pitch);
-    }
-
-
 }
 
 
@@ -319,24 +336,218 @@
 
     if (xoffset)
     {
-        HFilter = vp8_six_tap_mmx[xoffset];
-        vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 9, 16, HFilter);
+        if (yoffset)
+        {
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 9, 16, HFilter);
+            VFilter = vp8_six_tap_mmx[yoffset];
+            vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
+        }
+        else
+        {
+            // First-pass only
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
+        }
     }
     else
     {
-        vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 9, 16);
-    }
-
-    if (yoffset)
-    {
+        // Second-pass only
         VFilter = vp8_six_tap_mmx[yoffset];
-        vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
+        vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
+    }
+}
+
+#endif
+
+#if HAVE_SSSE3
+
+extern void vp8_filter_block1d8_h6_ssse3
+(
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    unsigned int    output_pitch,
+    unsigned int    output_height,
+    unsigned int    vp8_filter_index
+);
+
+extern void vp8_filter_block1d16_h6_ssse3
+(
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    unsigned int    output_pitch,
+    unsigned int    output_height,
+    unsigned int    vp8_filter_index
+);
+
+extern void vp8_filter_block1d16_v6_ssse3
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pitch,
+    unsigned char *output_ptr,
+    unsigned int   out_pitch,
+    unsigned int   output_height,
+    unsigned int   vp8_filter_index
+);
+
+extern void vp8_filter_block1d8_v6_ssse3
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pitch,
+    unsigned char *output_ptr,
+    unsigned int   out_pitch,
+    unsigned int   output_height,
+    unsigned int   vp8_filter_index
+);
+
+extern void vp8_filter_block1d4_h6_ssse3
+(
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    unsigned int    output_pitch,
+    unsigned int    output_height,
+    unsigned int    vp8_filter_index
+);
+
+extern void vp8_filter_block1d4_v6_ssse3
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pitch,
+    unsigned char *output_ptr,
+    unsigned int   out_pitch,
+    unsigned int   output_height,
+    unsigned int   vp8_filter_index
+);
+
+void vp8_sixtap_predict16x16_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+            vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
+            vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
+        }
+        else
+        {
+            // First-pass only
+            vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
+        }
     }
     else
     {
-        vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16,  4, dst_pitch);
+        // Second-pass only
+        vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
     }
+}
 
+void vp8_sixtap_predict8x8_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+            vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
+            vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
+        }
+        else
+        {
+            vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
+        }
+    }
+    else
+    {
+        // Second-pass only
+        vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
+    }
+}
+
+
+void vp8_sixtap_predict8x4_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+            vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
+            vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
+        }
+        else
+        {
+            // First-pass only
+            vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+        }
+    }
+    else
+    {
+        // Second-pass only
+        vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
+    }
+}
+
+void vp8_sixtap_predict4x4_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+  DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
+
+  if (xoffset)
+  {
+      if (yoffset)
+      {
+          vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
+          vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
+      }
+      else
+      {
+          vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+      }
+  }
+  else
+  {
+      vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
+  }
 
 }
+
 #endif
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 5312e06..b983bc2 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -26,6 +27,7 @@
     int mmx_enabled = flags & HAS_MMX;
     int xmm_enabled = flags & HAS_SSE;
     int wmt_enabled = flags & HAS_SSE2;
+    int SSSE3Enabled = flags & HAS_SSSE3;
 
     /* Note:
      *
@@ -41,7 +43,7 @@
     {
         rtcd->idct.idct1        = vp8_short_idct4x4llm_1_mmx;
         rtcd->idct.idct16       = vp8_short_idct4x4llm_mmx;
-        rtcd->idct.idct1_scalar = vp8_dc_only_idct_mmx;
+        rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
         rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_mmx;
         rtcd->idct.iwalsh1     = vp8_short_inv_walsh4x4_1_mmx;
 
@@ -113,5 +115,19 @@
     }
 
 #endif
+
+#if HAVE_SSSE3
+
+    if (SSSE3Enabled)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_ssse3;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_ssse3;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_ssse3;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_ssse3;
+    }
+#endif
+
 #endif
 }
diff --git a/vp8/decoder/arm/armv5/dequantize_v5.asm b/vp8/decoder/arm/armv5/dequantize_v5.asm
index eb3f030..de3648a 100644
--- a/vp8/decoder/arm/armv5/dequantize_v5.asm
+++ b/vp8/decoder/arm/armv5/dequantize_v5.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
index 143e33e..6515804 100644
--- a/vp8/decoder/arm/armv6/dboolhuff_v6.asm
+++ b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
new file mode 100644
index 0000000..6bebda2
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
@@ -0,0 +1,218 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+    EXPORT |vp8_dequant_dc_idct_add_v6|
+
+    AREA |.text|, CODE, READONLY
+
+;void vp8_dequant_dc_idct_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride, int Dc)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
+; sp + 36 = pitch  ; +4 = 40
+; sp + 40 = stride  ; +4 = 44
+; sp + 44 = Dc  ; +4 = 48
+
+
+|vp8_dequant_dc_idct_add_v6| PROC
+    stmdb   sp!, {r4-r11, lr}
+
+    ldr     r6, [sp, #44]
+
+    ldr     r4, [r0]                ;input
+    ldr     r5, [r1], #4            ;dq
+
+    sub     sp, sp, #4
+    str     r3, [sp]
+
+    smultt  r7, r4, r5
+
+    ldr     r4, [r0, #4]            ;input
+    ldr     r5, [r1], #4            ;dq
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    ldr     r4, [r0, #4]            ;input
+    ldr     r5, [r1], #4            ;dq
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    mov     r12, #3
+
+vp8_dequant_dc_add_loop
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    ldr     r4, [r0, #4]            ;input
+    ldr     r5, [r1], #4            ;dq
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    subs    r12, r12, #1
+
+    ldrne   r4, [r0, #4]
+    ldrne   r5, [r1], #4
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    bne     vp8_dequant_dc_add_loop
+
+    sub     r0, r0, #32
+    mov     r1, r0
+
+; short_idct4x4llm_v6_dual
+    ldr     r3, cospi8sqrt2minus1
+    ldr     r4, sinpi8sqrt2
+    ldr     r6, [r0, #8]
+    mov     r5, #2
+vp8_dequant_dc_idct_loop1_v6
+    ldr     r12, [r0, #24]
+    ldr     r14, [r0, #16]
+    smulwt  r9, r3, r6
+    smulwb  r7, r3, r6
+    smulwt  r10, r4, r6
+    smulwb  r8, r4, r6
+    pkhbt   r7, r7, r9, lsl #16
+    smulwt  r11, r3, r12
+    pkhbt   r8, r8, r10, lsl #16
+    uadd16  r6, r6, r7
+    smulwt  r7, r4, r12
+    smulwb  r9, r3, r12
+    smulwb  r10, r4, r12
+    subs    r5, r5, #1
+    pkhbt   r9, r9, r11, lsl #16
+    ldr     r11, [r0], #4
+    pkhbt   r10, r10, r7, lsl #16
+    uadd16  r7, r12, r9
+    usub16  r7, r8, r7
+    uadd16  r6, r6, r10
+    uadd16  r10, r11, r14
+    usub16  r8, r11, r14
+    uadd16  r9, r10, r6
+    usub16  r10, r10, r6
+    uadd16  r6, r8, r7
+    usub16  r7, r8, r7
+    str     r6, [r1, #8]
+    ldrne   r6, [r0, #8]
+    str     r7, [r1, #16]
+    str     r10, [r1, #24]
+    str     r9, [r1], #4
+    bne     vp8_dequant_dc_idct_loop1_v6
+
+    mov     r5, #2
+    sub     r0, r1, #8
+vp8_dequant_dc_idct_loop2_v6
+    ldr     r6, [r0], #4
+    ldr     r7, [r0], #4
+    ldr     r8, [r0], #4
+    ldr     r9, [r0], #4
+    smulwt  r1, r3, r6
+    smulwt  r12, r4, r6
+    smulwt  lr, r3, r8
+    smulwt  r10, r4, r8
+    pkhbt   r11, r8, r6, lsl #16
+    pkhbt   r1, lr, r1, lsl #16
+    pkhbt   r12, r10, r12, lsl #16
+    pkhtb   r6, r6, r8, asr #16
+    uadd16  r6, r1, r6
+    pkhbt   lr, r9, r7, lsl #16
+    uadd16  r10, r11, lr
+    usub16  lr, r11, lr
+    pkhtb   r8, r7, r9, asr #16
+    subs    r5, r5, #1
+    smulwt  r1, r3, r8
+    smulwb  r7, r3, r8
+    smulwt  r11, r4, r8
+    smulwb  r9, r4, r8
+    pkhbt   r1, r7, r1, lsl #16
+    uadd16  r8, r1, r8
+    pkhbt   r11, r9, r11, lsl #16
+    usub16  r1, r12, r8
+    uadd16  r8, r11, r6
+    ldr     r9, c0x00040004
+    ldr     r12, [sp, #40]
+    uadd16  r6, r10, r8
+    usub16  r7, r10, r8
+    uadd16  r7, r7, r9
+    uadd16  r6, r6, r9
+    uadd16  r10, r14, r1
+    usub16  r1, r14, r1
+    uadd16  r10, r10, r9
+    uadd16  r1, r1, r9
+    ldr     r11, [r2], r12
+    mov     r8, r7, asr #3
+    pkhtb   r9, r8, r10, asr #19
+    mov     r8, r1, asr #3
+    pkhtb   r8, r8, r6, asr #19
+    uxtb16  lr, r11, ror #8
+    qadd16  r9, r9, lr
+    uxtb16  lr, r11
+    qadd16  r8, r8, lr
+    usat16  r9, #8, r9
+    usat16  r8, #8, r8
+    orr     r9, r8, r9, lsl #8
+    ldr     r11, [r2], r12
+    ldr     lr, [sp]
+    ldr     r12, [sp, #44]
+    mov     r7, r7, lsl #16
+    mov     r1, r1, lsl #16
+    mov     r10, r10, lsl #16
+    mov     r6, r6, lsl #16
+    mov     r7, r7, asr #3
+    pkhtb   r7, r7, r10, asr #19
+    mov     r1, r1, asr #3
+    pkhtb   r1, r1, r6, asr #19
+    uxtb16  r8, r11, ror #8
+    qadd16  r7, r7, r8
+    uxtb16  r8, r11
+    qadd16  r1, r1, r8
+    usat16  r7, #8, r7
+    usat16  r1, #8, r1
+    orr     r1, r1, r7, lsl #8
+    str     r9, [lr], r12
+    str     r1, [lr], r12
+    str     lr, [sp]
+    bne     vp8_dequant_dc_idct_loop2_v6
+
+; vpx_memset
+    sub     r0, r0, #32
+    add     sp, sp, #4
+
+    mov     r12, #0
+    str     r12, [r0]
+    str     r12, [r0, #4]
+    str     r12, [r0, #8]
+    str     r12, [r0, #12]
+    str     r12, [r0, #16]
+    str     r12, [r0, #20]
+    str     r12, [r0, #24]
+    str     r12, [r0, #28]
+
+    ldmia   sp!, {r4 - r11, pc}
+    ENDP    ; |vp8_dequant_dc_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2       DCD 0x00008A8C
+c0x00040004       DCD 0x00040004
+
+    END
diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
new file mode 100644
index 0000000..47b671c
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
@@ -0,0 +1,196 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+    EXPORT |vp8_dequant_idct_add_v6|
+
+    AREA |.text|, CODE, READONLY
+;void vp8_dequant_idct_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
+; sp + 36 = pitch  ; +4 = 40
+; sp + 40 = stride  ; +4 = 44
+
+
+|vp8_dequant_idct_add_v6| PROC
+    stmdb   sp!, {r4-r11, lr}
+
+    ldr     r4, [r0]                ;input
+    ldr     r5, [r1], #4            ;dq
+
+    sub     sp, sp, #4
+    str     r3, [sp]
+
+    mov     r12, #4
+
+vp8_dequant_add_loop
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    ldr     r4, [r0, #4]            ;input
+    ldr     r5, [r1], #4            ;dq
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    subs    r12, r12, #1
+
+    ldrne   r4, [r0, #4]
+    ldrne   r5, [r1], #4
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    bne     vp8_dequant_add_loop
+
+    sub     r0, r0, #32
+    mov     r1, r0
+
+; short_idct4x4llm_v6_dual
+    ldr     r3, cospi8sqrt2minus1
+    ldr     r4, sinpi8sqrt2
+    ldr     r6, [r0, #8]
+    mov     r5, #2
+vp8_dequant_idct_loop1_v6
+    ldr     r12, [r0, #24]
+    ldr     r14, [r0, #16]
+    smulwt  r9, r3, r6
+    smulwb  r7, r3, r6
+    smulwt  r10, r4, r6
+    smulwb  r8, r4, r6
+    pkhbt   r7, r7, r9, lsl #16
+    smulwt  r11, r3, r12
+    pkhbt   r8, r8, r10, lsl #16
+    uadd16  r6, r6, r7
+    smulwt  r7, r4, r12
+    smulwb  r9, r3, r12
+    smulwb  r10, r4, r12
+    subs    r5, r5, #1
+    pkhbt   r9, r9, r11, lsl #16
+    ldr     r11, [r0], #4
+    pkhbt   r10, r10, r7, lsl #16
+    uadd16  r7, r12, r9
+    usub16  r7, r8, r7
+    uadd16  r6, r6, r10
+    uadd16  r10, r11, r14
+    usub16  r8, r11, r14
+    uadd16  r9, r10, r6
+    usub16  r10, r10, r6
+    uadd16  r6, r8, r7
+    usub16  r7, r8, r7
+    str     r6, [r1, #8]
+    ldrne   r6, [r0, #8]
+    str     r7, [r1, #16]
+    str     r10, [r1, #24]
+    str     r9, [r1], #4
+    bne     vp8_dequant_idct_loop1_v6
+
+    mov     r5, #2
+    sub     r0, r1, #8
+vp8_dequant_idct_loop2_v6
+    ldr     r6, [r0], #4
+    ldr     r7, [r0], #4
+    ldr     r8, [r0], #4
+    ldr     r9, [r0], #4
+    smulwt  r1, r3, r6
+    smulwt  r12, r4, r6
+    smulwt  lr, r3, r8
+    smulwt  r10, r4, r8
+    pkhbt   r11, r8, r6, lsl #16
+    pkhbt   r1, lr, r1, lsl #16
+    pkhbt   r12, r10, r12, lsl #16
+    pkhtb   r6, r6, r8, asr #16
+    uadd16  r6, r1, r6
+    pkhbt   lr, r9, r7, lsl #16
+    uadd16  r10, r11, lr
+    usub16  lr, r11, lr
+    pkhtb   r8, r7, r9, asr #16
+    subs    r5, r5, #1
+    smulwt  r1, r3, r8
+    smulwb  r7, r3, r8
+    smulwt  r11, r4, r8
+    smulwb  r9, r4, r8
+    pkhbt   r1, r7, r1, lsl #16
+    uadd16  r8, r1, r8
+    pkhbt   r11, r9, r11, lsl #16
+    usub16  r1, r12, r8
+    uadd16  r8, r11, r6
+    ldr     r9, c0x00040004
+    ldr     r12, [sp, #40]
+    uadd16  r6, r10, r8
+    usub16  r7, r10, r8
+    uadd16  r7, r7, r9
+    uadd16  r6, r6, r9
+    uadd16  r10, r14, r1
+    usub16  r1, r14, r1
+    uadd16  r10, r10, r9
+    uadd16  r1, r1, r9
+    ldr     r11, [r2], r12
+    mov     r8, r7, asr #3
+    pkhtb   r9, r8, r10, asr #19
+    mov     r8, r1, asr #3
+    pkhtb   r8, r8, r6, asr #19
+    uxtb16  lr, r11, ror #8
+    qadd16  r9, r9, lr
+    uxtb16  lr, r11
+    qadd16  r8, r8, lr
+    usat16  r9, #8, r9
+    usat16  r8, #8, r8
+    orr     r9, r8, r9, lsl #8
+    ldr     r11, [r2], r12
+    ldr     lr, [sp]
+    ldr     r12, [sp, #44]
+    mov     r7, r7, lsl #16
+    mov     r1, r1, lsl #16
+    mov     r10, r10, lsl #16
+    mov     r6, r6, lsl #16
+    mov     r7, r7, asr #3
+    pkhtb   r7, r7, r10, asr #19
+    mov     r1, r1, asr #3
+    pkhtb   r1, r1, r6, asr #19
+    uxtb16  r8, r11, ror #8
+    qadd16  r7, r7, r8
+    uxtb16  r8, r11
+    qadd16  r1, r1, r8
+    usat16  r7, #8, r7
+    usat16  r1, #8, r1
+    orr     r1, r1, r7, lsl #8
+    str     r9, [lr], r12
+    str     r1, [lr], r12
+    str     lr, [sp]
+    bne     vp8_dequant_idct_loop2_v6
+
+; vpx_memset
+    sub     r0, r0, #32
+    add     sp, sp, #4
+
+    mov     r12, #0
+    str     r12, [r0]
+    str     r12, [r0, #4]
+    str     r12, [r0, #8]
+    str     r12, [r0, #12]
+    str     r12, [r0, #16]
+    str     r12, [r0, #20]
+    str     r12, [r0, #24]
+    str     r12, [r0, #28]
+
+    ldmia   sp!, {r4 - r11, pc}
+    ENDP    ; |vp8_dequant_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2       DCD 0x00008A8C
+c0x00040004       DCD 0x00040004
+
+    END
diff --git a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
deleted file mode 100644
index 3daa9b3..0000000
--- a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
+++ /dev/null
@@ -1,202 +0,0 @@
-;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dequant_dc_idct_v6|
-    ; ARM
-    ; REQUIRE8
-    ; PRESERVE8
-
-    AREA    |.text|, CODE, READONLY  ; name this block of code
-;void vp8_dequant_dc_idct_v6(short *input, short *dq, short *output, int pitch,int Dc)
-|vp8_dequant_dc_idct_v6| PROC
-    stmdb   sp!, {r4-r11, lr}
-
-    ldr     r6, [sp, #36]           ;load Dc
-
-    ldr     r4, [r0]                ;input
-    ldr     r5, [r1], #4            ;dq
-
-    sub     sp, sp, #4
-    str     r0, [sp]
-
-    smultt  r7, r4, r5
-
-    ldr     r4, [r0, #4]            ;input
-    ldr     r5, [r1], #4            ;dq
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    ldr     r4, [r0, #4]            ;input
-    ldr     r5, [r1], #4            ;dq
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    mov     r12, #3
-
-dequant_dc_idct_loop
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    ldr     r4, [r0, #4]            ;input
-    ldr     r5, [r1], #4            ;dq
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    subs    r12, r12, #1
-
-    ldrne   r4, [r0, #4]
-    ldrne   r5, [r1], #4
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    bne     dequant_dc_idct_loop
-
-    sub     r0, r0, #32
-    mov     r1, r2
-    mov     r2, r3
-
-; short_idct4x4llm_v6_dual
-
-    mov r3, #0x00004E00 ;                   cos
-    orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
-    mov r4, #0x00008A00 ;                       sin
-    orr r4, r4, #0x0000008C ; sinpi8sqrt2
-    mov r5, #0x2    ; i=2                           i
-loop1_dual_11
-    ldr r6, [r0, #(4*2)]    ; i5 | i4                               5|4
-    ldr r12, [r0, #(12*2)]  ; i13 | i12                                                     13|12
-    ldr r14, [r0, #(8*2)]   ; i9 | i8                                                               9|8
-
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwb  r7, r3, r6  ; (ip[4] * cospi8sqrt2minus1) >> 16                                 4c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwb  r8, r4, r6  ; (ip[4] * sinpi8sqrt2) >> 16                                       4s
-    pkhbt   r7, r7, r9, lsl #16 ; 5c | 4c
-    smulwt  r11, r3, r12    ; (ip[13] * cospi8sqrt2minus1) >> 16                                                    13c
-    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
-    uadd16  r6, r6, r7  ; 5c+5 | 4c+4
-    smulwt  r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16                                  13s
-    smulwb  r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16                                            12c
-    smulwb  r10, r4, r12    ; (ip[12] * sinpi8sqrt2) >> 16                                              12s
-    subs    r5, r5, #0x1    ; i--                           --
-    pkhbt   r9, r9, r11, lsl #16    ; 13c | 12c
-    ldr r11, [r0], #0x4 ; i1 | i0       ++                                          1|0
-    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
-    uadd16  r7, r12, r9 ; 13c+13 | 12c+12
-    usub16  r7, r8, r7  ; c                                 c
-    uadd16  r6, r6, r10 ; d                             d
-    uadd16  r10, r11, r14   ; a                                             a
-    usub16  r8, r11, r14    ; b                                     b
-    uadd16  r9, r10, r6 ; a+d                                           a+d
-    usub16  r10, r10, r6    ; a-d                                               a-d
-    uadd16  r6, r8, r7  ; b+c                               b+c
-    usub16  r7, r8, r7  ; b-c                                   b-c
-    str r6, [r1, r2]    ; o5 | o4
-    add r6, r2, r2  ; pitch * 2                             p2
-    str r7, [r1, r6]    ; o9 | o8
-    add r6,  r6, r2 ; pitch * 3                             p3
-    str r10, [r1, r6]   ; o13 | o12
-    str r9, [r1], #0x4  ; o1 | o0           ++
-    bne loop1_dual_11   ;
-    mov r5, #0x2    ; i=2                           i
-    sub r0, r1, #8  ; reset input/output        i/o
-loop2_dual_22
-    ldr r6, [r0, r2]    ; i5 | i4                               5|4
-    ldr r1, [r0]    ; i1 | i0           1|0
-    ldr r12, [r0, #0x4] ; i3 | i2                                                       3|2
-    add r14, r2, #0x4   ; pitch + 2                                                             p+2
-    ldr r14, [r0, r14]  ; i7 | i6                                                               7|6
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwt  r7, r3, r1  ; (ip[1] * cospi8sqrt2minus1) >> 16                                 1c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwt  r8, r4, r1  ; (ip[1] * sinpi8sqrt2) >> 16                                       1s
-    pkhbt   r11, r6, r1, lsl #16    ; i0 | i4                                                   0|4
-    pkhbt   r7, r9, r7, lsl #16 ; 1c | 5c
-    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1 ©                                     tc1
-    pkhtb   r1, r1, r6, asr #16 ; i1 | i5           1|5
-    uadd16  r1, r7, r1  ; 1c+1 | 5c+5 = temp2 (d)           td2
-    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6                                           2|6
-    uadd16  r10, r11, r9    ; a                                             a
-    usub16  r9, r11, r9 ; b                                         b
-    pkhtb   r6, r12, r14, asr #16   ; i3 | i7                               3|7
-    subs    r5, r5, #0x1    ; i--                           --
-    smulwt  r7, r3, r6  ; (ip[3] * cospi8sqrt2minus1) >> 16                                 3c
-    smulwt  r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16                                                   3s
-    smulwb  r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16                                                     7c
-    smulwb  r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16                                                               7s
-
-    pkhbt   r7, r12, r7, lsl #16    ; 3c | 7c
-    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1 (d)                                                   td1
-    uadd16  r6, r7, r6  ; 3c+3 | 7c+7 = temp2  (c)                              tc2
-    usub16  r12, r8, r6 ; c (o1 | o5)                                                       c
-    uadd16  r6, r11, r1 ; d (o3 | o7)                               d
-    uadd16  r7, r10, r6 ; a+d                                   a+d
-    mov r8, #0x4    ; set up 4's                                        4
-    orr r8, r8, #0x40000    ;                                       4|4
-    usub16  r6, r10, r6 ; a-d                               a-d
-    uadd16  r6, r6, r8  ; a-d+4                             3|7
-    uadd16  r7, r7, r8  ; a+d+4                                 0|4
-    uadd16  r10, r9, r12    ; b+c                                               b+c
-    usub16  r1, r9, r12 ; b-c           b-c
-    uadd16  r10, r10, r8    ; b+c+4                                             1|5
-    uadd16  r1, r1, r8  ; b-c+4         2|6
-    mov r8, r10, asr #19    ; o1 >> 3
-    strh    r8, [r0, #2]    ; o1
-    mov r8, r1, asr #19 ; o2 >> 3
-    strh    r8, [r0, #4]    ; o2
-    mov r8, r6, asr #19 ; o3 >> 3
-    strh    r8, [r0, #6]    ; o3
-    mov r8, r7, asr #19 ; o0 >> 3
-    strh    r8, [r0], r2    ; o0        +p
-    sxth    r10, r10    ;
-    mov r8, r10, asr #3 ; o5 >> 3
-    strh    r8, [r0, #2]    ; o5
-    sxth    r1, r1  ;
-    mov r8, r1, asr #3  ; o6 >> 3
-    strh    r8, [r0, #4]    ; o6
-    sxth    r6, r6  ;
-    mov r8, r6, asr #3  ; o7 >> 3
-    strh    r8, [r0, #6]    ; o7
-    sxth    r7, r7  ;
-    mov r8, r7, asr #3  ; o4 >> 3
-    strh    r8, [r0], r2    ; o4        +p
-;;;;;   subs    r5, r5, #0x1    ; i--                           --
-    bne loop2_dual_22   ;
-
-
-;vpx_memset
-    ldr     r0, [sp]
-    add     sp, sp, #4
-
-    mov     r12, #0
-    str     r12, [r0]
-    str     r12, [r0, #4]
-    str     r12, [r0, #8]
-    str     r12, [r0, #12]
-    str     r12, [r0, #16]
-    str     r12, [r0, #20]
-    str     r12, [r0, #24]
-    str     r12, [r0, #28]
-
-    ldmia   sp!, {r4 - r11, pc} ; replace vars, return                      restore
-
-    ENDP    ;|vp8_dequant_dc_idct_v68|
-
-    END
diff --git a/vp8/decoder/arm/armv6/dequantidct_v6.asm b/vp8/decoder/arm/armv6/dequantidct_v6.asm
deleted file mode 100644
index 61bb48d..0000000
--- a/vp8/decoder/arm/armv6/dequantidct_v6.asm
+++ /dev/null
@@ -1,183 +0,0 @@
-;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dequant_idct_v6|
-    ; ARM
-    ; REQUIRE8
-    ; PRESERVE8
-
-    AREA    |.text|, CODE, READONLY  ; name this block of code
-;void vp8_dequant_idct_v6(short *input, short *dq, short *output, int pitch)
-|vp8_dequant_idct_v6| PROC
-    stmdb   sp!, {r4-r11, lr}
-
-    ldr     r4, [r0]            ;input
-    ldr     r5, [r1], #4            ;dq
-
-    sub     sp, sp, #4
-    str     r0, [sp]
-
-    mov     r12, #4
-
-dequant_idct_loop
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    ldr     r4, [r0, #4]            ;input
-    ldr     r5, [r1], #4        ;dq
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    subs    r12, r12, #1
-
-    ldrne   r4, [r0, #4]
-    ldrne   r5, [r1], #4
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    bne     dequant_idct_loop
-
-    sub     r0, r0, #32
-    mov     r1, r2
-    mov     r2, r3
-
-; short_idct4x4llm_v6_dual
-
-    mov r3, #0x00004E00 ;                   cos
-    orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
-    mov r4, #0x00008A00 ;                       sin
-    orr r4, r4, #0x0000008C ; sinpi8sqrt2
-    mov r5, #0x2    ; i=2                           i
-loop1_dual_1
-    ldr r6, [r0, #(4*2)]    ; i5 | i4                               5|4
-    ldr r12, [r0, #(12*2)]  ; i13 | i12                                                     13|12
-    ldr r14, [r0, #(8*2)]   ; i9 | i8                                                               9|8
-
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwb  r7, r3, r6  ; (ip[4] * cospi8sqrt2minus1) >> 16                                 4c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwb  r8, r4, r6  ; (ip[4] * sinpi8sqrt2) >> 16                                       4s
-    pkhbt   r7, r7, r9, lsl #16 ; 5c | 4c
-    smulwt  r11, r3, r12    ; (ip[13] * cospi8sqrt2minus1) >> 16                                                    13c
-    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
-    uadd16  r6, r6, r7  ; 5c+5 | 4c+4
-    smulwt  r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16                                  13s
-    smulwb  r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16                                            12c
-    smulwb  r10, r4, r12    ; (ip[12] * sinpi8sqrt2) >> 16                                              12s
-    subs    r5, r5, #0x1    ; i--                           --
-    pkhbt   r9, r9, r11, lsl #16    ; 13c | 12c
-    ldr r11, [r0], #0x4 ; i1 | i0       ++                                          1|0
-    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
-    uadd16  r7, r12, r9 ; 13c+13 | 12c+12
-    usub16  r7, r8, r7  ; c                                 c
-    uadd16  r6, r6, r10 ; d                             d
-    uadd16  r10, r11, r14   ; a                                             a
-    usub16  r8, r11, r14    ; b                                     b
-    uadd16  r9, r10, r6 ; a+d                                           a+d
-    usub16  r10, r10, r6    ; a-d                                               a-d
-    uadd16  r6, r8, r7  ; b+c                               b+c
-    usub16  r7, r8, r7  ; b-c                                   b-c
-    str r6, [r1, r2]    ; o5 | o4
-    add r6, r2, r2  ; pitch * 2                             p2
-    str r7, [r1, r6]    ; o9 | o8
-    add r6,  r6, r2 ; pitch * 3                             p3
-    str r10, [r1, r6]   ; o13 | o12
-    str r9, [r1], #0x4  ; o1 | o0           ++
-    bne loop1_dual_1    ;
-    mov r5, #0x2    ; i=2                           i
-    sub r0, r1, #8  ; reset input/output        i/o
-loop2_dual_2
-    ldr r6, [r0, r2]    ; i5 | i4                               5|4
-    ldr r1, [r0]    ; i1 | i0           1|0
-    ldr r12, [r0, #0x4] ; i3 | i2                                                       3|2
-    add r14, r2, #0x4   ; pitch + 2                                                             p+2
-    ldr r14, [r0, r14]  ; i7 | i6                                                               7|6
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwt  r7, r3, r1  ; (ip[1] * cospi8sqrt2minus1) >> 16                                 1c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwt  r8, r4, r1  ; (ip[1] * sinpi8sqrt2) >> 16                                       1s
-    pkhbt   r11, r6, r1, lsl #16    ; i0 | i4                                                   0|4
-    pkhbt   r7, r9, r7, lsl #16 ; 1c | 5c
-    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1 ©                                     tc1
-    pkhtb   r1, r1, r6, asr #16 ; i1 | i5           1|5
-    uadd16  r1, r7, r1  ; 1c+1 | 5c+5 = temp2 (d)           td2
-    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6                                           2|6
-    uadd16  r10, r11, r9    ; a                                             a
-    usub16  r9, r11, r9 ; b                                         b
-    pkhtb   r6, r12, r14, asr #16   ; i3 | i7                               3|7
-    subs    r5, r5, #0x1    ; i--                           --
-    smulwt  r7, r3, r6  ; (ip[3] * cospi8sqrt2minus1) >> 16                                 3c
-    smulwt  r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16                                                   3s
-    smulwb  r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16                                                     7c
-    smulwb  r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16                                                               7s
-
-    pkhbt   r7, r12, r7, lsl #16    ; 3c | 7c
-    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1 (d)                                                   td1
-    uadd16  r6, r7, r6  ; 3c+3 | 7c+7 = temp2  (c)                              tc2
-    usub16  r12, r8, r6 ; c (o1 | o5)                                                       c
-    uadd16  r6, r11, r1 ; d (o3 | o7)                               d
-    uadd16  r7, r10, r6 ; a+d                                   a+d
-    mov r8, #0x4    ; set up 4's                                        4
-    orr r8, r8, #0x40000    ;                                       4|4
-    usub16  r6, r10, r6 ; a-d                               a-d
-    uadd16  r6, r6, r8  ; a-d+4                             3|7
-    uadd16  r7, r7, r8  ; a+d+4                                 0|4
-    uadd16  r10, r9, r12    ; b+c                                               b+c
-    usub16  r1, r9, r12 ; b-c           b-c
-    uadd16  r10, r10, r8    ; b+c+4                                             1|5
-    uadd16  r1, r1, r8  ; b-c+4         2|6
-    mov r8, r10, asr #19    ; o1 >> 3
-    strh    r8, [r0, #2]    ; o1
-    mov r8, r1, asr #19 ; o2 >> 3
-    strh    r8, [r0, #4]    ; o2
-    mov r8, r6, asr #19 ; o3 >> 3
-    strh    r8, [r0, #6]    ; o3
-    mov r8, r7, asr #19 ; o0 >> 3
-    strh    r8, [r0], r2    ; o0        +p
-    sxth    r10, r10    ;
-    mov r8, r10, asr #3 ; o5 >> 3
-    strh    r8, [r0, #2]    ; o5
-    sxth    r1, r1  ;
-    mov r8, r1, asr #3  ; o6 >> 3
-    strh    r8, [r0, #4]    ; o6
-    sxth    r6, r6  ;
-    mov r8, r6, asr #3  ; o7 >> 3
-    strh    r8, [r0, #6]    ; o7
-    sxth    r7, r7  ;
-    mov r8, r7, asr #3  ; o4 >> 3
-    strh    r8, [r0], r2    ; o4        +p
-;;;;;   subs    r5, r5, #0x1    ; i--                           --
-    bne loop2_dual_2    ;
-            ;
-
-;vpx_memset
-    ldr     r0, [sp]
-    add     sp, sp, #4
-
-    mov     r12, #0
-    str     r12, [r0]
-    str     r12, [r0, #4]
-    str     r12, [r0, #8]
-    str     r12, [r0, #12]
-    str     r12, [r0, #16]
-    str     r12, [r0, #20]
-    str     r12, [r0, #24]
-    str     r12, [r0, #28]
-
-    ldmia   sp!, {r4 - r11, pc} ; replace vars, return                      restore
-
-    ENDP    ;|vp8_dequant_idct_v6|
-
-    END
diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm
index 95e3859..72f7e0e 100644
--- a/vp8/decoder/arm/armv6/dequantize_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantize_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c
new file mode 100644
index 0000000..3c7bc50
--- /dev/null
+++ b/vp8/decoder/arm/armv6/idct_blk_v6.c
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_v6
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_dc_idct_add_v6 (q, dq, pre, dst, 16, stride, dc[0]);
+        else
+            vp8_dc_only_idct_add_v6 (dc[0], pre, dst, 16, stride);
+
+        if (eobs[1] > 1)
+            vp8_dequant_dc_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+        else
+            vp8_dc_only_idct_add_v6 (dc[1], pre+4, dst+4, 16, stride);
+
+        if (eobs[2] > 1)
+            vp8_dequant_dc_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+        else
+            vp8_dc_only_idct_add_v6 (dc[2], pre+8, dst+8, 16, stride);
+
+        if (eobs[3] > 1)
+            vp8_dequant_dc_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+        else
+            vp8_dc_only_idct_add_v6 (dc[3], pre+12, dst+12, 16, stride);
+
+        q    += 64;
+        dc   += 4;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_v6
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_v6 (q, dq, pre, dst, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dst, 16, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dst+4, 16, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        if (eobs[2] > 1)
+            vp8_dequant_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[32]*dq[0], pre+8, dst+8, 16, stride);
+            ((int *)(q+32))[0] = 0;
+        }
+
+        if (eobs[3] > 1)
+            vp8_dequant_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[48]*dq[0], pre+12, dst+12, 16, stride);
+            ((int *)(q+48))[0] = 0;
+        }
+
+        q    += 64;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_v6
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_v6 (q, dq, pre, dstu, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstu, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstu+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstu += 4*stride;
+        eobs += 2;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_v6 (q, dq, pre, dstv, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstv, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstv+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstv += 4*stride;
+        eobs += 2;
+    }
+}
diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h
index 495004f..d2ebc71 100644
--- a/vp8/decoder/arm/dboolhuff_arm.h
+++ b/vp8/decoder/arm/dboolhuff_arm.h
@@ -16,9 +16,6 @@
 #undef vp8_dbool_start
 #define vp8_dbool_start vp8dx_start_decode_v6
 
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_v6
-
 #undef vp8_dbool_fill
 #define vp8_dbool_fill vp8_bool_decoder_fill_v6
 
@@ -33,9 +30,6 @@
 #undef vp8_dbool_start
 #define vp8_dbool_start vp8dx_start_decode_neon
 
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_neon
-
 #undef vp8_dbool_fill
 #define vp8_dbool_fill vp8_bool_decoder_fill_neon
 
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index 54006a9..3926587 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index c8a61a4..40151e0 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -13,32 +14,56 @@
 
 #if HAVE_ARMV6
 extern prototype_dequant_block(vp8_dequantize_b_v6);
-extern prototype_dequant_idct(vp8_dequant_idct_v6);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_v6);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
 
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_v6
 
-#undef  vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_v6
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_v6
 
-#undef  vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_v6
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_v6
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
 #endif
 
 #if HAVE_ARMV7
 extern prototype_dequant_block(vp8_dequantize_b_neon);
-extern prototype_dequant_idct(vp8_dequant_idct_neon);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_neon);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
 
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_neon
 
-#undef  vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_neon
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
 
-#undef  vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_neon
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_neon
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_neon
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
 #endif
 
 #endif
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
new file mode 100644
index 0000000..45e068a
--- /dev/null
+++ b/vp8/decoder/arm/detokenize.asm
@@ -0,0 +1,320 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_decode_mb_tokens_v6|
+
+    AREA    |.text|, CODE, READONLY  ; name this block of code
+
+    INCLUDE vpx_asm_offsets.asm
+
+l_qcoeff    EQU     0
+l_i         EQU     4
+l_type      EQU     8
+l_stop      EQU     12
+l_c         EQU     16
+l_l_ptr     EQU     20
+l_a_ptr     EQU     24
+l_bc        EQU     28
+l_coef_ptr  EQU     32
+l_stacksize EQU     64
+
+
+;; constant offsets -- these should be created at build time
+c_block2above_offset         EQU 25
+c_entropy_nodes              EQU 11
+c_dct_eob_token              EQU 11
+
+|vp8_decode_mb_tokens_v6| PROC
+    stmdb       sp!, {r4 - r11, lr}
+    sub         sp, sp, #l_stacksize
+    mov         r7, r1                      ; type
+    mov         r9, r0                      ; detoken
+
+    ldr         r1, [r9, #detok_current_bc]
+    ldr         r0, [r9, #detok_qcoeff_start_ptr]
+    mov         r11, #0                     ; i
+    mov         r3, #16                     ; stop
+
+    cmp         r7, #1                      ; type ?= 1
+    addeq       r11, r11, #24               ; i = 24
+    addeq       r3, r3, #8                  ; stop = 24
+    addeq       r0, r0, #3, 24              ; qcoefptr += 24*16
+
+    str         r0, [sp, #l_qcoeff]
+    str         r11, [sp, #l_i]
+    str         r7, [sp, #l_type]
+    str         r3, [sp, #l_stop]
+    str         r1, [sp, #l_bc]
+
+    add         lr, r9, r7, lsl #2          ; detoken + type*4
+
+    ldr         r8, [r1, #bool_decoder_user_buffer]
+
+    ldr         r10, [lr, #detok_coef_probs]
+    ldr         r5, [r1, #bool_decoder_count]
+    ldr         r6, [r1, #bool_decoder_range]
+    ldr         r4, [r1, #bool_decoder_value]
+
+    str         r10, [sp, #l_coef_ptr]
+
+BLOCK_LOOP
+    ldr         r3, [r9, #detok_ptr_block2leftabove]
+    ldr         r1, [r9, #detok_L]
+    ldr         r2, [r9, #detok_A]
+    ldrb        r12, [r3, r11]!             ; block2left[i]
+    ldrb        r3, [r3, #c_block2above_offset]; block2above[i]
+
+    cmp         r7, #0                      ; c = !type
+    moveq       r7, #1
+    movne       r7, #0
+
+    ldrb        r0, [r1, r12]!              ; *(L += block2left[i])
+    ldrb        r3, [r2, r3]!               ; *(A += block2above[i])
+    mov         lr, #c_entropy_nodes        ; ENTROPY_NODES = 11
+
+; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0)
+    cmp         r0, #0                      ; *l ?= 0
+    movne       r0, #1
+    cmp         r3, #0                      ; *a ?= 0
+    addne       r0, r0, #1                  ; t
+
+    str         r1, [sp, #l_l_ptr]          ; save &l
+    str         r2, [sp, #l_a_ptr]          ; save &a
+    smlabb      r0, r0, lr, r10             ; Prob = coef_probs + (t * ENTROPY_NODES)
+    mov         r1, #0                      ; t = 0
+    str         r7, [sp, #l_c]
+
+    ;align 4
+COEFF_LOOP
+    ldr         r3, [r9, #detok_ptr_coef_bands_x]
+    ldr         lr, [r9, #detok_coef_tree_ptr]
+    ;STALL
+    ldrb        r3, [r3, r7]                ; coef_bands_x[c]
+    ;STALL
+    ;STALL
+    add         r0, r0, r3                  ; Prob += coef_bands_x[c]
+
+get_token_loop
+    ldrb        r2, [r0, +r1, asr #1]       ; Prob[t >> 1]
+    mov         r3, r6, lsl #8              ; range << 8
+    sub         r3, r3, #256                ; (range << 8) - (1 << 8)
+    mov         r10, #1                     ; 1
+
+    smlawb      r2, r3, r2, r10             ; split = 1 + (((range-1) * probability) >> 8)
+
+    ldrb        r12, [r8]                   ; load cx data byte in stall slot : r8 = bufptr
+    ;++
+
+    subs        r3, r4, r2, lsl #24         ; value-(split<<24): used later to calculate shift for NORMALIZE
+    addhs       r1, r1, #1                  ; t += 1
+    movhs       r4, r3                      ; value -= bigsplit (split << 24)
+    subhs       r2, r6, r2                  ; range -= split
+ ;   movlo       r6, r2                      ; range = split
+
+    ldrsb     r1, [lr, r1]                  ; t = onyx_coef_tree_ptr[t]
+
+; NORMALIZE
+    clz         r3, r2                      ; vp8dx_bitreader_norm[range] + 24
+    sub         r3, r3, #24                 ; vp8dx_bitreader_norm[range]
+    subs        r5, r5, r3                  ; count -= shift
+    mov         r6, r2, lsl r3              ; range <<= shift
+    mov         r4, r4, lsl r3              ; value <<= shift
+
+; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
+    addle         r5, r5, #8                ; count += 8
+    rsble         r3, r5, #24               ; 24 - count
+    addle         r8, r8, #1                ; bufptr++
+    orrle         r4, r4, r12, lsl r3       ; value |= *bufptr << shift + 16
+
+    cmp         r1, #0                      ; t ?= 0
+    bgt         get_token_loop              ; while (t > 0)
+
+    cmn         r1, #c_dct_eob_token        ; if(t == -DCT_EOB_TOKEN)
+    beq         END_OF_BLOCK                ; break
+
+    rsb         lr, r1, #0                  ; v = -t;
+
+    cmp         lr, #4                      ; if(v > FOUR_TOKEN)
+    ble         SKIP_EXTRABITS
+
+    ldr         r3, [r9, #detok_teb_base_ptr]
+    mov         r11, #1                     ; 1 in split = 1 + ... nope, v+= 1 << bits_count
+    add         r7, r3, lr, lsl #4          ; detok_teb_base_ptr + (v << 4)
+
+    ldrsh       lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
+    ldrsh       r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
+
+extrabits_loop
+    add         r3, r0, r7                  ; &teb_ptr->Probs[bits_count]
+
+    ldrb        r2, [r3, #4]                ; probability. why +4?
+    mov         r3, r6, lsl #8              ; range << 8
+    sub         r3, r3, #256                ; range << 8 + 1 << 8
+
+    smlawb      r2, r3, r2, r11             ; split = 1 +  (((range-1) * probability) >> 8)
+
+    ldrb        r12, [r8]                   ; *bufptr
+    ;++
+
+    subs        r10, r4, r2, lsl #24        ; value - (split<<24)
+    movhs       r4, r10                     ; value = value - (split << 24)
+    subhs       r2, r6, r2                  ; range = range - split
+    addhs       lr, lr, r11, lsl r0         ; v += ((UINT16)1<<bits_count)
+
+; NORMALIZE
+    clz         r3, r2                      ; shift - leading zeros in split
+    sub         r3, r3, #24                 ; don't count first 3 bytes
+    subs        r5, r5, r3                  ; count -= shift
+    mov         r6, r2, lsl r3              ; range = range << shift
+    mov         r4, r4, lsl r3              ; value <<= shift
+
+    addle       r5, r5, #8                  ; count += BR_COUNT
+    addle       r8, r8, #1                  ; bufptr++
+    rsble       r3, r5, #24                 ; BR_COUNT - count
+    orrle       r4, r4, r12, lsl r3         ; value |= *bufptr << (BR_COUNT - count)
+
+    subs        r0, r0, #1                  ; bits_count --
+    bpl         extrabits_loop
+
+
+SKIP_EXTRABITS
+    ldr         r11, [sp, #l_qcoeff]
+    ldr         r0, [sp, #l_coef_ptr]       ; Prob = coef_probs
+
+    cmp         r1, #0                      ; check for nonzero token - if (t)
+    beq         SKIP_EOB_CHECK              ; if t is zero, we will skip the eob table chec
+
+    add         r3, r6, #1                  ; range + 1
+    mov         r2, r3, lsr #1              ; split = (range + 1) >> 1
+
+    subs        r3, r4, r2, lsl #24         ; value - (split<<24)
+    movhs       r4, r3                      ; value -= (split << 24)
+    subhs       r2, r6, r2                  ; range -= split
+    mvnhs       r3, lr                      ; -v
+    addhs       lr, r3, #1                  ; v = (v ^ -1) + 1
+
+; NORMALIZE
+    clz         r3, r2                      ; leading 0s in split
+    sub         r3, r3, #24                 ; shift
+    subs        r5, r5, r3                  ; count -= shift
+    mov         r6, r2, lsl r3              ; range <<= shift
+    mov         r4, r4, lsl r3              ; value <<= shift
+    ldrleb      r2, [r8], #1                ; *(bufptr++)
+    addle       r5, r5, #8                  ; count += 8
+    rsble       r3, r5, #24                 ; BR_COUNT - count
+    orrle       r4, r4, r2, lsl r3          ; value |= *bufptr << (BR_COUNT - count)
+
+    add         r0, r0, #11                 ; Prob += ENTROPY_NODES (11)
+
+    cmn         r1, #1                      ; t < -ONE_TOKEN
+
+    addlt       r0, r0, #11                 ; Prob += ENTROPY_NODES (11)
+
+    mvn         r1, #1                      ; t = -1 ???? C is -2
+
+SKIP_EOB_CHECK
+    ldr         r7, [sp, #l_c]              ; c
+    ldr         r3, [r9, #detok_scan]
+    add         r1, r1, #2                  ; t+= 2
+    cmp         r7, #15                     ; c should will be one higher
+
+    ldr         r3, [r3, +r7, lsl #2]       ; scan[c] this needs pre-inc c value
+    add         r7, r7, #1                  ; c++
+    add         r3, r11, r3, lsl #1         ; qcoeff + scan[c]
+
+    str         r7, [sp, #l_c]              ; store c
+    strh        lr, [r3]                    ; qcoef_ptr[scan[c]] = v
+
+    blt         COEFF_LOOP
+
+    sub         r7, r7, #1                  ; if(t != -DCT_EOB_TOKEN) --c
+
+END_OF_BLOCK
+    ldr         r3, [sp, #l_type]           ; type
+    ldr         r10, [sp, #l_coef_ptr]      ; coef_ptr
+    ldr         r0, [sp, #l_qcoeff]         ; qcoeff
+    ldr         r11, [sp, #l_i]             ; i
+    ldr         r12, [sp, #l_stop]          ; stop
+
+    cmp         r3, #0                      ; type ?= 0
+    moveq       r1, #1
+    movne       r1, #0
+    add         r3, r11, r9                 ; detok + i
+
+    cmp         r7, r1                      ; c ?= !type
+    strb        r7, [r3, #detok_eob]        ; eob[i] = c
+
+    ldr         r7, [sp, #l_l_ptr]          ; l
+    ldr         r2, [sp, #l_a_ptr]          ; a
+    movne       r3, #1                      ; t
+    moveq       r3, #0
+
+    add         r0, r0, #32                 ; qcoeff += 32 (16 * 2?)
+    add         r11, r11, #1                ; i++
+    strb        r3, [r7]                    ; *l = t
+    strb        r3, [r2]                    ; *a = t
+    str         r0, [sp, #l_qcoeff]         ; qcoeff
+    str         r11, [sp, #l_i]             ; i
+
+    cmp         r11, r12                    ; i < stop
+    ldr         r7, [sp, #l_type]           ; type
+
+    blt         BLOCK_LOOP
+
+    cmp         r11, #25                    ; i ?= 25
+    bne         ln2_decode_mb_to
+
+    ldr         r12, [r9, #detok_qcoeff_start_ptr]
+    ldr         r10, [r9, #detok_coef_probs]
+    mov         r7, #0                      ; type/i = 0
+    mov         r3, #16                     ; stop = 16
+    str         r12, [sp, #l_qcoeff]        ; qcoeff_ptr = qcoeff_start_ptr
+    str         r7, [sp, #l_i]
+    str         r7, [sp, #l_type]
+    str         r3, [sp, #l_stop]
+
+    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type=0]
+
+    b           BLOCK_LOOP
+
+ln2_decode_mb_to
+    cmp         r11, #16                    ; i ?= 16
+    bne         ln1_decode_mb_to
+
+    mov         r10, #detok_coef_probs
+    add         r10, r10, #2*4              ; coef_probs[type]
+    ldr         r10, [r9, r10]              ; detok + detok_coef_probs[type]
+
+    mov         r7, #2                      ; type = 2
+    mov         r3, #24                     ; stop = 24
+
+    str         r7, [sp, #l_type]
+    str         r3, [sp, #l_stop]
+
+    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type]
+    b           BLOCK_LOOP
+
+ln1_decode_mb_to
+    ldr         r2, [sp, #l_bc]
+    mov         r0, #0
+    nop
+
+    str         r8, [r2, #bool_decoder_user_buffer]
+    str         r5, [r2, #bool_decoder_count]
+    str         r4, [r2, #bool_decoder_value]
+    str         r6, [r2, #bool_decoder_range]
+
+    add         sp, sp, #l_stacksize
+    ldmia       sp!, {r4 - r11, pc}
+
+    ENDP  ; |vp8_decode_mb_tokens_v6|
+
+    END
diff --git a/vp8/decoder/arm/detokenize_arm.h b/vp8/decoder/arm/detokenize_arm.h
new file mode 100644
index 0000000..9bb19b6
--- /dev/null
+++ b/vp8/decoder/arm/detokenize_arm.h
@@ -0,0 +1,22 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef DETOKENIZE_ARM_H
+#define DETOKENIZE_ARM_H
+
+#if HAVE_ARMV6
+#if CONFIG_ARM_ASM_DETOK
+void vp8_init_detokenizer(VP8D_COMP *dx);
+void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
+#endif
+#endif
+
+#endif
diff --git a/vp8/decoder/arm/detokenizearm_sjl.c b/vp8/decoder/arm/detokenizearm_sjl.c
deleted file mode 100644
index c714452..0000000
--- a/vp8/decoder/arm/detokenizearm_sjl.c
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-#include "type_aliases.h"
-#include "blockd.h"
-#include "onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/mem.h"
-
-#define BR_COUNT 8
-#define BOOL_DATA UINT8
-
-#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-//ALIGN16 UINT16 onyx_coef_bands_x[16] = { 0, 1*OCB_X, 2*OCB_X, 3*OCB_X, 6*OCB_X, 4*OCB_X, 5*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 7*OCB_X};
-DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
-
-#define EOB_CONTEXT_NODE            0
-#define ZERO_CONTEXT_NODE           1
-#define ONE_CONTEXT_NODE            2
-#define LOW_VAL_CONTEXT_NODE        3
-#define TWO_CONTEXT_NODE            4
-#define THREE_CONTEXT_NODE          5
-#define HIGH_LOW_CONTEXT_NODE       6
-#define CAT_ONE_CONTEXT_NODE        7
-#define CAT_THREEFOUR_CONTEXT_NODE  8
-#define CAT_THREE_CONTEXT_NODE      9
-#define CAT_FIVE_CONTEXT_NODE       10
-
-
-
-
-DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
-{
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  //ZERO_TOKEN
-    {  1, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //ONE_TOKEN
-    {  2, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //TWO_TOKEN
-    {  3, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //THREE_TOKEN
-    {  4, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //FOUR_TOKEN
-    {  5, 0, { 159, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  //DCT_VAL_CATEGORY1
-    {  7, 1, { 145, 165, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY2
-    { 11, 2, { 140, 148, 173, 0,  0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY3
-    { 19, 3, { 135, 140, 155, 176, 0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY4
-    { 35, 4, { 130, 134, 141, 157, 180, 0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY5
-    { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0   } }, //DCT_VAL_CATEGORY6
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  // EOB TOKEN
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
-{
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context
-    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left
-    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0 //end of vp8_block2above
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
-{
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-
-    ENTROPY_CONTEXT *a;
-    ENTROPY_CONTEXT *l;
-    int i;
-
-    for (i = 0; i < 24; i++)
-    {
-
-        a = A[ vp8_block2context[i] ] + vp8_block2above[i];
-        l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-
-        *a = *l = 0;
-    }
-
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-    {
-        a = A[Y2CONTEXT] + vp8_block2above[24];
-        l = L[Y2CONTEXT] + vp8_block2left[24];
-        *a = *l = 0;
-    }
-
-
-}
-
-#define ONYXBLOCK2CONTEXT_OFFSET    0
-#define ONYXBLOCK2LEFT_OFFSET       25
-#define ONYXBLOCK2ABOVE_OFFSET 50
-
-DECLARE_ALIGNED(16, const static unsigned char, norm[128]) =
-{
-    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-void init_detokenizer(VP8D_COMP *dx)
-{
-    const VP8_COMMON *const oc = & dx->common;
-    MACROBLOCKD *x = & dx->mb;
-
-    dx->detoken.norm_ptr = (unsigned char *)norm;
-    dx->detoken.vp8_coef_tree_ptr = (vp8_tree_index *)vp8_coef_tree;
-    dx->detoken.ptr_onyxblock2context_leftabove = (UINT8 *)vp8_block2context_leftabove;
-    dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
-    dx->detoken.scan = (int *)vp8_default_zig_zag1d;
-    dx->detoken.teb_base_ptr = (TOKENEXTRABITS *)vp8d_token_extra_bits2;
-
-    dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
-
-    dx->detoken.coef_probs[0] = (unsigned char *)(oc->fc.coef_probs [0] [ 0 ] [0]);
-    dx->detoken.coef_probs[1] = (unsigned char *)(oc->fc.coef_probs [1] [ 0 ] [0]);
-    dx->detoken.coef_probs[2] = (unsigned char *)(oc->fc.coef_probs [2] [ 0 ] [0]);
-    dx->detoken.coef_probs[3] = (unsigned char *)(oc->fc.coef_probs [3] [ 0 ] [0]);
-
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-
-//shift = norm[range]; \
-//      shift = norm_ptr[range]; \
-
-#define NORMALIZE \
-    /*if(range < 0x80)*/                            \
-    { \
-        shift = detoken->norm_ptr[range]; \
-        range <<= shift; \
-        value <<= shift; \
-        count -= shift; \
-        if(count <= 0) \
-        { \
-            count += BR_COUNT ; \
-            value |= (*bufptr) << (BR_COUNT-count); \
-            bufptr++; \
-        } \
-    }
-#if 1
-#define DECODE_AND_APPLYSIGN(value_to_sign) \
-    split = (range + 1) >> 1; \
-    if ( (value >> 24) < split ) \
-    { \
-        range = split; \
-        v= value_to_sign; \
-    } \
-    else \
-    { \
-        range = range-split; \
-        value = value-(split<<24); \
-        v = -value_to_sign; \
-    } \
-    range +=range;                   \
-    value +=value;                   \
-    if (!--count) \
-    { \
-        count = BR_COUNT; \
-        value |= *bufptr; \
-        bufptr++; \
-    }
-
-#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
-    { \
-        split = 1 +  ((( probability*(range-1) ) )>> 8); \
-        if ( (value >> 24) < split ) \
-        { \
-            range = split; \
-            NORMALIZE \
-            goto branch; \
-        } \
-        value -= (split<<24); \
-        range = range - split; \
-        NORMALIZE \
-    }
-
-#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
-    { \
-        split = 1 + ((( probability*(range-1) ) ) >> 8); \
-        if ( (value >> 24) < split ) \
-        { \
-            range = split; \
-            NORMALIZE \
-            Prob = coef_probs; \
-            ++c; \
-            Prob += vp8_coef_bands_x[c]; \
-            goto branch; \
-        } \
-        value -= (split<<24); \
-        range = range - split; \
-        NORMALIZE \
-    }
-
-#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
-    DECODE_AND_APPLYSIGN(val) \
-    Prob = coef_probs + (ENTROPY_NODES*2); \
-    if(c < 15){\
-        qcoeff_ptr [ scan[c] ] = (INT16) v; \
-        ++c; \
-        goto DO_WHILE; }\
-    qcoeff_ptr [ scan[15] ] = (INT16) v; \
-    goto BLOCK_FINISHED;
-
-
-#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
-    split = 1 +  (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
-    if(value >= (split<<24))\
-    {\
-        range = range-split;\
-        value = value-(split<<24);\
-        val += ((UINT16)1<<bits_count);\
-    }\
-    else\
-    {\
-        range = split;\
-    }\
-    NORMALIZE
-#endif
-
-#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-    const VP8_COMMON *const oc = & dx->common;
-
-    BOOL_DECODER *bc = x->current_bc;
-
-    ENTROPY_CONTEXT *a;
-    ENTROPY_CONTEXT *l;
-    int i;
-
-    int eobtotal = 0;
-
-    register int count;
-
-    BOOL_DATA *bufptr;
-    register unsigned int range;
-    register unsigned int value;
-    const int *scan;
-    register unsigned int shift;
-    UINT32 split;
-    INT16 *qcoeff_ptr;
-
-    UINT8 *coef_probs;
-    int type;
-    int stop;
-    INT16 val, bits_count;
-    INT16 c;
-    INT16 t;
-    INT16 v;
-    vp8_prob *Prob;
-
-    //int *scan;
-    type = 3;
-    i = 0;
-    stop = 16;
-
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-    {
-        i = 24;
-        stop = 24;
-        type = 1;
-        qcoeff_ptr = &x->qcoeff[24*16];
-        scan = vp8_default_zig_zag1d;
-        eobtotal -= 16;
-    }
-    else
-    {
-        scan = vp8_default_zig_zag1d;
-        qcoeff_ptr = &x->qcoeff[0];
-    }
-
-    count   = bc->count;
-    range   = bc->range;
-    value   = bc->value;
-    bufptr  = &bc->buffer[bc->pos];
-
-
-    coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
-    a = A[ vp8_block2context[i] ] + vp8_block2above[i];
-    l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-    c = (INT16)(!type);
-
-    VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
-    Prob = coef_probs;
-    Prob += t * ENTROPY_NODES;
-
-DO_WHILE:
-    Prob += vp8_coef_bands_x[c];
-    DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
-
-CHECK_0_:
-    DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_);
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val;
-    bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length;
-
-    do
-    {
-        DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count);
-        bits_count -- ;
-    }
-    while (bits_count >= 0);
-
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_FIVE_CONTEXT_NODE_0_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREEFOUR_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_);
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREE_CONTEXT_NODE_0_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-HIGH_LOW_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_);
-
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_ONE_CONTEXT_NODE_0_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-LOW_VAL_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
-
-THREE_CONTEXT_NODE_0_:
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
-
-TWO_CONTEXT_NODE_0_:
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
-
-ONE_CONTEXT_NODE_0_:
-    DECODE_AND_APPLYSIGN(1);
-    Prob = coef_probs + ENTROPY_NODES;
-
-    if (c < 15)
-    {
-        qcoeff_ptr [ scan[c] ] = (INT16) v;
-        ++c;
-        goto DO_WHILE;
-    }
-
-    qcoeff_ptr [ scan[15] ] = (INT16) v;
-BLOCK_FINISHED:
-    t = ((x->Block[i].eob = c) != !type);   // any nonzero data?
-    eobtotal += x->Block[i].eob;
-    *a = *l = t;
-    qcoeff_ptr += 16;
-
-    i++;
-
-    if (i < stop)
-        goto BLOCK_LOOP;
-
-    if (i == 25)
-    {
-        scan = vp8_default_zig_zag1d;//x->scan_order1d;
-        type = 0;
-        i = 0;
-        stop = 16;
-        coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-        qcoeff_ptr = &x->qcoeff[0];
-        goto BLOCK_LOOP;
-    }
-
-    if (i == 16)
-    {
-        type = 2;
-        coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-        stop = 24;
-        goto BLOCK_LOOP;
-    }
-
-    bc->count = count;
-    bc->value = value;
-    bc->range = range;
-    bc->pos  = bufptr - bc->buffer;
-    return eobtotal;
-
-}
-//#endif
-#else
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-#if 0
-//uses relative offsets
-
-const vp8_tree_index vp8_coef_tree_x[ 22] =   /* corresponding _CONTEXT_NODEs */
-{
-    -DCT_EOB_TOKEN, 1,                             /* 0 = EOB */
-    -ZERO_TOKEN, 1,                               /* 1 = ZERO */
-    -ONE_TOKEN, 1,                               /* 2 = ONE */
-    2, 5,                                       /* 3 = LOW_VAL */
-    -TWO_TOKEN, 1,                         /* 4 = TWO */
-    -THREE_TOKEN, -FOUR_TOKEN,                /* 5 = THREE */
-    2, 3,                                  /* 6 = HIGH_LOW */
-    -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2,   /* 7 = CAT_ONE */
-    2, 3,                                 /* 8 = CAT_THREEFOUR */
-    -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4,  /* 9 = CAT_THREE */
-    -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6   /* 10 = CAT_FIVE */
-};
-#endif
-
-#define _SCALEDOWN 8 //16 //8
-
-int vp8_decode_mb_tokens_v5(DETOK *detoken, int type);
-
-int vp8_decode_mb_tokens_v5_c(DETOK *detoken, int type)
-{
-    BOOL_DECODER *bc = detoken->current_bc;
-
-    ENTROPY_CONTEXT *a;
-    ENTROPY_CONTEXT *l;
-    int i;
-
-    register int count;
-
-    BOOL_DATA *bufptr;
-    register unsigned int range;
-    register unsigned int value;
-    register unsigned int shift;
-    UINT32 split;
-    INT16 *qcoeff_ptr;
-
-    UINT8 *coef_probs;
-//  int type;
-    int stop;
-    INT16 c;
-    INT16 t;
-    INT16 v;
-    vp8_prob *Prob;
-
-
-
-//  type = 3;
-    i = 0;
-    stop = 16;
-    qcoeff_ptr = detoken->qcoeff_start_ptr;
-
-//  if( detoken->mode != B_PRED && detoken->mode != SPLITMV)
-    if (type == 1)
-    {
-        i += 24;
-        stop += 8; //24;
-//      type = 1;
-        qcoeff_ptr += 24 * 16;
-//      eobtotal-=16;
-    }
-
-    count   = bc->count;
-    range   = bc->range;
-    value   = bc->value;
-    bufptr  = &bc->buffer[bc->pos];
-
-
-    coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
-    a = detoken->A[ detoken->ptr_onyxblock2context_leftabove[i] ];
-    l = detoken->L[ detoken->ptr_onyxblock2context_leftabove[i] ];
-    c = !type;
-    a += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2ABOVE_OFFSET];
-    l += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2LEFT_OFFSET];
-
-    //#define ONYX_COMBINEENTROPYCONTEXTS( Dest, A, B) \
-    //Dest = ((A)!=0) + ((B)!=0);
-
-    VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
-
-    Prob = coef_probs;
-    Prob += t * ENTROPY_NODES;
-    t = 0;
-
-    do
-    {
-
-        {
-//                  onyx_tree_index * onyx_coef_tree_ptr = onyx_coef_tree_x;
-
-            Prob += detoken->ptr_onyx_coef_bands_x[c];
-
-        GET_TOKEN_START:
-
-            do
-            {
-                split = 1 + (((range - 1) * (Prob[t>>1])) >> 8);
-
-                if (value >> 24 >= split)
-                {
-                    range = range - split;
-                    value = value - (split << 24);
-                    t += 1;
-
-                    //used to eliminate else branch
-                    split = range;
-                }
-
-                range = split;
-
-                t = detoken->vp8_coef_tree_ptr[ t ];
-
-                NORMALIZE
-
-            }
-            while (t  > 0) ;
-        }
-    GET_TOKEN_STOP:
-
-        if (t == -DCT_EOB_TOKEN)
-        {
-            break;
-        }
-
-        v = -t;
-
-        if (v > FOUR_TOKEN)
-        {
-            INT16 bits_count;
-            TOKENEXTRABITS *teb_ptr;
-
-//                      teb_ptr = &onyxd_token_extra_bits2[t];
-//                  teb_ptr = &onyxd_token_extra_bits2[v];
-            teb_ptr = &detoken->teb_base_ptr[v];
-
-
-            v = teb_ptr->min_val;
-            bits_count = teb_ptr->Length;
-
-            do
-            {
-                split = 1 + (((range - 1) * teb_ptr->Probs[bits_count]) >> _SCALEDOWN);
-
-                if ((value >> 24) >= split)
-                {
-                    range = range - split;
-                    value = value - (split << 24);
-                    v += ((UINT16)1 << bits_count);
-
-                    //used to eliminate else branch
-                    split = range;
-                }
-
-                range = split;
-
-                NORMALIZE
-
-                bits_count -- ;
-            }
-            while (bits_count >= 0);
-        }
-
-        Prob = coef_probs;
-
-        if (t)
-        {
-            split = 1 + (((range - 1) * vp8_prob_half) >> 8);
-
-            if ((value >> 24) >= split)
-            {
-                range = range - split;
-                value = value - (split << 24);
-                v = (v ^ -1) + 1;           /* negate w/out conditionals */
-
-                //used to eliminate else branch
-                split = range;
-            }
-
-            range = split;
-
-            NORMALIZE
-            Prob += ENTROPY_NODES;
-
-            if (t < -ONE_TOKEN)
-                Prob += ENTROPY_NODES;
-
-            t = -2;
-        }
-
-        //if t is zero, we will skip the eob table check
-        t += 2;
-        qcoeff_ptr [detoken->scan [c] ] = (INT16) v;
-
-    }
-    while (++c < 16);
-
-    if (t != -DCT_EOB_TOKEN)
-    {
-        --c;
-    }
-
-    t = ((detoken->eob[i] = c) != !type);   // any nonzero data?
-//  eobtotal += detoken->eob[i];
-    *a = *l = t;
-    qcoeff_ptr += 16;
-
-    i++;
-
-    if (i < stop)
-        goto BLOCK_LOOP;
-
-    if (i == 25)
-    {
-        type = 0;
-        i = 0;
-        stop = 16;
-//      coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-        coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-        qcoeff_ptr = detoken->qcoeff_start_ptr;
-        goto BLOCK_LOOP;
-    }
-
-    if (i == 16)
-    {
-        type = 2;
-//      coef_probs =(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-        coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-        stop = 24;
-        goto BLOCK_LOOP;
-    }
-
-    bc->count = count;
-    bc->value = value;
-    bc->range = range;
-    bc->pos  = bufptr - bc->buffer;
-    return 0;
-}
-//#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
-//  const ONYX_COMMON * const oc = & dx->common;
-    int eobtotal = 0;
-    int i, type;
-    /*
-        dx->detoken.norm_ptr = norm;
-        dx->detoken.onyx_coef_tree_ptr = onyx_coef_tree;
-        dx->detoken.ptr_onyxblock2context_leftabove = ONYXBLOCK2CONTEXT_LEFTABOVE;
-        dx->detoken.ptr_onyx_coef_bands_x = onyx_coef_bands_x;
-        dx->detoken.scan = default_zig_zag1d;
-        dx->detoken.teb_base_ptr = onyxd_token_extra_bits2;
-
-        dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
-        dx->detoken.A = x->above_context;
-        dx->detoken.L = x->left_context;
-
-        dx->detoken.coef_probs[0] = (unsigned char *)( oc->fc.coef_probs [0] [ 0 ] [0]);
-        dx->detoken.coef_probs[1] = (unsigned char *)( oc->fc.coef_probs [1] [ 0 ] [0]);
-        dx->detoken.coef_probs[2] = (unsigned char *)( oc->fc.coef_probs [2] [ 0 ] [0]);
-        dx->detoken.coef_probs[3] = (unsigned char *)( oc->fc.coef_probs [3] [ 0 ] [0]);
-    */
-
-    dx->detoken.current_bc = x->current_bc;
-    dx->detoken.A = x->above_context;
-    dx->detoken.L = x->left_context;
-
-    type = 3;
-
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-    {
-        type = 1;
-        eobtotal -= 16;
-    }
-
-    vp8_decode_mb_tokens_v5(&dx->detoken, type);
-
-    for (i = 0; i < 25; i++)
-    {
-        x->Block[i].eob = dx->detoken.eob[i];
-        eobtotal += dx->detoken.eob[i];
-    }
-
-    return eobtotal;
-}
-#endif
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm
deleted file mode 100644
index 4d87ee5..0000000
--- a/vp8/decoder/arm/detokenizearm_v6.asm
+++ /dev/null
@@ -1,364 +0,0 @@
-;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_decode_mb_tokens_v5|
-
-    AREA    |.text|, CODE, READONLY  ; name this block of code
-
-    INCLUDE vpx_asm_offsets.asm
-
-l_qcoeff    EQU     0
-l_i         EQU     4
-l_type      EQU     8
-l_stop      EQU     12
-l_c         EQU     16
-l_l_ptr      EQU     20
-l_a_ptr      EQU     24
-l_bc        EQU     28
-l_coef_ptr   EQU     32
-l_stacksize EQU     64
-
-
-;; constant offsets -- these should be created at build time
-c_onyxblock2left_offset      EQU 25
-c_onyxblock2above_offset     EQU 50
-c_entropy_nodes              EQU 11
-c_dct_eob_token              EQU 11
-
-|vp8_decode_mb_tokens_v5| PROC
-    stmdb       sp!, {r4 - r11, lr}
-    sub         sp, sp, #l_stacksize
-    mov         r7, r1
-    mov         r9, r0                      ;DETOK *detoken
-
-    ldr         r1, [r9, #detok_current_bc]
-    ldr         r0, [r9, #detok_qcoeff_start_ptr]
-    mov         r11, #0
-    mov         r3, #0x10
-
-    cmp         r7, #1
-    addeq       r11, r11, #24
-    addeq       r3, r3, #8
-    addeq       r0, r0, #3, 24
-
-    str         r0, [sp, #l_qcoeff]
-    str         r11, [sp, #l_i]
-    str         r7, [sp, #l_type]
-    str         r3, [sp, #l_stop]
-    str         r1, [sp, #l_bc]
-
-    add         lr, r9, r7, lsl #2
-
-    ldr         r2, [r1, #bool_decoder_buffer]
-    ldr         r3, [r1, #bool_decoder_pos]
-
-    ldr         r10, [lr, #detok_coef_probs]
-    ldr         r5, [r1, #bool_decoder_count]
-    ldr         r6, [r1, #bool_decoder_range]
-    ldr         r4, [r1, #bool_decoder_value]
-    add         r8, r2, r3
-
-    str         r10, [sp, #l_coef_ptr]
-
-
-    ;align 4
-BLOCK_LOOP
-    ldr         r3, [r9, #detok_ptr_onyxblock2context_leftabove]
-    ldr         r2, [r9, #DETOK_A]
-    ldr         r1, [r9, #DETOK_L]
-    ldrb        r12, [r3, +r11]                                 ; detoken->ptr_onyxblock2context_leftabove[i]
-
-    cmp         r7, #0                                          ; check type
-    moveq       r7, #1
-    movne       r7, #0
-
-    ldr         r0, [r2, +r12, lsl #2]                          ; a
-    add         r1, r1, r12, lsl #4
-    add         r3, r3, r11
-
-    ldrb        r2, [r3, #c_onyxblock2above_offset]
-    ldrb        r3, [r3, #c_onyxblock2left_offset]
-    mov         lr, #c_entropy_nodes
-;;  ;++
-
-    ldr         r2, [r0, +r2, lsl #2]!
-    add         r3, r1, r3, lsl #2
-    str         r3, [sp, #l_l_ptr]
-    ldr         r3, [r3]
-
-    cmp         r2, #0
-    movne       r2, #1
-    cmp         r3, #0
-    addne       r2, r2, #1
-
-    str         r0, [sp, #l_a_ptr]
-    smlabb      r0, r2, lr, r10
-    mov         r1, #0                                          ; t = 0
-    str         r7, [sp, #l_c]
-
-    ;align 4
-COEFF_LOOP
-    ldr         r3, [r9, #detok_ptr_onyx_coef_bands_x]
-    ldr         lr, [r9, #detok_onyx_coef_tree_ptr]
-
-;;the following two lines are used if onyx_coef_bands_x is UINT16
-;;  add         r3, r3, r7, lsl #1
-;;  ldrh        r3, [r3]
-
-;;the following line is used if onyx_coef_bands_x is UINT8
-    ldrb        r3, [r7, +r3]
-
-
-;;  ;++
-;;  pld         [r8]
-    ;++
-    add         r0, r0, r3
-
-    ;align 4
-get_token_loop
-    ldrb        r2, [r0, +r1, asr #1]
-    mov         r3, r6, lsl #8
-    sub         r3, r3, #256                    ;split = 1 +  (((range-1) * probability) >> 8)
-    mov         r10, #1
-
-    smlawb      r2, r3, r2, r10
-    ldrb        r12, [r8]                       ;load cx data byte in stall slot
-    ;++
-
-    subs        r3, r4, r2, lsl #24             ;x = value-(split<<24)
-    addhs       r1, r1, #1                      ;t += 1
-    movhs       r4, r3                          ;update value
-    subhs       r2, r6, r2                      ;range = range - split
-    movlo       r6, r2
-
-;;; ldrsbhs     r1, [r1, +lr]
-    ldrsb     r1, [r1, +lr]
-
-
-;; use branch for short pipelines ???
-;;  cmp         r2, #0x80
-;;  bcs         |$LN22@decode_mb_to|
-
-    clz         r3, r2
-    sub         r3, r3, #24
-    subs        r5, r5, r3
-    mov         r6, r2, lsl r3
-    mov         r4, r4, lsl r3
-
-;; use branch for short pipelines ???
-;;  bgt         |$LN22@decode_mb_to|
-
-    addle         r5, r5, #8
-    rsble         r3, r5, #8
-    addle         r8, r8, #1
-    orrle         r4, r4, r12, lsl r3
-
-;;|$LN22@decode_mb_to|
-
-    cmp         r1, #0
-    bgt         get_token_loop
-
-    cmn         r1, #c_dct_eob_token             ;if(t == -DCT_EOB_TOKEN)
-    beq         END_OF_BLOCK
-
-    rsb         lr, r1, #0                      ;v = -t;
-
-    cmp         lr, #4                          ;if(v > FOUR_TOKEN)
-    ble         SKIP_EXTRABITS
-
-    ldr         r3, [r9, #detok_teb_base_ptr]
-    mov         r11, #1
-    add         r7, r3, lr, lsl #4
-
-    ldrsh       lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val
-    ldrsh       r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length
-
-extrabits_loop
-    add         r3, r0, r7
-
-    ldrb        r2, [r3, #4]
-    mov         r3, r6, lsl #8
-    sub         r3, r3, #256                    ;split = 1 +  (((range-1) * probability) >> 8)
-    mov         r10, #1
-
-    smlawb      r2, r3, r2, r10
-    ldrb        r12, [r8]
-    ;++
-
-    subs        r10, r4, r2, lsl #24            ;x = value-(split<<24)
-    movhs       r4, r10                         ;update value
-    subhs       r2, r6, r2                      ;range = range - split
-    addhs       lr, lr, r11, lsl r0             ;v += ((UINT16)1<<bits_count)
-    movlo       r6, r2                          ;range = split
-
-
-;; use branch for short pipelines ???
-;;  cmp         r2, #0x80
-;;  bcs         |$LN10@decode_mb_to|
-
-    clz         r3, r2
-    sub         r3, r3, #24
-    subs        r5, r5, r3
-    mov         r6, r2, lsl r3                  ;range
-    mov         r4, r4, lsl r3                  ;value
-
-    addle       r5, r5, #8
-    addle       r8, r8, #1
-    rsble       r3, r5, #8
-    orrle       r4, r4, r12, lsl r3
-
-;;|$LN10@decode_mb_to|
-    subs         r0, r0, #1
-    bpl         extrabits_loop
-
-
-SKIP_EXTRABITS
-    ldr         r11, [sp, #l_qcoeff]
-    ldr         r0, [sp, #l_coef_ptr]
-
-    cmp         r1, #0                          ;check for nonzero token
-    beq         SKIP_EOB_CHECK              ;if t is zero, we will skip the eob table chec
-
-    sub         r3, r6, #1                      ;range - 1
-    ;++
-    mov         r3, r3, lsl #7                  ; *= onyx_prob_half  (128)
-    ;++
-    mov         r3, r3, lsr #8
-    add         r2, r3, #1                      ;split
-
-    subs        r3, r4, r2, lsl #24             ;x = value-(split<<24)
-    movhs       r4, r3                          ;update value
-    subhs       r2, r6, r2                      ;range = range - split
-    mvnhs       r3, lr
-    addhs       lr, r3, #1                      ;v = (v ^ -1) + 1
-    movlo       r6, r2                          ;range = split
-
-;; use branch for short pipelines ???
-;;  cmp         r2, #0x80
-;;  bcs         |$LN6@decode_mb_to|
-
-    clz         r3, r2
-    sub         r3, r3, #24
-    subs        r5, r5, r3
-    mov         r6, r2, lsl r3
-    mov         r4, r4, lsl r3
-    ldrleb      r2, [r8], #1
-    addle       r5, r5, #8
-    rsble       r3, r5, #8
-    orrle       r4, r4, r2, lsl r3
-
-;;|$LN6@decode_mb_to|
-    add         r0, r0, #0xB
-
-    cmn         r1, #1
-
-    addlt       r0, r0, #0xB
-
-    mvn         r1, #1
-
-SKIP_EOB_CHECK
-    ldr         r7, [sp, #l_c]
-    ldr         r3, [r9, #detok_scan]
-    add         r1, r1, #2
-    cmp         r7, #(0x10 - 1)                     ;assume one less for now.... increment below
-
-    ldr         r3, [r3, +r7, lsl #2]
-    add         r7, r7, #1
-    add         r3, r11, r3, lsl #1
-
-    str         r7, [sp, #l_c]
-    strh        lr, [r3]
-
-    blt         COEFF_LOOP
-
-    sub         r7, r7, #1                          ;if(t != -DCT_EOB_TOKEN) --c
-
-END_OF_BLOCK
-    ldr         r3, [sp, #l_type]
-    ldr         r10, [sp, #l_coef_ptr]
-    ldr         r0, [sp, #l_qcoeff]
-    ldr         r11, [sp, #l_i]
-    ldr         r12, [sp, #l_stop]
-
-    cmp         r3, #0
-    moveq       r1, #1
-    movne       r1, #0
-    add         r3, r11, r9
-
-    cmp         r7, r1
-    strb        r7, [r3, #detok_eob]
-
-    ldr         r7, [sp, #l_l_ptr]
-    ldr         r2, [sp, #l_a_ptr]
-    movne       r3, #1
-    moveq       r3, #0
-
-    add         r0, r0, #0x20
-    add         r11, r11, #1
-    str         r3, [r7]
-    str         r3, [r2]
-    str         r0, [sp, #l_qcoeff]
-    str         r11, [sp, #l_i]
-
-    cmp         r11, r12                            ;i >= stop ?
-    ldr         r7, [sp, #l_type]
-    mov         lr, #0xB
-
-    blt         BLOCK_LOOP
-
-    cmp         r11, #0x19
-    bne         ln2_decode_mb_to
-
-    ldr         r12, [r9, #detok_qcoeff_start_ptr]
-    ldr         r10, [r9, #detok_coef_probs]
-    mov         r7, #0
-    mov         r3, #0x10
-    str         r12, [sp, #l_qcoeff]
-    str         r7, [sp, #l_i]
-    str         r7, [sp, #l_type]
-    str         r3, [sp, #l_stop]
-
-    str         r10, [sp, #l_coef_ptr]
-
-    b           BLOCK_LOOP
-
-ln2_decode_mb_to
-    cmp         r11, #0x10
-    bne         ln1_decode_mb_to
-
-    ldr         r10, [r9, #0x30]
-
-    mov         r7, #2
-    mov         r3, #0x18
-
-    str         r7, [sp, #l_type]
-    str         r3, [sp, #l_stop]
-
-    str         r10, [sp, #l_coef_ptr]
-    b           BLOCK_LOOP
-
-ln1_decode_mb_to
-    ldr         r2, [sp, #l_bc]
-    mov         r0, #0
-    nop
-
-    ldr         r3, [r2, #bool_decoder_buffer]
-    str         r5, [r2, #bool_decoder_count]
-    str         r4, [r2, #bool_decoder_value]
-    sub         r3, r8, r3
-    str         r3, [r2, #bool_decoder_pos]
-    str         r6, [r2, #bool_decoder_range]
-
-    add         sp, sp, #l_stacksize
-    ldmia       sp!, {r4 - r11, pc}
-
-    ENDP  ; |vp8_decode_mb_tokens_v5|
-
-    END
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
index 455c83a..9dcf7b6 100644
--- a/vp8/decoder/arm/dsystemdependent.c
+++ b/vp8/decoder/arm/dsystemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -22,20 +23,14 @@
     pbi->mb.rtcd         = &pbi->common.rtcd;
 #if HAVE_ARMV7
     pbi->dequant.block   = vp8_dequantize_b_neon;
-    pbi->dequant.idct    = vp8_dequant_idct_neon;
-    pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon;
     pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.stop  = vp8dx_stop_decode_c;
     pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
     pbi->dboolhuff.devalue = vp8dx_decode_value_c;
 
 #elif HAVE_ARMV6
     pbi->dequant.block   = vp8_dequantize_b_v6;
-    pbi->dequant.idct    = vp8_dequant_idct_v6;
-    pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6;
     pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.stop  = vp8dx_stop_decode_c;
     pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
     pbi->dboolhuff.devalue = vp8dx_decode_value_c;
diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm
index 7ec62a3..ff3ffda 100644
--- a/vp8/decoder/arm/neon/dboolhuff_neon.asm
+++ b/vp8/decoder/arm/neon/dboolhuff_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm b/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
new file mode 100644
index 0000000..f68a780
--- /dev/null
+++ b/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
@@ -0,0 +1,136 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_dequant_dc_idct_add_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void vp8_dequant_dc_idct_add_neon(short *input, short *dq, unsigned char *pred,
+;                                  unsigned char *dest, int pitch, int stride,
+;                                  int Dc);
+; r0    short *input,
+; r1    short *dq,
+; r2    unsigned char *pred
+; r3    unsigned char *dest
+; sp    int pitch
+; sp+4  int stride
+; sp+8  int Dc
+|vp8_dequant_dc_idct_add_neon| PROC
+    vld1.16         {q3, q4}, [r0]
+    vld1.16         {q5, q6}, [r1]
+
+    ldr             r1, [sp, #8]            ;load Dc from stack
+
+    ldr             r12, _CONSTANTS_
+
+    vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
+    vmul.i16        q2, q4, q6
+
+    vmov.16         d2[0], r1
+
+    ldr             r1, [sp]                ; pitch
+    vld1.32         {d14[0]}, [r2], r1
+    vld1.32         {d14[1]}, [r2], r1
+    vld1.32         {d15[0]}, [r2], r1
+    vld1.32         {d15[1]}, [r2]
+
+    ldr             r1, [sp, #4]            ; stride
+
+;|short_idct4x4llm_neon| PROC
+    vld1.16         {d0}, [r12]
+    vswp            d3, d4                  ;q2(vp[4] vp[12])
+
+    vqdmulh.s16     q3, q2, d0[2]
+    vqdmulh.s16     q4, q2, d0[0]
+
+    vqadd.s16       d12, d2, d3             ;a1
+    vqsub.s16       d13, d2, d3             ;b1
+
+    vshr.s16        q3, q3, #1
+    vshr.s16        q4, q4, #1
+
+    vqadd.s16       q3, q3, q2
+    vqadd.s16       q4, q4, q2
+
+    vqsub.s16       d10, d6, d9             ;c1
+    vqadd.s16       d11, d7, d8             ;d1
+
+    vqadd.s16       d2, d12, d11
+    vqadd.s16       d3, d13, d10
+    vqsub.s16       d4, d13, d10
+    vqsub.s16       d5, d12, d11
+
+    vtrn.32         d2, d4
+    vtrn.32         d3, d5
+    vtrn.16         d2, d3
+    vtrn.16         d4, d5
+
+; memset(input, 0, 32) -- 32bytes
+    vmov.i16        q14, #0
+
+    vswp            d3, d4
+    vqdmulh.s16     q3, q2, d0[2]
+    vqdmulh.s16     q4, q2, d0[0]
+
+    vqadd.s16       d12, d2, d3             ;a1
+    vqsub.s16       d13, d2, d3             ;b1
+
+    vmov            q15, q14
+
+    vshr.s16        q3, q3, #1
+    vshr.s16        q4, q4, #1
+
+    vqadd.s16       q3, q3, q2
+    vqadd.s16       q4, q4, q2
+
+    vqsub.s16       d10, d6, d9             ;c1
+    vqadd.s16       d11, d7, d8             ;d1
+
+    vqadd.s16       d2, d12, d11
+    vqadd.s16       d3, d13, d10
+    vqsub.s16       d4, d13, d10
+    vqsub.s16       d5, d12, d11
+
+    vst1.16         {q14, q15}, [r0]
+
+    vrshr.s16       d2, d2, #3
+    vrshr.s16       d3, d3, #3
+    vrshr.s16       d4, d4, #3
+    vrshr.s16       d5, d5, #3
+
+    vtrn.32         d2, d4
+    vtrn.32         d3, d5
+    vtrn.16         d2, d3
+    vtrn.16         d4, d5
+
+    vaddw.u8        q1, q1, d14
+    vaddw.u8        q2, q2, d15
+
+    vqmovun.s16     d0, q1
+    vqmovun.s16     d1, q2
+
+    vst1.32         {d0[0]}, [r3], r1
+    vst1.32         {d0[1]}, [r3], r1
+    vst1.32         {d1[0]}, [r3], r1
+    vst1.32         {d1[1]}, [r3]
+
+    bx             lr
+
+    ENDP           ; |vp8_dequant_dc_idct_add_neon|
+
+; Constant Pool
+_CONSTANTS_       DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2       DCD 0x8a8c8a8c
+
+    END
diff --git a/vp8/decoder/arm/neon/dequant_idct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm
new file mode 100644
index 0000000..1923be4
--- /dev/null
+++ b/vp8/decoder/arm/neon/dequant_idct_neon.asm
@@ -0,0 +1,130 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_dequant_idct_add_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void vp8_dequant_idct_neon(short *input, short *dq, unsigned char *pred,
+;                           unsigned char *dest, int pitch, int stride)
+; r0    short *input,
+; r1    short *dq,
+; r2    unsigned char *pred
+; r3    unsigned char *dest
+; sp    int pitch
+; sp+4  int stride
+
+|vp8_dequant_idct_add_neon| PROC
+    vld1.16         {q3, q4}, [r0]
+    vld1.16         {q5, q6}, [r1]
+    ldr             r1, [sp]                ; pitch
+    vld1.32         {d14[0]}, [r2], r1
+    vld1.32         {d14[1]}, [r2], r1
+    vld1.32         {d15[0]}, [r2], r1
+    vld1.32         {d15[1]}, [r2]
+
+    ldr             r1, [sp, #4]            ; stride
+
+    ldr             r12, _CONSTANTS_
+
+    vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
+    vmul.i16        q2, q4, q6
+
+;|short_idct4x4llm_neon| PROC
+    vld1.16         {d0}, [r12]
+    vswp            d3, d4                  ;q2(vp[4] vp[12])
+
+    vqdmulh.s16     q3, q2, d0[2]
+    vqdmulh.s16     q4, q2, d0[0]
+
+    vqadd.s16       d12, d2, d3             ;a1
+    vqsub.s16       d13, d2, d3             ;b1
+
+    vshr.s16        q3, q3, #1
+    vshr.s16        q4, q4, #1
+
+    vqadd.s16       q3, q3, q2
+    vqadd.s16       q4, q4, q2
+
+    vqsub.s16       d10, d6, d9             ;c1
+    vqadd.s16       d11, d7, d8             ;d1
+
+    vqadd.s16       d2, d12, d11
+    vqadd.s16       d3, d13, d10
+    vqsub.s16       d4, d13, d10
+    vqsub.s16       d5, d12, d11
+
+    vtrn.32         d2, d4
+    vtrn.32         d3, d5
+    vtrn.16         d2, d3
+    vtrn.16         d4, d5
+
+; memset(input, 0, 32) -- 32bytes
+    vmov.i16        q14, #0
+
+    vswp            d3, d4
+    vqdmulh.s16     q3, q2, d0[2]
+    vqdmulh.s16     q4, q2, d0[0]
+
+    vqadd.s16       d12, d2, d3             ;a1
+    vqsub.s16       d13, d2, d3             ;b1
+
+    vmov            q15, q14
+
+    vshr.s16        q3, q3, #1
+    vshr.s16        q4, q4, #1
+
+    vqadd.s16       q3, q3, q2
+    vqadd.s16       q4, q4, q2
+
+    vqsub.s16       d10, d6, d9             ;c1
+    vqadd.s16       d11, d7, d8             ;d1
+
+    vqadd.s16       d2, d12, d11
+    vqadd.s16       d3, d13, d10
+    vqsub.s16       d4, d13, d10
+    vqsub.s16       d5, d12, d11
+
+    vst1.16         {q14, q15}, [r0]
+
+    vrshr.s16       d2, d2, #3
+    vrshr.s16       d3, d3, #3
+    vrshr.s16       d4, d4, #3
+    vrshr.s16       d5, d5, #3
+
+    vtrn.32         d2, d4
+    vtrn.32         d3, d5
+    vtrn.16         d2, d3
+    vtrn.16         d4, d5
+
+    vaddw.u8        q1, q1, d14
+    vaddw.u8        q2, q2, d15
+
+    vqmovun.s16     d0, q1
+    vqmovun.s16     d1, q2
+
+    vst1.32         {d0[0]}, [r3], r1
+    vst1.32         {d0[1]}, [r3], r1
+    vst1.32         {d1[0]}, [r3], r1
+    vst1.32         {d1[1]}, [r3]
+
+    bx             lr
+
+    ENDP           ; |vp8_dequant_idct_add_neon|
+
+; Constant Pool
+_CONSTANTS_       DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2       DCD 0x8a8c8a8c
+
+    END
diff --git a/vp8/decoder/arm/neon/dequantdcidct_neon.asm b/vp8/decoder/arm/neon/dequantdcidct_neon.asm
deleted file mode 100644
index 3392f2c..0000000
--- a/vp8/decoder/arm/neon/dequantdcidct_neon.asm
+++ /dev/null
@@ -1,133 +0,0 @@
-;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dequant_dc_idct_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc);
-; r0    short *input,
-; r1    short *dq,
-; r2    short *output,
-; r3    int pitch,
-; (stack)   int Dc
-|vp8_dequant_dc_idct_neon| PROC
-    vld1.16         {q3, q4}, [r0]
-    vld1.16         {q5, q6}, [r1]
-
-    ldr             r1, [sp]                ;load Dc from stack
-
-    ldr             r12, _dcidct_coeff_
-
-    vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
-    vmul.i16        q2, q4, q6
-
-    vmov.16         d2[0], r1
-
-;|short_idct4x4llm_neon| PROC
-    vld1.16         {d0}, [r12]
-    vswp            d3, d4                  ;q2(vp[4] vp[12])
-
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
-    vqadd.s16       q4, q4, q2
-
-    ;d6 - c1:temp1
-    ;d7 - d1:temp2
-    ;d8 - d1:temp1
-    ;d9 - c1:temp2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-; memset(input, 0, 32) -- 32bytes
-    vmov.i16        q14, #0
-
-    vswp            d3, d4
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vmov            q15, q14
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
-    vqadd.s16       q4, q4, q2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vst1.16         {q14, q15}, [r0]
-
-    vrshr.s16       d2, d2, #3
-    vrshr.s16       d3, d3, #3
-    vrshr.s16       d4, d4, #3
-    vrshr.s16       d5, d5, #3
-
-    add             r1, r2, r3
-    add             r12, r1, r3
-    add             r0, r12, r3
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-    vst1.16         {d2}, [r2]
-    vst1.16         {d3}, [r1]
-    vst1.16         {d4}, [r12]
-    vst1.16         {d5}, [r0]
-
-    bx             lr
-
-    ENDP
-
-;-----------------
-    AREA    dcidct4x4_dat, DATA, READWRITE          ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_dcidct_coeff_
-    DCD     dcidct_coeff
-dcidct_coeff
-    DCD     0x4e7b4e7b, 0x8a8c8a8c
-
-;20091, 20091, 35468, 35468
-
-    END
diff --git a/vp8/decoder/arm/neon/dequantidct_neon.asm b/vp8/decoder/arm/neon/dequantidct_neon.asm
deleted file mode 100644
index bba4d5d..0000000
--- a/vp8/decoder/arm/neon/dequantidct_neon.asm
+++ /dev/null
@@ -1,128 +0,0 @@
-;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dequant_idct_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch);
-; r0    short *input,
-; r1    short *dq,
-; r2    short *output,
-; r3    int pitch,
-|vp8_dequant_idct_neon| PROC
-    vld1.16         {q3, q4}, [r0]
-    vld1.16         {q5, q6}, [r1]
-
-    ldr             r12, _didct_coeff_
-
-    vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
-    vmul.i16        q2, q4, q6
-
-;|short_idct4x4llm_neon| PROC
-    vld1.16         {d0}, [r12]
-    vswp            d3, d4                  ;q2(vp[4] vp[12])
-
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
-    vqadd.s16       q4, q4, q2
-
-    ;d6 - c1:temp1
-    ;d7 - d1:temp2
-    ;d8 - d1:temp1
-    ;d9 - c1:temp2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-; memset(input, 0, 32) -- 32bytes
-    vmov.i16        q14, #0
-
-    vswp            d3, d4
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vmov            q15, q14
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
-    vqadd.s16       q4, q4, q2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vst1.16         {q14, q15}, [r0]
-
-    vrshr.s16       d2, d2, #3
-    vrshr.s16       d3, d3, #3
-    vrshr.s16       d4, d4, #3
-    vrshr.s16       d5, d5, #3
-
-    add             r1, r2, r3
-    add             r12, r1, r3
-    add             r0, r12, r3
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-    vst1.16         {d2}, [r2]
-    vst1.16         {d3}, [r1]
-    vst1.16         {d4}, [r12]
-    vst1.16         {d5}, [r0]
-
-    bx             lr
-
-    ENDP
-
-;-----------------
-    AREA    didct4x4_dat, DATA, READWRITE           ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_didct_coeff_
-    DCD     didct_coeff
-didct_coeff
-    DCD     0x4e7b4e7b, 0x8a8c8a8c
-
-;20091, 20091, 35468, 35468
-
-    END
diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm
index 1bde946..c8e0c31 100644
--- a/vp8/decoder/arm/neon/dequantizeb_neon.asm
+++ b/vp8/decoder/arm/neon/dequantizeb_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
new file mode 100644
index 0000000..4725e62
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_neon
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_dc_idct_add_neon (q, dq, pre, dst, 16, stride, dc[0]);
+        else
+            vp8_dc_only_idct_add_neon (dc[0], pre, dst, 16, stride);
+
+        if (eobs[1] > 1)
+            vp8_dequant_dc_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+        else
+            vp8_dc_only_idct_add_neon (dc[1], pre+4, dst+4, 16, stride);
+
+        if (eobs[2] > 1)
+            vp8_dequant_dc_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+        else
+            vp8_dc_only_idct_add_neon (dc[2], pre+8, dst+8, 16, stride);
+
+        if (eobs[3] > 1)
+            vp8_dequant_dc_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+        else
+            vp8_dc_only_idct_add_neon (dc[3], pre+12, dst+12, 16, stride);
+
+        q    += 64;
+        dc   += 4;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_neon
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_neon (q, dq, pre, dst, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dst, 16, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dst+4, 16, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        if (eobs[2] > 1)
+            vp8_dequant_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[32]*dq[0], pre+8, dst+8, 16, stride);
+            ((int *)(q+32))[0] = 0;
+        }
+
+        if (eobs[3] > 1)
+            vp8_dequant_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[48]*dq[0], pre+12, dst+12, 16, stride);
+            ((int *)(q+48))[0] = 0;
+        }
+
+        q    += 64;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_neon
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_neon (q, dq, pre, dstu, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstu, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstu+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstu += 4*stride;
+        eobs += 2;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_neon (q, dq, pre, dstv, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstv, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstv+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstv += 4*stride;
+        eobs += 2;
+    }
+}
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
index 442054e..57cba16 100644
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -12,7 +13,7 @@
 #include "vpx_ports/mem.h"
 #include "vpx_mem/vpx_mem.h"
 
-DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
+DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) =
 {
     0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -25,86 +26,41 @@
 };
 
 
-static void copy_in(BOOL_DECODER *br, unsigned int to_write)
-{
-    if (to_write > br->user_buffer_sz)
-        to_write = br->user_buffer_sz;
-
-    memcpy(br->write_ptr, br->user_buffer, to_write);
-    br->user_buffer += to_write;
-    br->user_buffer_sz -= to_write;
-    br->write_ptr = br_ptr_advance(br->write_ptr, to_write);
-}
-
 int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
                         unsigned int source_sz)
 {
-    br->lowvalue = 0;
+    br->user_buffer_end = source+source_sz;
+    br->user_buffer     = source;
+    br->value    = 0;
+    br->count    = -8;
     br->range    = 255;
-    br->count    = 0;
-    br->user_buffer    = source;
-    br->user_buffer_sz = source_sz;
 
     if (source_sz && !source)
         return 1;
 
-    /* Allocate the ring buffer backing store with alignment equal to the
-     * buffer size*2 so that a single pointer can be used for wrapping rather
-     * than a pointer+offset.
-     */
-    br->decode_buffer  = vpx_memalign(VP8_BOOL_DECODER_SZ * 2,
-                                      VP8_BOOL_DECODER_SZ);
-
-    if (!br->decode_buffer)
-        return 1;
-
     /* Populate the buffer */
-    br->read_ptr = br->decode_buffer;
-    br->write_ptr = br->decode_buffer;
-    copy_in(br, VP8_BOOL_DECODER_SZ);
+    vp8dx_bool_decoder_fill_c(br);
 
-    /* Read the first byte */
-    br->value = (*br->read_ptr++) << 8;
     return 0;
 }
 
 
 void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
 {
-    int          left, right;
+    const unsigned char *bufptr;
+    const unsigned char *bufend;
+    VP8_BD_VALUE         value;
+    int                  count;
+    bufend = br->user_buffer_end;
+    bufptr = br->user_buffer;
+    value = br->value;
+    count = br->count;
 
-    /* Find available room in the buffer */
-    left = 0;
-    right = br->read_ptr - br->write_ptr;
+    VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
 
-    if (right < 0)
-    {
-        /* Read pointer is behind the write pointer. We can write from the
-         * write pointer to the end of the buffer.
-         */
-        right = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer);
-        left = br->read_ptr - br->decode_buffer;
-    }
-
-    if (right + left < 128)
-        return;
-
-    if (right)
-        copy_in(br, right);
-
-    if (left)
-    {
-        br->write_ptr = br->decode_buffer;
-        copy_in(br, left);
-    }
-
-}
-
-
-void vp8dx_stop_decode_c(BOOL_DECODER *bc)
-{
-    vpx_free(bc->decode_buffer);
-    bc->decode_buffer = 0;
+    br->user_buffer = bufptr;
+    br->value = value;
+    br->count = count;
 }
 
 #if 0
@@ -119,13 +75,18 @@
 int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
 {
     unsigned int bit=0;
+    VP8_BD_VALUE value;
     unsigned int split;
-    unsigned int bigsplit;
-    register unsigned int range = br->range;
-    register unsigned int value = br->value;
+    VP8_BD_VALUE bigsplit;
+    int count;
+    unsigned int range;
+
+    value = br->value;
+    count = br->count;
+    range = br->range;
 
     split = 1 + (((range-1) * probability) >> 8);
-    bigsplit = (split<<8);
+    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
 
     range = split;
     if(value >= bigsplit)
@@ -143,21 +104,16 @@
     }*/
 
     {
-        int count = br->count;
         register unsigned int shift = vp8dx_bitreader_norm[range];
         range <<= shift;
         value <<= shift;
         count -= shift;
-        if(count <= 0)
-        {
-            value |= (*br->read_ptr) << (-count);
-            br->read_ptr = br_ptr_advance(br->read_ptr, 1);
-            count += 8 ;
-        }
-        br->count = count;
     }
     br->value = value;
+    br->count = count;
     br->range = range;
+    if (count < 0)
+        vp8dx_bool_decoder_fill_c(br);
     return bit;
 }
 
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index f5c9822..c72bc03 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -1,60 +1,41 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #ifndef DBOOLHUFF_H
 #define DBOOLHUFF_H
+#include <stddef.h>
+#include <limits.h>
 #include "vpx_ports/config.h"
 #include "vpx_ports/mem.h"
-#include "vpx_ports/vpx_integer.h"
+#include "vpx/vpx_integer.h"
 
-/* Size of the bool decoder backing storage
- *
- * This size was chosen to be greater than the worst case encoding of a
- * single macroblock. This was calcluated as follows (python):
- *
- *     def max_cost(prob):
- *         return max(prob_costs[prob], prob_costs[255-prob]) / 256;
- *
- *     tree_nodes_cost = 7 * max_cost(255)
- *     extra_bits_cost = sum([max_cost(bit) for bit in extra_bits])
- *     sign_bit_cost = max_cost(128)
- *     total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost
- *
- * where the prob_costs table was taken from the C vp8_prob_cost table in
- * boolhuff.c and the extra_bits table was taken from the 11 extrabits for
- * a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c
- *
- * This equation produced a maximum of 79 bits per coefficient. Scaling up
- * to the macroblock level:
- *
- *     79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb
- *
- *     4096 bytes = 32768 bits > 31600
- */
-#define VP8_BOOL_DECODER_SZ       4096
-#define VP8_BOOL_DECODER_MASK     (VP8_BOOL_DECODER_SZ-1)
-#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ))
+typedef size_t VP8_BD_VALUE;
+
+# define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
+/*This is meant to be a large, positive constant that can still be efficiently
+   loaded as an immediate (on platforms like ARM, for example).
+  Even relatively modest values like 100 would work fine.*/
+# define VP8_LOTS_OF_BITS (0x40000000)
+
+
 
 struct vp8_dboolhuff_rtcd_vtable;
 
 typedef struct
 {
-    unsigned int         lowvalue;
-    unsigned int         range;
-    unsigned int         value;
-    int                  count;
+    const unsigned char *user_buffer_end;
     const unsigned char *user_buffer;
-    unsigned int         user_buffer_sz;
-    unsigned char       *decode_buffer;
-    const unsigned char *read_ptr;
-    unsigned char       *write_ptr;
+    VP8_BD_VALUE         value;
+    int                  count;
+    unsigned int         range;
 #if CONFIG_RUNTIME_CPU_DETECT
     struct vp8_dboolhuff_rtcd_vtable *rtcd;
 #endif
@@ -62,7 +43,6 @@
 
 #define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
     const unsigned char *source, unsigned int source_sz)
-#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc)
 #define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
 #define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
 #define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits);
@@ -75,10 +55,6 @@
 #define vp8_dbool_start vp8dx_start_decode_c
 #endif
 
-#ifndef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_c
-#endif
-
 #ifndef vp8_dbool_fill
 #define vp8_dbool_fill vp8dx_bool_decoder_fill_c
 #endif
@@ -92,20 +68,17 @@
 #endif
 
 extern prototype_dbool_start(vp8_dbool_start);
-extern prototype_dbool_stop(vp8_dbool_stop);
 extern prototype_dbool_fill(vp8_dbool_fill);
 extern prototype_dbool_debool(vp8_dbool_debool);
 extern prototype_dbool_devalue(vp8_dbool_devalue);
 
 typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
-typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t));
 typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
 typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
 typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
 
 typedef struct vp8_dboolhuff_rtcd_vtable {
     vp8_dbool_start_fn_t   start;
-    vp8_dbool_stop_fn_t    stop;
     vp8_dbool_fill_fn_t    fill;
     vp8_dbool_debool_fn_t  debool;
     vp8_dbool_devalue_fn_t devalue;
@@ -122,18 +95,7 @@
 #define IF_RTCD(x) NULL
 //#endif
 
-static unsigned char *br_ptr_advance(const unsigned char *_ptr,
-                                     unsigned int n)
-{
-    uintptr_t  ptr = (uintptr_t)_ptr;
-
-    ptr += n;
-    ptr &= VP8_BOOL_DECODER_PTR_MASK;
-
-    return (void *)ptr;
-}
-
-DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
 
 /* wrapper functions to hide RTCD. static means inline means hopefully no
  * penalty
@@ -146,12 +108,34 @@
 #endif
     return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
 }
-static void vp8dx_stop_decode(BOOL_DECODER *br) {
-    DBOOLHUFF_INVOKE(br->rtcd, stop)(br);
-}
 static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
     DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
 }
+
+/*The refill loop is used in several places, so define it in a macro to make
+   sure they're all consistent.
+  An inline function would be cleaner, but has a significant penalty, because
+   multiple BOOL_DECODER fields must be modified, and the compiler is not smart
+   enough to eliminate the stores to those fields and the subsequent reloads
+   from them when inlining the function.*/
+#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
+    do \
+    { \
+        int shift; \
+        for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \
+        { \
+            if((_bufptr) >= (_bufend)) { \
+                (_count) = VP8_LOTS_OF_BITS; \
+                break; \
+            } \
+            (_count) += 8; \
+            (_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \
+            shift -= 8; \
+        } \
+    } \
+    while(0)
+
+
 static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
   /*
    * Until optimized versions of this function are available, we
@@ -160,13 +144,18 @@
    *return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
    */
     unsigned int bit = 0;
+    VP8_BD_VALUE value;
     unsigned int split;
-    unsigned int bigsplit;
-    register unsigned int range = br->range;
-    register unsigned int value = br->value;
+    VP8_BD_VALUE bigsplit;
+    int count;
+    unsigned int range;
+
+    value = br->value;
+    count = br->count;
+    range = br->range;
 
     split = 1 + (((range - 1) * probability) >> 8);
-    bigsplit = (split << 8);
+    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
 
     range = split;
 
@@ -185,23 +174,16 @@
     }*/
 
     {
-        int count = br->count;
         register unsigned int shift = vp8dx_bitreader_norm[range];
         range <<= shift;
         value <<= shift;
         count -= shift;
-
-        if (count <= 0)
-        {
-            value |= (*br->read_ptr) << (-count);
-            br->read_ptr = br_ptr_advance(br->read_ptr, 1);
-            count += 8 ;
-        }
-
-        br->count = count;
     }
     br->value = value;
+    br->count = count;
     br->range = range;
+    if(count < 0)
+        vp8dx_bool_decoder_fill(br);
     return bit;
 }
 
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
old mode 100644
new mode 100755
index 6035f3e..e9281f7
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -13,10 +14,126 @@
 #include "entropymode.h"
 #include "onyxd_int.h"
 #include "findnearmv.h"
-#include "demode.h"
+
 #if CONFIG_DEBUG
 #include <assert.h>
 #endif
+static int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
+{
+    const int i = vp8_treed_read(bc, vp8_bmode_tree, p);
+
+    return i;
+}
+
+
+static int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+    const int i = vp8_treed_read(bc, vp8_ymode_tree, p);
+
+    return i;
+}
+
+static int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+    const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p);
+
+    return i;
+}
+
+
+
+static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
+{
+    const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p);
+
+    return i;
+}
+
+static void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
+{
+    // Is segmentation enabled
+    if (x->segmentation_enabled && x->update_mb_segmentation_map)
+    {
+        // If so then read the segment id.
+        if (vp8_read(r, x->mb_segment_tree_probs[0]))
+            mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
+        else
+            mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
+    }
+}
+
+static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_col)
+{
+    vp8_reader *const bc = & pbi->bc;
+    const int mis = pbi->common.mode_info_stride;
+
+        {
+            MB_PREDICTION_MODE y_mode;
+
+            // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
+            // By default on a key frame reset all MBs to segment 0
+            m->mbmi.segment_id = 0;
+
+            if (pbi->mb.update_mb_segmentation_map)
+                vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
+
+            // Read the macroblock coeff skip flag if this feature is in use, else default to 0
+            if (pbi->common.mb_no_coeff_skip)
+                m->mbmi.mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
+            else
+                m->mbmi.mb_skip_coeff = 0;
+
+            y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, pbi->common.kf_ymode_prob);
+
+            m->mbmi.ref_frame = INTRA_FRAME;
+
+            if ((m->mbmi.mode = y_mode) == B_PRED)
+            {
+                int i = 0;
+
+                do
+                {
+                    const B_PREDICTION_MODE A = vp8_above_bmi(m, i, mis)->mode;
+                    const B_PREDICTION_MODE L = vp8_left_bmi(m, i)->mode;
+
+                    m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]);
+                }
+                while (++i < 16);
+            }
+            else
+            {
+                int BMode;
+                int i = 0;
+
+                switch (y_mode)
+                {
+                case DC_PRED:
+                    BMode = B_DC_PRED;
+                    break;
+                case V_PRED:
+                    BMode = B_VE_PRED;
+                    break;
+                case H_PRED:
+                    BMode = B_HE_PRED;
+                    break;
+                case TM_PRED:
+                    BMode = B_TM_PRED;
+                    break;
+                default:
+                    BMode = B_DC_PRED;
+                    break;
+                }
+
+                do
+                {
+                    m->bmi[i].mode = (B_PREDICTION_MODE)BMode;
+                }
+                while (++i < 16);
+            }
+
+            m->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.kf_uv_mode_prob);
+        }
+}
 
 static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
 {
@@ -98,6 +215,8 @@
 
     return (MB_PREDICTION_MODE)i;
 }
+
+#ifdef VPX_MODE_COUNT
 unsigned int vp8_mv_cont_count[5][4] =
 {
     { 0, 0, 0, 0 },
@@ -106,309 +225,327 @@
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 }
 };
+#endif
 
-void vp8_decode_mode_mvs(VP8D_COMP *pbi)
+unsigned char vp8_mbsplit_offset[4][16] = {
+    { 0,  8,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  8, 10,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}
+};
+
+unsigned char vp8_mbsplit_fill_count[4] = {8, 8, 4, 1};
+unsigned char vp8_mbsplit_fill_offset[4][16] = {
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
+    { 0,  1,  4,  5,  8,  9, 12, 13,  2,  3,   6,  7, 10, 11, 14, 15},
+    { 0,  1,  4,  5,  2,  3,  6,  7,  8,  9,  12, 13, 10, 11, 14, 15},
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}
+};
+
+
+
+
+void vp8_mb_mode_mv_init(VP8D_COMP *pbi)
+{
+    vp8_reader *const bc = & pbi->bc;
+    MV_CONTEXT *const mvc = pbi->common.fc.mvc;
+
+    pbi->prob_skip_false = 0;
+    if (pbi->common.mb_no_coeff_skip)
+        pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
+
+    if(pbi->common.frame_type != KEY_FRAME)
+    {
+        pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8);
+        pbi->prob_last  = (vp8_prob)vp8_read_literal(bc, 8);
+        pbi->prob_gf    = (vp8_prob)vp8_read_literal(bc, 8);
+
+        if (vp8_read_bit(bc))
+        {
+            int i = 0;
+
+            do
+            {
+                pbi->common.fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+            }
+            while (++i < 4);
+        }
+
+        if (vp8_read_bit(bc))
+        {
+            int i = 0;
+
+            do
+            {
+                pbi->common.fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+            }
+            while (++i < 3);
+        }
+
+        read_mvcontexts(bc, mvc);
+    }
+}
+
+void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
+                            int mb_row, int mb_col)
 {
     const MV Zero = { 0, 0};
-
-    VP8_COMMON *const pc = & pbi->common;
     vp8_reader *const bc = & pbi->bc;
+    MV_CONTEXT *const mvc = pbi->common.fc.mvc;
+    const int mis = pbi->common.mode_info_stride;
 
-    MODE_INFO *mi = pc->mi, *ms;
-    const int mis = pc->mode_info_stride;
+    MV *const mv = & mbmi->mv.as_mv;
+    int mb_to_left_edge;
+    int mb_to_right_edge;
+    int mb_to_top_edge;
+    int mb_to_bottom_edge;
 
-    MV_CONTEXT *const mvc = pc->fc.mvc;
+    mb_to_top_edge = pbi->mb.mb_to_top_edge;
+    mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge;
+    mb_to_top_edge -= LEFT_TOP_MARGIN;
+    mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
 
-    int mb_row = -1;
+    mbmi->need_to_clamp_mvs = 0;
+    // Distance of Mb to the various image edges.
+    // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
+    pbi->mb.mb_to_left_edge =
+    mb_to_left_edge = -((mb_col * 16) << 3);
+    mb_to_left_edge -= LEFT_TOP_MARGIN;
 
-    vp8_prob prob_intra;
-    vp8_prob prob_last;
-    vp8_prob prob_gf;
-    vp8_prob prob_skip_false = 0;
+    pbi->mb.mb_to_right_edge =
+    mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3;
+    mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
 
-    if (pc->mb_no_coeff_skip)
-        prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
+    // If required read in new segmentation data for this MB
+    if (pbi->mb.update_mb_segmentation_map)
+        vp8_read_mb_features(bc, mbmi, &pbi->mb);
 
-    prob_intra = (vp8_prob)vp8_read_literal(bc, 8);
-    prob_last  = (vp8_prob)vp8_read_literal(bc, 8);
-    prob_gf    = (vp8_prob)vp8_read_literal(bc, 8);
+    // Read the macroblock coeff skip flag if this feature is in use, else default to 0
+    if (pbi->common.mb_no_coeff_skip)
+        mbmi->mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
+    else
+        mbmi->mb_skip_coeff = 0;
 
-    ms = pc->mi - 1;
-
-    if (vp8_read_bit(bc))
+    if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra)))    /* inter MB */
     {
-        int i = 0;
+        int rct[4];
+        vp8_prob mv_ref_p [VP8_MVREFS-1];
+        MV nearest, nearby, best_mv;
 
-        do
+        if (vp8_read(bc, pbi->prob_last))
         {
-            pc->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+            mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, pbi->prob_gf)));
         }
-        while (++i < 4);
-    }
 
-    if (vp8_read_bit(bc))
-    {
-        int i = 0;
+        vp8_find_near_mvs(&pbi->mb, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
 
-        do
+        vp8_mv_ref_probs(mv_ref_p, rct);
+
+        mbmi->uv_mode = DC_PRED;
+        switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
         {
-            pc->fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
-        }
-        while (++i < 3);
-    }
-
-    read_mvcontexts(bc, mvc);
-
-    while (++mb_row < pc->mb_rows)
-    {
-        int mb_col = -1;
-
-        while (++mb_col < pc->mb_cols)
+        case SPLITMV:
         {
-            MB_MODE_INFO *const mbmi = & mi->mbmi;
-            MV *const mv = & mbmi->mv.as_mv;
-            VP8_COMMON *const pc = &pbi->common;
-            MACROBLOCKD *xd = &pbi->mb;
+            const int s = mbmi->partitioning =
+                      vp8_treed_read(bc, vp8_mbsplit_tree, vp8_mbsplit_probs);
+            const int num_p = vp8_mbsplit_count [s];
+            int j = 0;
 
-            vp8dx_bool_decoder_fill(bc);
-
-            // Distance of Mb to the various image edges.
-            // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
-            xd->mb_to_left_edge = -((mb_col * 16) << 3);
-            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-
-            // If required read in new segmentation data for this MB
-            if (pbi->mb.update_mb_segmentation_map)
-                vp8_read_mb_features(bc, mbmi, &pbi->mb);
-
-            // Read the macroblock coeff skip flag if this feature is in use, else default to 0
-            if (pc->mb_no_coeff_skip)
-                mbmi->mb_skip_coeff = vp8_read(bc, prob_skip_false);
-            else
-                mbmi->mb_skip_coeff = 0;
-
-            mbmi->uv_mode = DC_PRED;
-
-            if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, prob_intra)))    /* inter MB */
+            do  /* for each subset j */
             {
-                int rct[4];
-                vp8_prob mv_ref_p [VP8_MVREFS-1];
-                MV nearest, nearby, best_mv;
+                B_MODE_INFO bmi;
+                MV *const mv = & bmi.mv.as_mv;
 
-                if (vp8_read(bc, prob_last))
+                int k;  /* first block in subset j */
+                int mv_contz;
+                k = vp8_mbsplit_offset[s][j];
+
+                mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
+
+                switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
                 {
-                    mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, prob_gf)));
-                }
-
-                vp8_find_near_mvs(xd, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
-
-                vp8_mv_ref_probs(mv_ref_p, rct);
-
-                switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
-                {
-                case SPLITMV:
-                {
-                    const int s = mbmi->partitioning = vp8_treed_read(
-                                                           bc, vp8_mbsplit_tree, vp8_mbsplit_probs
-                                                       );
-                    const int num_p = vp8_mbsplit_count [s];
-                    const int *const  L = vp8_mbsplits [s];
-                    int j = 0;
-
-                    do  /* for each subset j */
-                    {
-                        B_MODE_INFO *const bmi = mbmi->partition_bmi + j;
-                        MV *const mv = & bmi->mv.as_mv;
-
-                        int k = -1;  /* first block in subset j */
-                        int mv_contz;
-
-                        while (j != L[++k])
-                            if (k >= 16)
-#if CONFIG_DEBUG
-                                assert(0);
-
-#else
-                                ;
-#endif
-
-                        mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
-
-                        switch (bmi->mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
-                        {
-                        case NEW4X4:
-                            read_mv(bc, mv, (const MV_CONTEXT *) mvc);
-                            mv->row += best_mv.row;
-                            mv->col += best_mv.col;
-#ifdef VPX_MODE_COUNT
-                            vp8_mv_cont_count[mv_contz][3]++;
-#endif
-                            break;
-                        case LEFT4X4:
-                            *mv = vp8_left_bmi(mi, k)->mv.as_mv;
-#ifdef VPX_MODE_COUNT
-                            vp8_mv_cont_count[mv_contz][0]++;
-#endif
-                            break;
-                        case ABOVE4X4:
-                            *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv;
-#ifdef VPX_MODE_COUNT
-                            vp8_mv_cont_count[mv_contz][1]++;
-#endif
-                            break;
-                        case ZERO4X4:
-                            *mv = Zero;
-#ifdef VPX_MODE_COUNT
-                            vp8_mv_cont_count[mv_contz][2]++;
-#endif
-                            break;
-                        default:
-                            break;
-                        }
-
-                        /* Fill (uniform) modes, mvs of jth subset.
-                           Must do it here because ensuing subsets can
-                           refer back to us via "left" or "above". */
-                        do
-                            if (j == L[k])
-                                mi->bmi[k] = *bmi;
-
-                        while (++k < 16);
-                    }
-                    while (++j < num_p);
-                }
-
-                *mv = mi->bmi[15].mv.as_mv;
-
-                break;  /* done with SPLITMV */
-
-                case NEARMV:
-                    *mv = nearby;
-
-                    // Clip "next_nearest" so that it does not extend to far out of image
-                    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
-                        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
-                    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
-                    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
-                        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
-                    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-
-                    goto propagate_mv;
-
-                case NEARESTMV:
-                    *mv = nearest;
-
-                    // Clip "next_nearest" so that it does not extend to far out of image
-                    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
-                        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
-                    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
-                    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
-                        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
-                    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-
-                    goto propagate_mv;
-
-                case ZEROMV:
-                    *mv = Zero;
-                    goto propagate_mv;
-
-                case NEWMV:
+                case NEW4X4:
                     read_mv(bc, mv, (const MV_CONTEXT *) mvc);
                     mv->row += best_mv.row;
                     mv->col += best_mv.col;
-                    /* Encoder should not produce invalid motion vectors, but since
-                     * arbitrary length MVs can be parsed from the bitstream, we
-                     * need to clamp them here in case we're reading bad data to
-                     * avoid a crash.
-                     */
-#if CONFIG_DEBUG
-                    assert(mv->col >= (xd->mb_to_left_edge - LEFT_TOP_MARGIN));
-                    assert(mv->col <= (xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN));
-                    assert(mv->row >= (xd->mb_to_top_edge - LEFT_TOP_MARGIN));
-                    assert(mv->row <= (xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN));
-#endif
-
-                    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
-                        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
-                    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
-                    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
-                        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
-                    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-
-                propagate_mv:  /* same MV throughout */
-                    {
-                        //int i=0;
-                        //do
-                        //{
-                        //  mi->bmi[i].mv.as_mv = *mv;
-                        //}
-                        //while( ++i < 16);
-
-                        mi->bmi[0].mv.as_mv = *mv;
-                        mi->bmi[1].mv.as_mv = *mv;
-                        mi->bmi[2].mv.as_mv = *mv;
-                        mi->bmi[3].mv.as_mv = *mv;
-                        mi->bmi[4].mv.as_mv = *mv;
-                        mi->bmi[5].mv.as_mv = *mv;
-                        mi->bmi[6].mv.as_mv = *mv;
-                        mi->bmi[7].mv.as_mv = *mv;
-                        mi->bmi[8].mv.as_mv = *mv;
-                        mi->bmi[9].mv.as_mv = *mv;
-                        mi->bmi[10].mv.as_mv = *mv;
-                        mi->bmi[11].mv.as_mv = *mv;
-                        mi->bmi[12].mv.as_mv = *mv;
-                        mi->bmi[13].mv.as_mv = *mv;
-                        mi->bmi[14].mv.as_mv = *mv;
-                        mi->bmi[15].mv.as_mv = *mv;
-                    }
-
+  #ifdef VPX_MODE_COUNT
+                    vp8_mv_cont_count[mv_contz][3]++;
+  #endif
                     break;
+                case LEFT4X4:
+                    *mv = vp8_left_bmi(mi, k)->mv.as_mv;
+  #ifdef VPX_MODE_COUNT
+                    vp8_mv_cont_count[mv_contz][0]++;
+  #endif
+                    break;
+                case ABOVE4X4:
+                    *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv;
+  #ifdef VPX_MODE_COUNT
+                    vp8_mv_cont_count[mv_contz][1]++;
+  #endif
+                    break;
+                case ZERO4X4:
+                    *mv = Zero;
+  #ifdef VPX_MODE_COUNT
+                    vp8_mv_cont_count[mv_contz][2]++;
+  #endif
+                    break;
+                default:
+                    break;
+                }
 
-                default:;
-#if CONFIG_DEBUG
-                    assert(0);
-#endif
+                mbmi->need_to_clamp_mvs = (mv->col < mb_to_left_edge) ? 1 : 0;
+                mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0;
+                mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0;
+                mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0;
+
+                {
+                    /* Fill (uniform) modes, mvs of jth subset.
+                     Must do it here because ensuing subsets can
+                     refer back to us via "left" or "above". */
+                    unsigned char *fill_offset;
+                    unsigned int fill_count = vp8_mbsplit_fill_count[s];
+
+                    fill_offset = &vp8_mbsplit_fill_offset[s][(unsigned char)j * vp8_mbsplit_fill_count[s]];
+
+                    do {
+                        mi->bmi[ *fill_offset] = bmi;
+                      fill_offset++;
+
+                    }while (--fill_count);
                 }
 
             }
-            else
+            while (++j < num_p);
+        }
+
+        *mv = mi->bmi[15].mv.as_mv;
+
+        break;  /* done with SPLITMV */
+
+        case NEARMV:
+            *mv = nearby;
+            // Clip "next_nearest" so that it does not extend to far out of image
+            mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
+            mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
+            mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
+            mv->row = (mv->row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->row;
+            goto propagate_mv;
+
+        case NEARESTMV:
+            *mv = nearest;
+            // Clip "next_nearest" so that it does not extend to far out of image
+            mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
+            mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
+            mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
+            mv->row = (mv->row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->row;
+            goto propagate_mv;
+
+        case ZEROMV:
+            *mv = Zero;
+            goto propagate_mv;
+
+        case NEWMV:
+            read_mv(bc, mv, (const MV_CONTEXT *) mvc);
+            mv->row += best_mv.row;
+            mv->col += best_mv.col;
+
+            /* Don't need to check this on NEARMV and NEARESTMV modes
+             * since those modes clamp the MV. The NEWMV mode does not,
+             * so signal to the prediction stage whether special
+             * handling may be required.
+             */
+            mbmi->need_to_clamp_mvs = (mv->col < mb_to_left_edge) ? 1 : 0;
+            mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0;
+            mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0;
+            mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0;
+
+        propagate_mv:  /* same MV throughout */
             {
-                /* MB is intra coded */
+                //int i=0;
+                //do
+                //{
+                //  mi->bmi[i].mv.as_mv = *mv;
+                //}
+                //while( ++i < 16);
 
-                int j = 0;
-
-                do
-                {
-                    mi->bmi[j].mv.as_mv = Zero;
-                }
-                while (++j < 16);
-
-                *mv = Zero;
-
-                if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pc->fc.ymode_prob)) == B_PRED)
-                {
-                    int j = 0;
-
-                    do
-                    {
-                        mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pc->fc.bmode_prob);
-                    }
-                    while (++j < 16);
-                }
-
-                mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pc->fc.uv_mode_prob);
+                mi->bmi[0].mv.as_mv = *mv;
+                mi->bmi[1].mv.as_mv = *mv;
+                mi->bmi[2].mv.as_mv = *mv;
+                mi->bmi[3].mv.as_mv = *mv;
+                mi->bmi[4].mv.as_mv = *mv;
+                mi->bmi[5].mv.as_mv = *mv;
+                mi->bmi[6].mv.as_mv = *mv;
+                mi->bmi[7].mv.as_mv = *mv;
+                mi->bmi[8].mv.as_mv = *mv;
+                mi->bmi[9].mv.as_mv = *mv;
+                mi->bmi[10].mv.as_mv = *mv;
+                mi->bmi[11].mv.as_mv = *mv;
+                mi->bmi[12].mv.as_mv = *mv;
+                mi->bmi[13].mv.as_mv = *mv;
+                mi->bmi[14].mv.as_mv = *mv;
+                mi->bmi[15].mv.as_mv = *mv;
             }
+            break;
+        default:;
+  #if CONFIG_DEBUG
+            assert(0);
+  #endif
+        }
+    }
+    else
+    {
+        /* MB is intra coded */
+        int j = 0;
+        do
+        {
+            mi->bmi[j].mv.as_mv = Zero;
+        }
+        while (++j < 16);
+
+        if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED)
+        {
+            j = 0;
+            do
+            {
+                mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pbi->common.fc.bmode_prob);
+            }
+            while (++j < 16);
+        }
+
+        mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.fc.uv_mode_prob);
+    }
+
+}
+
+void vp8_decode_mode_mvs(VP8D_COMP *pbi)
+{
+    MODE_INFO *mi = pbi->common.mi;
+    int mb_row = -1;
+
+    vp8_mb_mode_mv_init(pbi);
+
+    while (++mb_row < pbi->common.mb_rows)
+    {
+        int mb_col = -1;
+        int mb_to_top_edge;
+        int mb_to_bottom_edge;
+
+        pbi->mb.mb_to_top_edge =
+        mb_to_top_edge = -((mb_row * 16)) << 3;
+        mb_to_top_edge -= LEFT_TOP_MARGIN;
+
+        pbi->mb.mb_to_bottom_edge =
+        mb_to_bottom_edge = ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
+        mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+
+        while (++mb_col < pbi->common.mb_cols)
+        {
+//          vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);
+            if(pbi->common.frame_type == KEY_FRAME)
+                vp8_kfread_modes(pbi, mi, mb_row, mb_col);
+            else
+                vp8_read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
 
             mi++;       // next macroblock
         }
@@ -416,3 +553,4 @@
         mi++;           // skip left predictor each row
     }
 }
+
diff --git a/vp8/decoder/decodemv.h b/vp8/decoder/decodemv.h
index 4030071..9403424 100644
--- a/vp8/decoder/decodemv.h
+++ b/vp8/decoder/decodemv.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
index ebc5c27..c8e3f02 100644
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -17,6 +18,7 @@
 
 extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
                                  MACROBLOCKD *xd);
+extern void vp8_mt_loop_filter_frame(VP8D_COMP *pbi);
 extern void vp8_stop_lfthread(VP8D_COMP *pbi);
 extern void vp8_start_lfthread(VP8D_COMP *pbi);
 extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 4edf4f6..4f5b7c7 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -20,9 +21,9 @@
 #include "alloccommon.h"
 #include "entropymode.h"
 #include "quant_common.h"
-#include "segmentation_common.h"
+
 #include "setupintrarecon.h"
-#include "demode.h"
+
 #include "decodemv.h"
 #include "extend.h"
 #include "vpx_mem/vpx_mem.h"
@@ -112,7 +113,7 @@
 // to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
 static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
-    if (xd->frame_type == KEY_FRAME  ||  xd->mbmi.ref_frame == INTRA_FRAME)
+    if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
 
         vp8_build_intra_predictors_mbuv_s(xd);
@@ -125,42 +126,114 @@
     }
 }
 
-static void reconstruct_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
+static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
 {
-    if (xd->frame_type == KEY_FRAME  ||  xd->mbmi.ref_frame == INTRA_FRAME)
+    /* If the MV points so far into the UMV border that no visible pixels
+     * are used for reconstruction, the subpel part of the MV can be
+     * discarded and the MV limited to 16 pixels with equivalent results.
+     *
+     * This limit kicks in at 19 pixels for the top and left edges, for
+     * the 16 pixels plus 3 taps right of the central pixel when subpel
+     * filtering. The bottom and right edges use 16 pixels plus 2 pixels
+     * left of the central pixel when filtering.
+     */
+    if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
+        mv->col = xd->mb_to_left_edge - (16 << 3);
+    else if (mv->col > xd->mb_to_right_edge + (18 << 3))
+        mv->col = xd->mb_to_right_edge + (16 << 3);
+
+    if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
+        mv->row = xd->mb_to_top_edge - (16 << 3);
+    else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
+        mv->row = xd->mb_to_bottom_edge + (16 << 3);
+}
+
+/* A version of the above function for chroma block MVs.*/
+static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
+{
+    mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ? (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
+    mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ? (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
+
+    mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ? (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
+    mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
+}
+
+static void clamp_mvs(MACROBLOCKD *xd)
+{
+    if (xd->mode_info_context->mbmi.mode == SPLITMV)
+    {
+        int i;
+
+        for (i=0; i<16; i++)
+            clamp_mv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd);
+        for (i=16; i<24; i++)
+            clamp_uvmv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd);
+    }
+    else
+    {
+        clamp_mv_to_umv_border(&xd->mode_info_context->mbmi.mv.as_mv, xd);
+        clamp_uvmv_to_umv_border(&xd->block[16].bmi.mv.as_mv, xd);
+    }
+
+}
+
+void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
+{
+    int eobtotal = 0;
+    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
+
+    if (xd->mode_info_context->mbmi.mb_skip_coeff)
+    {
+        vp8_reset_mb_tokens_context(xd);
+    }
+    else
+    {
+        eobtotal = vp8_decode_mb_tokens(pbi, xd);
+    }
+
+    /* Perform temporary clamping of the MV to be used for prediction */
+    if (do_clamp)
+    {
+        clamp_mvs(xd);
+    }
+
+    xd->mode_info_context->mbmi.dc_diff = 1;
+
+    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
+    {
+        xd->mode_info_context->mbmi.dc_diff = 0;
+        skip_recon_mb(pbi, xd);
+        return;
+    }
+
+    if (xd->segmentation_enabled)
+        mb_init_dequantizer(pbi, xd);
+
+    // do prediction
+    if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
         vp8_build_intra_predictors_mbuv(xd);
 
-        if (xd->mbmi.mode != B_PRED)
+        if (xd->mode_info_context->mbmi.mode != B_PRED)
         {
             vp8_build_intra_predictors_mby_ptr(xd);
-            vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
-        }
-        else
-        {
-            vp8_recon_intra4x4mb(RTCD_VTABLE(recon), xd);
+        } else {
+            vp8_intra_prediction_down_copy(xd);
         }
     }
     else
     {
         vp8_build_inter_predictors_mb(xd);
-        vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
     }
-}
 
-
-static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd)
-{
-    int i;
-    BLOCKD *b = &xd->block[24];
-
-
-    if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
+    // dequantization and idct
+    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
     {
+        BLOCKD *b = &xd->block[24];
         DEQUANT_INVOKE(&pbi->dequant, block)(b);
 
         // do 2nd order transform on the dc block
-        if (b->eob > 1)
+        if (xd->eobs[24] > 1)
         {
             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
             ((int *)b->qcoeff)[0] = 0;
@@ -178,84 +251,47 @@
             ((int *)b->qcoeff)[0] = 0;
         }
 
-
+        DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
+                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                         xd->predictor, xd->dst.y_buffer,
+                         xd->dst.y_stride, xd->eobs, xd->block[24].diff);
+    }
+    else if ((xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
+    {
         for (i = 0; i < 16; i++)
         {
 
-            b = &xd->block[i];
+            BLOCKD *b = &xd->block[i];
+            vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
 
-            if (b->eob > 1)
+            if (xd->eobs[i] > 1)
             {
-                DEQUANT_INVOKE(&pbi->dequant, idct_dc)(b->qcoeff, &b->dequant[0][0], b->diff, 32, xd->block[24].diff[i]);
+                DEQUANT_INVOKE(&pbi->dequant, idct_add)
+                    (b->qcoeff, &b->dequant[0][0],  b->predictor,
+                    *(b->base_dst) + b->dst, 16, b->dst_stride);
             }
             else
             {
-                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(xd->block[24].diff[i], b->diff, 32);
-            }
-        }
-
-        for (i = 16; i < 24; i++)
-        {
-            b = &xd->block[i];
-
-            if (b->eob > 1)
-            {
-                DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, 16);
-            }
-            else
-            {
-                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, 16);
+                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+                    (b->qcoeff[0] * b->dequant[0][0], b->predictor,
+                    *(b->base_dst) + b->dst, 16, b->dst_stride);
                 ((int *)b->qcoeff)[0] = 0;
             }
         }
+
     }
     else
     {
-        for (i = 0; i < 24; i++)
-        {
-
-            b = &xd->block[i];
-
-            if (b->eob > 1)
-            {
-                DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, (32 - (i & 16)));
-            }
-            else
-            {
-                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, (32 - (i & 16)));
-                ((int *)b->qcoeff)[0] = 0;
-            }
-        }
-    }
-}
-
-void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
-{
-    int eobtotal = 0;
-
-    if (xd->mbmi.mb_skip_coeff)
-    {
-        vp8_reset_mb_tokens_context(xd);
-    }
-    else
-    {
-        eobtotal = vp8_decode_mb_tokens(pbi, xd);
+        DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                         xd->predictor, xd->dst.y_buffer,
+                         xd->dst.y_stride, xd->eobs);
     }
 
-    xd->mode_info_context->mbmi.dc_diff = 1;
-
-    if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV && eobtotal == 0)
-    {
-        xd->mode_info_context->mbmi.dc_diff = 0;
-        skip_recon_mb(pbi, xd);
-        return;
-    }
-
-    if (xd->segmentation_enabled)
-        mb_init_dequantizer(pbi, xd);
-
-    de_quantand_idct(pbi, xd);
-    reconstruct_mb(pbi, xd);
+    DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+                    (xd->qcoeff+16*16, &xd->block[16].dequant[0][0],
+                     xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
+                     xd->dst.uv_stride, xd->eobs+16);
 }
 
 static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
@@ -293,18 +329,17 @@
     int i;
     int recon_yoffset, recon_uvoffset;
     int mb_col;
-    int recon_y_stride = pc->last_frame.y_stride;
-    int recon_uv_stride = pc->last_frame.uv_stride;
+    int ref_fb_idx = pc->lst_fb_idx;
+    int dst_fb_idx = pc->new_fb_idx;
+    int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+    int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
-    vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
+    vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
     recon_yoffset = mb_row * recon_y_stride * 16;
     recon_uvoffset = mb_row * recon_uv_stride * 8;
     // reset above block coeffs
 
-    xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
-    xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
-    xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
-    xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
+    xd->above_context = pc->above_context;
     xd->up_available = (mb_row != 0);
 
     xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@@ -312,10 +347,8 @@
 
     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
     {
-        // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
-        vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
 
-        if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+        if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
         {
             for (i = 0; i < 16; i++)
             {
@@ -329,43 +362,32 @@
         xd->mb_to_left_edge = -((mb_col * 16) << 3);
         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
-        xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
-        xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
-        xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
+        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 
         xd->left_available = (mb_col != 0);
 
         // Select the appropriate reference frame for this MB
-        if (xd->mbmi.ref_frame == LAST_FRAME)
-        {
-            xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
-        }
-        else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
-        {
-            // Golden frame reconstruction buffer
-            xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
-        }
+        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+            ref_fb_idx = pc->lst_fb_idx;
+        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+            ref_fb_idx = pc->gld_fb_idx;
         else
-        {
-            // Alternate reference frame reconstruction buffer
-            xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
-        }
+            ref_fb_idx = pc->alt_fb_idx;
+
+        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
 
         vp8_build_uvmvs(xd, pc->full_pixel);
 
         /*
-        if(pbi->common.current_video_frame==0 &&mb_col==1 && mb_row==0)
+        if(pc->current_video_frame==0 &&mb_col==1 && mb_row==0)
         pbi->debugoutput =1;
         else
         pbi->debugoutput =0;
         */
-        vp8dx_bool_decoder_fill(xd->current_bc);
         vp8_decode_macroblock(pbi, xd);
 
 
@@ -374,19 +396,14 @@
 
         ++xd->mode_info_context;  /* next mb */
 
-        xd->gf_active_ptr++;      // GF useage flag for next MB
-
-        xd->above_context[Y1CONTEXT] += 4;
-        xd->above_context[UCONTEXT ] += 2;
-        xd->above_context[VCONTEXT ] += 2;
-        xd->above_context[Y2CONTEXT] ++;
+        xd->above_context++;
 
         pbi->current_mb_col_main = mb_col;
     }
 
     // adjust to the next row of mbs
     vp8_extend_mb_row(
-        &pc->new_frame,
+        &pc->yv12_fb[dst_fb_idx],
         xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
     );
 
@@ -473,18 +490,7 @@
     VP8_COMMON *pc = &pbi->common;
 
     if (pc->multi_token_partition != ONE_PARTITION)
-    {
-        int num_part = (1 << pc->multi_token_partition);
-
-        for (i = 0; i < num_part; i++)
-        {
-            vp8dx_stop_decode(&pbi->mbc[i]);
-        }
-
         vpx_free(pbi->mbc);
-    }
-    else
-        vp8dx_stop_decode(& pbi->bc2);
 }
 
 static void init_frame(VP8D_COMP *pbi)
@@ -545,10 +551,10 @@
         }
     }
 
-    xd->left_context = pc->left_context;
+    xd->left_context = &pc->left_context;
     xd->mode_info_context = pc->mi;
     xd->frame_type = pc->frame_type;
-    xd->mbmi.mode = DC_PRED;
+    xd->mode_info_context->mbmi.mode = DC_PRED;
     xd->mode_info_stride = pc->mode_info_stride;
 }
 
@@ -609,7 +615,7 @@
                                    "Invalid frame height");
             }
 
-            if (vp8_alloc_frame_buffers(&pbi->common, pc->Width, pc->Height))
+            if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height))
                 vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
                                    "Failed to allocate frame buffers");
         }
@@ -793,7 +799,6 @@
     }
 
 
-    vp8dx_bool_decoder_fill(bc);
     {
         // read coef probability tree
 
@@ -813,11 +818,11 @@
                     }
     }
 
-    vpx_memcpy(&xd->pre, &pc->last_frame, sizeof(YV12_BUFFER_CONFIG));
-    vpx_memcpy(&xd->dst, &pc->new_frame, sizeof(YV12_BUFFER_CONFIG));
+    vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
+    vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
 
     // set up frame new frame for intra coded blocks
-    vp8_setup_intra_recon(&pc->new_frame);
+    vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
 
     vp8_setup_block_dptrs(xd);
 
@@ -829,34 +834,25 @@
     // Read the mb_no_coeff_skip flag
     pc->mb_no_coeff_skip = (int)vp8_read_bit(bc);
 
-    if (pc->frame_type == KEY_FRAME)
-        vp8_kfread_modes(pbi);
-    else
-        vp8_decode_mode_mvs(pbi);
 
-    // reset since these guys are used as iterators
-    vpx_memset(pc->above_context[Y1CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 4);
-    vpx_memset(pc->above_context[UCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
-    vpx_memset(pc->above_context[VCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
-    vpx_memset(pc->above_context[Y2CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols);
+    vp8_decode_mode_mvs(pbi);
 
-    xd->gf_active_ptr = (signed char *)pc->gf_active_flags;     // Point to base of GF active flags data structure
-
+    vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
 
     vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
 
 
-    if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
+    if (pbi->b_multithreaded_lf && pc->filter_level != 0)
         vp8_start_lfthread(pbi);
 
-    if (pbi->b_multithreaded_rd && pbi->common.multi_token_partition != ONE_PARTITION)
+    if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION)
     {
         vp8_mtdecode_mb_rows(pbi, xd);
     }
     else
     {
         int ibc = 0;
-        int num_part = 1 << pbi->common.multi_token_partition;
+        int num_part = 1 << pc->multi_token_partition;
 
         // Decode the individual macro block
         for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
@@ -880,13 +876,14 @@
 
     stop_token_decoder(pbi);
 
-    vp8dx_stop_decode(bc);
-
     // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes  \n",bc->pos+pbi->bc2.pos);
 
     // If this was a kf or Gf note the Q used
-    if ((pc->frame_type == KEY_FRAME) || (pc->refresh_golden_frame) || pbi->common.refresh_alt_ref_frame)
+    if ((pc->frame_type == KEY_FRAME) ||
+         pc->refresh_golden_frame || pc->refresh_alt_ref_frame)
+    {
         pc->last_kf_gf_q = pc->base_qindex;
+    }
 
     if (pc->refresh_entropy_probs == 0)
     {
diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c
deleted file mode 100644
index fd05e6d..0000000
--- a/vp8/decoder/demode.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-#include "onyxd_int.h"
-#include "entropymode.h"
-#include "findnearmv.h"
-
-
-int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
-{
-    const int i = vp8_treed_read(bc, vp8_bmode_tree, p);
-
-    return i;
-}
-
-
-int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
-{
-    const int i = vp8_treed_read(bc, vp8_ymode_tree, p);
-
-    return i;
-}
-
-int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
-{
-    const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p);
-
-    return i;
-}
-
-
-
-int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
-{
-    const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p);
-
-    return i;
-}
-
-void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
-{
-    // Is segmentation enabled
-    if (x->segmentation_enabled && x->update_mb_segmentation_map)
-    {
-        // If so then read the segment id.
-        if (vp8_read(r, x->mb_segment_tree_probs[0]))
-            mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
-        else
-            mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
-    }
-}
-
-void vp8_kfread_modes(VP8D_COMP *pbi)
-{
-    VP8_COMMON *const cp = & pbi->common;
-    vp8_reader *const bc = & pbi->bc;
-
-    MODE_INFO *m = cp->mi;
-    const int ms = cp->mode_info_stride;
-
-    int mb_row = -1;
-    vp8_prob prob_skip_false = 0;
-
-    if (cp->mb_no_coeff_skip)
-        prob_skip_false = (vp8_prob)(vp8_read_literal(bc, 8));
-
-    while (++mb_row < cp->mb_rows)
-    {
-        int mb_col = -1;
-
-        while (++mb_col < cp->mb_cols)
-        {
-            MB_PREDICTION_MODE y_mode;
-
-            vp8dx_bool_decoder_fill(bc);
-            // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
-            // By default on a key frame reset all MBs to segment 0
-            m->mbmi.segment_id = 0;
-
-            if (pbi->mb.update_mb_segmentation_map)
-                vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
-
-            // Read the macroblock coeff skip flag if this feature is in use, else default to 0
-            if (cp->mb_no_coeff_skip)
-                m->mbmi.mb_skip_coeff = vp8_read(bc, prob_skip_false);
-            else
-                m->mbmi.mb_skip_coeff = 0;
-
-            y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, cp->kf_ymode_prob);
-
-            m->mbmi.ref_frame = INTRA_FRAME;
-
-            if ((m->mbmi.mode = y_mode) == B_PRED)
-            {
-                int i = 0;
-
-                do
-                {
-                    const B_PREDICTION_MODE A = vp8_above_bmi(m, i, ms)->mode;
-                    const B_PREDICTION_MODE L = vp8_left_bmi(m, i)->mode;
-
-                    m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, cp->kf_bmode_prob [A] [L]);
-                }
-                while (++i < 16);
-            }
-            else
-            {
-                int BMode;
-                int i = 0;
-
-                switch (y_mode)
-                {
-                case DC_PRED:
-                    BMode = B_DC_PRED;
-                    break;
-                case V_PRED:
-                    BMode = B_VE_PRED;
-                    break;
-                case H_PRED:
-                    BMode = B_HE_PRED;
-                    break;
-                case TM_PRED:
-                    BMode = B_TM_PRED;
-                    break;
-                default:
-                    BMode = B_DC_PRED;
-                    break;
-                }
-
-                do
-                {
-                    m->bmi[i].mode = (B_PREDICTION_MODE)BMode;
-                }
-                while (++i < 16);
-            }
-
-            (m++)->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, cp->kf_uv_mode_prob);
-        }
-
-        m++; // skip the border
-    }
-}
diff --git a/vp8/decoder/demode.h b/vp8/decoder/demode.h
deleted file mode 100644
index 51bbc5e..0000000
--- a/vp8/decoder/demode.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-#include "onyxd_int.h"
-
-/* Read (intra) modes for all blocks in a keyframe */
-
-void vp8_kfread_modes(VP8D_COMP *pbi);
-
-/* Intra mode for a Y subblock */
-
-int vp8_read_bmode(vp8_reader *, const vp8_prob *);
-
-/* MB intra Y mode trees differ for key and inter frames. */
-
-int   vp8_read_ymode(vp8_reader *, const vp8_prob *);
-int vp8_kfread_ymode(vp8_reader *, const vp8_prob *);
-
-/* MB intra UV mode trees are the same for key and inter frames. */
-
-int vp8_read_uv_mode(vp8_reader *, const vp8_prob *);
-
-/* Read any macroblock-level features that may be present. */
-
-void vp8_read_mb_features(vp8_reader *, MB_MODE_INFO *, MACROBLOCKD *);
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 14798d9..8cfa2a3 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -31,8 +32,12 @@
     }
 }
 
-void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch)
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
+                            unsigned char *dest, int pitch, int stride)
 {
+    short output[16];
+    short *diff_ptr = output;
+    int r, c;
     int i;
 
     for (i = 0; i < 16; i++)
@@ -40,13 +45,40 @@
         input[i] = dq[i] * input[i];
     }
 
-    vp8_short_idct4x4llm_c(input, output, pitch);
+    // the idct halves ( >> 1) the pitch
+    vp8_short_idct4x4llm_c(input, output, 4 << 1);
+
     vpx_memset(input, 0, 32);
+
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            int a = diff_ptr[c] + pred[c];
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dest[c] = (unsigned char) a;
+        }
+
+        dest += stride;
+        diff_ptr += 4;
+        pred += pitch;
+    }
 }
 
-void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc)
+void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
+                               unsigned char *dest, int pitch, int stride,
+                               int Dc)
 {
     int i;
+    short output[16];
+    short *diff_ptr = output;
+    int r, c;
 
     input[0] = (short)Dc;
 
@@ -55,6 +87,28 @@
         input[i] = dq[i] * input[i];
     }
 
-    vp8_short_idct4x4llm_c(input, output, pitch);
+    // the idct halves ( >> 1) the pitch
+    vp8_short_idct4x4llm_c(input, output, 4 << 1);
+
     vpx_memset(input, 0, 32);
+
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            int a = diff_ptr[c] + pred[c];
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dest[c] = (unsigned char) a;
+        }
+
+        dest += stride;
+        diff_ptr += 4;
+        pred += pitch;
+    }
 }
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index d16b02e..b78e39c 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -15,11 +16,31 @@
 #define prototype_dequant_block(sym) \
     void sym(BLOCKD *x)
 
-#define prototype_dequant_idct(sym) \
-    void sym(short *input, short *dq, short *output, int pitch)
+#define prototype_dequant_idct_add(sym) \
+    void sym(short *input, short *dq, \
+             unsigned char *pred, unsigned char *output, \
+             int pitch, int stride)
 
-#define prototype_dequant_idct_dc(sym) \
-    void sym(short *input, short *dq, short *output, int pitch, int dc)
+#define prototype_dequant_dc_idct_add(sym) \
+    void sym(short *input, short *dq, \
+             unsigned char *pred, unsigned char *output, \
+             int pitch, int stride, \
+             int dc)
+
+#define prototype_dequant_dc_idct_add_y_block(sym) \
+    void sym(short *q, short *dq, \
+             unsigned char *pre, unsigned char *dst, \
+             int stride, char *eobs, short *dc)
+
+#define prototype_dequant_idct_add_y_block(sym) \
+    void sym(short *q, short *dq, \
+             unsigned char *pre, unsigned char *dst, \
+             int stride, char *eobs)
+
+#define prototype_dequant_idct_add_uv_block(sym) \
+    void sym(short *q, short *dq, \
+             unsigned char *pre, unsigned char *dst_u, \
+             unsigned char *dst_v, int stride, char *eobs)
 
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/dequantize_x86.h"
@@ -34,25 +55,52 @@
 #endif
 extern prototype_dequant_block(vp8_dequant_block);
 
-#ifndef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_c
+#ifndef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_c
 #endif
-extern prototype_dequant_idct(vp8_dequant_idct);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add);
 
-#ifndef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_c
+#ifndef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c
 #endif
-extern prototype_dequant_idct_dc(vp8_dequant_idct_dc);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add);
+
+#ifndef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_c
+#endif
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block);
+
+#ifndef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
+#endif
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block);
+
+#ifndef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
+#endif
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block);
 
 
 typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
-typedef prototype_dequant_idct((*vp8_dequant_idct_fn_t));
-typedef prototype_dequant_idct_dc((*vp8_dequant_idct_dc_fn_t));
+
+typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
+
+typedef prototype_dequant_dc_idct_add((*vp8_dequant_dc_idct_add_fn_t));
+
+typedef prototype_dequant_dc_idct_add_y_block((*vp8_dequant_dc_idct_add_y_block_fn_t));
+
+typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t));
+
+typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t));
+
 typedef struct
 {
-    vp8_dequant_block_fn_t    block;
-    vp8_dequant_idct_fn_t     idct;
-    vp8_dequant_idct_dc_fn_t  idct_dc;
+    vp8_dequant_block_fn_t               block;
+    vp8_dequant_idct_add_fn_t            idct_add;
+    vp8_dequant_dc_idct_add_fn_t         dc_idct_add;
+    vp8_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block;
+    vp8_dequant_idct_add_y_block_fn_t    idct_add_y_block;
+    vp8_dequant_idct_add_uv_block_fn_t   idct_add_uv_block;
 } vp8_dequant_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index a42f18d..65c7d53 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -13,12 +14,12 @@
 #include "onyxd_int.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
+#include "detokenize.h"
 
-#define BR_COUNT 8
 #define BOOL_DATA UINT8
 
 #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-DECLARE_ALIGNED(16, UINT16, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
+DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
 #define EOB_CONTEXT_NODE            0
 #define ZERO_CONTEXT_NODE           1
 #define ONE_CONTEXT_NODE            2
@@ -60,32 +61,54 @@
 
 void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
 {
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-
-    ENTROPY_CONTEXT *a;
-    ENTROPY_CONTEXT *l;
-    int i;
-
-    for (i = 0; i < 24; i++)
+    /* Clear entropy contexts for Y2 blocks */
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
-
-        a = A[ vp8_block2context[i] ] + vp8_block2above[i];
-        l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-
-        *a = *l = 0;
+        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
     }
-
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    else
     {
-        a = A[Y2CONTEXT] + vp8_block2above[24];
-        l = L[Y2CONTEXT] + vp8_block2left[24];
-        *a = *l = 0;
+        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
     }
-
-
 }
-DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+
+#if CONFIG_ARM_ASM_DETOK
+// mashup of vp8_block2left and vp8_block2above so we only need one pointer
+// for the assembly version.
+DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
+{
+    //vp8_block2left
+    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+    //vp8_block2above
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
+};
+
+void vp8_init_detokenizer(VP8D_COMP *dx)
+{
+    const VP8_COMMON *const oc = & dx->common;
+    MACROBLOCKD *x = & dx->mb;
+
+    dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
+    dx->detoken.ptr_block2leftabove = vp8_block2leftabove;
+    dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x;
+    dx->detoken.scan = vp8_default_zig_zag1d;
+    dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
+    dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
+
+    dx->detoken.coef_probs[0] = (oc->fc.coef_probs [0] [ 0 ] [0]);
+    dx->detoken.coef_probs[1] = (oc->fc.coef_probs [1] [ 0 ] [0]);
+    dx->detoken.coef_probs[2] = (oc->fc.coef_probs [2] [ 0 ] [0]);
+    dx->detoken.coef_probs[3] = (oc->fc.coef_probs [3] [ 0 ] [0]);
+}
+#endif
+
+DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
+#define FILL \
+    if(count < 0) \
+        VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+
 #define NORMALIZE \
     /*if(range < 0x80)*/                            \
     { \
@@ -93,17 +116,13 @@
         range <<= shift; \
         value <<= shift; \
         count -= shift; \
-        if(count <= 0) \
-        { \
-            count += BR_COUNT ; \
-            value |= (*bufptr) << (BR_COUNT-count); \
-            bufptr = br_ptr_advance(bufptr, 1); \
-        } \
     }
 
 #define DECODE_AND_APPLYSIGN(value_to_sign) \
     split = (range + 1) >> 1; \
-    if ( (value >> 8) < split ) \
+    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+    FILL \
+    if ( value < bigsplit ) \
     { \
         range = split; \
         v= value_to_sign; \
@@ -111,28 +130,25 @@
     else \
     { \
         range = range-split; \
-        value = value-(split<<8); \
+        value = value-bigsplit; \
         v = -value_to_sign; \
     } \
     range +=range;                   \
     value +=value;                   \
-    if (!--count) \
-    { \
-        count = BR_COUNT; \
-        value |= *bufptr; \
-        bufptr = br_ptr_advance(bufptr, 1); \
-    }
+    count--;
 
 #define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
     { \
         split = 1 +  ((( probability*(range-1) ) )>> 8); \
-        if ( (value >> 8) < split ) \
+        bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+        FILL \
+        if ( value < bigsplit ) \
         { \
             range = split; \
             NORMALIZE \
             goto branch; \
         } \
-        value -= (split<<8); \
+        value -= bigsplit; \
         range = range - split; \
         NORMALIZE \
     }
@@ -140,7 +156,9 @@
 #define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
     { \
         split = 1 + ((( probability*(range-1) ) ) >> 8); \
-        if ( (value >> 8) < split ) \
+        bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+        FILL \
+        if ( value < bigsplit ) \
         { \
             range = split; \
             NORMALIZE \
@@ -151,7 +169,7 @@
             goto branch; \
             } goto BLOCK_FINISHED; /*for malformed input */\
         } \
-        value -= (split<<8); \
+        value -= bigsplit; \
         range = range - split; \
         NORMALIZE \
     }
@@ -169,10 +187,12 @@
 
 #define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
     split = 1 +  (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
-    if(value >= (split<<8))\
+    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+    FILL \
+    if(value >= bigsplit)\
     {\
         range = range-split;\
-        value = value-(split<<8);\
+        value = value-bigsplit;\
         val += ((UINT16)1<<bits_count);\
     }\
     else\
@@ -181,14 +201,45 @@
     }\
     NORMALIZE
 
+#if CONFIG_ARM_ASM_DETOK
 int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
 {
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+    int eobtotal = 0;
+    int i, type;
+
+    dx->detoken.current_bc = x->current_bc;
+    dx->detoken.A = x->above_context;
+    dx->detoken.L = x->left_context;
+
+    type = 3;
+
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+    {
+        type = 1;
+        eobtotal -= 16;
+    }
+
+    vp8_decode_mb_tokens_v6(&dx->detoken, type);
+
+    for (i = 0; i < 25; i++)
+    {
+        x->eobs[i] = dx->detoken.eob[i];
+        eobtotal += dx->detoken.eob[i];
+    }
+
+    return eobtotal;
+}
+#else
+int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
+{
+    ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
+    ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
     const VP8_COMMON *const oc = & dx->common;
 
     BOOL_DECODER *bc = x->current_bc;
 
+    char *eobs = x->eobs;
+
     ENTROPY_CONTEXT *a;
     ENTROPY_CONTEXT *l;
     int i;
@@ -198,11 +249,13 @@
     register int count;
 
     const BOOL_DATA *bufptr;
+    const BOOL_DATA *bufend;
     register unsigned int range;
-    register unsigned int value;
+    VP8_BD_VALUE value;
     const int *scan;
     register unsigned int shift;
     UINT32 split;
+    VP8_BD_VALUE bigsplit;
     INT16 *qcoeff_ptr;
 
     const vp8_prob *coef_probs;
@@ -210,46 +263,44 @@
     int stop;
     INT16 val, bits_count;
     INT16 c;
-    INT16 t;
     INT16 v;
     const vp8_prob *Prob;
 
-    //int *scan;
     type = 3;
     i = 0;
     stop = 16;
 
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    scan = vp8_default_zig_zag1d;
+    qcoeff_ptr = &x->qcoeff[0];
+
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
         i = 24;
         stop = 24;
         type = 1;
-        qcoeff_ptr = &x->qcoeff[24*16];
-        scan = vp8_default_zig_zag1d;
+        qcoeff_ptr += 24*16;
         eobtotal -= 16;
     }
-    else
-    {
-        scan = vp8_default_zig_zag1d;
-        qcoeff_ptr = &x->qcoeff[0];
-    }
 
+    bufend  = bc->user_buffer_end;
+    bufptr  = bc->user_buffer;
+    value   = bc->value;
     count   = bc->count;
     range   = bc->range;
-    value   = bc->value;
-    bufptr  = bc->read_ptr;
 
 
     coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
 
 BLOCK_LOOP:
-    a = A[ vp8_block2context[i] ] + vp8_block2above[i];
-    l = L[ vp8_block2context[i] ] + vp8_block2left[i];
+    a = A + vp8_block2above[i];
+    l = L + vp8_block2left[i];
+
     c = (INT16)(!type);
 
-    VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
+//    Dest = ((A)!=0) + ((B)!=0);
+    VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
     Prob = coef_probs;
-    Prob += t * ENTROPY_NODES;
+    Prob += v * ENTROPY_NODES;
 
 DO_WHILE:
     Prob += vp8_coef_bands_x[c];
@@ -336,9 +387,8 @@
 
     qcoeff_ptr [ scan[15] ] = (INT16) v;
 BLOCK_FINISHED:
-    t = ((x->block[i].eob = c) != !type);   // any nonzero data?
-    eobtotal += x->block[i].eob;
-    *a = *l = t;
+    *a = *l = ((eobs[i] = c) != !type);   // any nonzero data?
+    eobtotal += c;
     qcoeff_ptr += 16;
 
     i++;
@@ -348,12 +398,11 @@
 
     if (i == 25)
     {
-        scan = vp8_default_zig_zag1d;//x->scan_order1d;
         type = 0;
         i = 0;
         stop = 16;
         coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
-        qcoeff_ptr = &x->qcoeff[0];
+        qcoeff_ptr -= (24*16 + 16);
         goto BLOCK_LOOP;
     }
 
@@ -365,10 +414,12 @@
         goto BLOCK_LOOP;
     }
 
-    bc->count = count;
+    FILL
+    bc->user_buffer = bufptr;
     bc->value = value;
+    bc->count = count;
     bc->range = range;
-    bc->read_ptr = bufptr;
     return eobtotal;
 
 }
+#endif //!CONFIG_ASM_DETOK
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index 6a9a476..294a4a5 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -1,19 +1,24 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
-#ifndef detokenize_h
-#define detokenize_h 1
+#ifndef DETOKENIZE_H
+#define DETOKENIZE_H
 
 #include "onyxd_int.h"
 
+#if ARCH_ARM
+#include "arm/detokenize_arm.h"
+#endif
+
 void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
 int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
 
-#endif /* detokenize_h */
+#endif /* DETOKENIZE_H */
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 302b64b..60f2af5 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -18,13 +19,15 @@
 {
     // Pure C:
 #if CONFIG_RUNTIME_CPU_DETECT
-    pbi->mb.rtcd         = &pbi->common.rtcd;
-    pbi->dequant.block   = vp8_dequantize_b_c;
-    pbi->dequant.idct    = vp8_dequant_idct_c;
-    pbi->dequant.idct_dc = vp8_dequant_dc_idct_c;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.stop  = vp8dx_stop_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
+    pbi->mb.rtcd                     = &pbi->common.rtcd;
+    pbi->dequant.block               = vp8_dequantize_b_c;
+    pbi->dequant.idct_add            = vp8_dequant_idct_add_c;
+    pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_c;
+    pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c;
+    pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_c;
+    pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_c;
+    pbi->dboolhuff.start             = vp8dx_start_decode_c;
+    pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
 #if 0 //For use with RTCD, when implemented
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
     pbi->dboolhuff.devalue = vp8dx_decode_value_c;
diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c
new file mode 100644
index 0000000..c6a4257
--- /dev/null
+++ b/vp8/decoder/idct_blk.c
@@ -0,0 +1,116 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i, j;
+
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_dc_idct_add_c (q, dq, pre, dst, 16, stride, dc[0]);
+            else
+                vp8_dc_only_idct_add_c (dc[0], pre, dst, 16, stride);
+
+            q   += 16;
+            pre += 4;
+            dst += 4;
+            dc  ++;
+        }
+
+        pre += 64 - 16;
+        dst += 4*stride - 16;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i, j;
+
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, pre, dst, 16, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dst, 16, stride);
+                ((int *)q)[0] = 0;
+            }
+
+            q   += 16;
+            pre += 4;
+            dst += 4;
+        }
+
+        pre += 64 - 16;
+        dst += 4*stride - 16;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i, j;
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, pre, dstu, 8, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstu, 8, stride);
+                ((int *)q)[0] = 0;
+            }
+
+            q    += 16;
+            pre  += 4;
+            dstu += 4;
+        }
+
+        pre  += 32 - 8;
+        dstu += 4*stride - 8;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, pre, dstv, 8, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstv, 8, stride);
+                ((int *)q)[0] = 0;
+            }
+
+            q    += 16;
+            pre  += 4;
+            dstv += 4;
+        }
+
+        pre  += 32 - 8;
+        dstv += 4*stride - 8;
+    }
+}
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 6875585..1651784 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -23,18 +24,16 @@
 #include "threading.h"
 #include "decoderthreading.h"
 #include <stdio.h>
-#include "segmentation_common.h"
+
 #include "quant_common.h"
 #include "vpx_scale/vpxscale.h"
 #include "systemdependent.h"
 #include "vpx_ports/vpx_timer.h"
-
+#include "detokenize.h"
 
 extern void vp8_init_loop_filter(VP8_COMMON *cm);
-
 extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
 
-// DEBUG code
 #if CONFIG_DEBUG
 void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
 {
@@ -128,6 +127,9 @@
         cm->last_sharpness_level = cm->sharpness_level;
     }
 
+#if CONFIG_ARM_ASM_DETOK
+    vp8_init_detokenizer(pbi);
+#endif
     pbi->common.error.setjmp = 0;
     return (VP8D_PTR) pbi;
 }
@@ -179,38 +181,38 @@
 {
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
+    int ref_fb_idx;
 
     if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
+        ref_fb_idx = cm->lst_fb_idx;
     else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
+        ref_fb_idx = cm->gld_fb_idx;
     else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
+        ref_fb_idx = cm->alt_fb_idx;
     else
         return -1;
 
+    vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd);
+
     return 0;
 }
 int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
 {
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
+    int ref_fb_idx;
 
     if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
+        ref_fb_idx = cm->lst_fb_idx;
     else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
+        ref_fb_idx = cm->gld_fb_idx;
     else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
+        ref_fb_idx = cm->alt_fb_idx;
     else
         return -1;
 
+    vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]);
+
     return 0;
 }
 
@@ -220,12 +222,95 @@
 extern void vp8_pop_neon(INT64 *store);
 static INT64 dx_store_reg[8];
 #endif
+
+static int get_free_fb (VP8_COMMON *cm)
+{
+    int i;
+    for (i = 0; i < NUM_YV12_BUFFERS; i++)
+        if (cm->fb_idx_ref_cnt[i] == 0)
+            break;
+
+    cm->fb_idx_ref_cnt[i] = 1;
+    return i;
+}
+
+static void ref_cnt_fb (int *buf, int *idx, int new_idx)
+{
+    if (buf[*idx] > 0)
+        buf[*idx]--;
+
+    *idx = new_idx;
+
+    buf[new_idx]++;
+}
+
+// If any buffer copy / swapping is signalled it should be done here.
+static int swap_frame_buffers (VP8_COMMON *cm)
+{
+    int fb_to_update_with, err = 0;
+
+    if (cm->refresh_last_frame)
+        fb_to_update_with = cm->lst_fb_idx;
+    else
+        fb_to_update_with = cm->new_fb_idx;
+
+    // The alternate reference frame or golden frame can be updated
+    //  using the new, last, or golden/alt ref frame.  If it
+    //  is updated using the newly decoded frame it is a refresh.
+    //  An update using the last or golden/alt ref frame is a copy.
+    if (cm->copy_buffer_to_arf)
+    {
+        int new_fb = 0;
+
+        if (cm->copy_buffer_to_arf == 1)
+            new_fb = fb_to_update_with;
+        else if (cm->copy_buffer_to_arf == 2)
+            new_fb = cm->gld_fb_idx;
+        else
+            err = -1;
+
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb);
+    }
+
+    if (cm->copy_buffer_to_gf)
+    {
+        int new_fb = 0;
+
+        if (cm->copy_buffer_to_gf == 1)
+            new_fb = fb_to_update_with;
+        else if (cm->copy_buffer_to_gf == 2)
+            new_fb = cm->alt_fb_idx;
+        else
+            err = -1;
+
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb);
+    }
+
+    if (cm->refresh_golden_frame)
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx);
+
+    if (cm->refresh_alt_ref_frame)
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx);
+
+    if (cm->refresh_last_frame)
+    {
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx);
+
+        cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx];
+    }
+    else
+        cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
+
+    cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+
+    return err;
+}
+
 int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
 {
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
     int retcode = 0;
-
     struct vpx_usec_timer timer;
 
 //  if(pbi->ready_for_new_data == 0)
@@ -256,6 +341,8 @@
     pbi->Source = source;
     pbi->source_sz = size;
 
+    cm->new_fb_idx = get_free_fb (cm);
+
     retcode = vp8_decode_frame(pbi);
 
     if (retcode < 0)
@@ -268,23 +355,17 @@
         return retcode;
     }
 
-    // Update the GF useage maps.
-    vp8_update_gf_useage_maps(cm, &pbi->mb);
-
     if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
         vp8_stop_lfthread(pbi);
 
-    if (cm->refresh_last_frame)
+    if (swap_frame_buffers (cm))
     {
-        vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-
-        cm->frame_to_show = &cm->last_frame;
-    }
-    else
-    {
-        cm->frame_to_show = &cm->new_frame;
+        pbi->common.error.error_code = VPX_CODEC_ERROR;
+        pbi->common.error.setjmp = 0;
+        return -1;
     }
 
+/*
     if (!pbi->b_multithreaded_lf)
     {
         struct vpx_usec_timer lpftimer;
@@ -292,16 +373,45 @@
         // Apply the loop filter if appropriate.
 
         if (cm->filter_level > 0)
-        {
             vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
-            cm->last_frame_type = cm->frame_type;
-            cm->last_filter_type = cm->filter_type;
-            cm->last_sharpness_level = cm->sharpness_level;
-
-        }
 
         vpx_usec_timer_mark(&lpftimer);
         pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+    }else{
+      struct vpx_usec_timer lpftimer;
+      vpx_usec_timer_start(&lpftimer);
+      // Apply the loop filter if appropriate.
+
+      if (cm->filter_level > 0)
+          vp8_mt_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
+
+      vpx_usec_timer_mark(&lpftimer);
+      pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+    }
+    if (cm->filter_level > 0) {
+        cm->last_frame_type = cm->frame_type;
+        cm->last_filter_type = cm->filter_type;
+        cm->last_sharpness_level = cm->sharpness_level;
+    }
+*/
+
+    if(pbi->common.filter_level)
+    {
+        struct vpx_usec_timer lpftimer;
+        vpx_usec_timer_start(&lpftimer);
+        // Apply the loop filter if appropriate.
+
+        if (pbi->b_multithreaded_lf && cm->multi_token_partition != ONE_PARTITION)
+            vp8_mt_loop_filter_frame(pbi);   //cm, &pbi->mb, cm->filter_level);
+        else
+            vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
+
+        vpx_usec_timer_mark(&lpftimer);
+        pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+
+        cm->last_frame_type = cm->frame_type;
+        cm->last_filter_type = cm->filter_type;
+        cm->last_sharpness_level = cm->sharpness_level;
     }
 
     vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
@@ -313,49 +423,6 @@
         write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
 #endif
 
-    // If any buffer copy / swaping is signalled it should be done here.
-    if (cm->copy_buffer_to_arf)
-    {
-        if (cm->copy_buffer_to_arf == 1)
-        {
-            if (cm->refresh_last_frame)
-                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
-            else
-                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
-        }
-        else if (cm->copy_buffer_to_arf == 2)
-            vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
-    }
-
-    if (cm->copy_buffer_to_gf)
-    {
-        if (cm->copy_buffer_to_gf == 1)
-        {
-            if (cm->refresh_last_frame)
-                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
-            else
-                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
-        }
-        else if (cm->copy_buffer_to_gf == 2)
-            vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
-    }
-
-    // Should the golden or alternate reference frame be refreshed?
-    if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
-    {
-        if (cm->refresh_golden_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-
-        if (cm->refresh_alt_ref_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-
-        //vpx_log("Decoder: recovery frame received \n");
-
-        // Update data structures that monitors GF useage
-        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
-    }
-
     vp8_clear_system_state();
 
     vpx_usec_timer_mark(&timer);
diff --git a/vp8/decoder/onyxd_if_sjl.c b/vp8/decoder/onyxd_if_sjl.c
deleted file mode 100644
index 363ad5d..0000000
--- a/vp8/decoder/onyxd_if_sjl.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-#include "onyxc_int.h"
-#include "postproc.h"
-#include "onyxd.h"
-#include "onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
-#include "alloccommon.h"
-#include "vpx_scale/yv12extend.h"
-#include "loopfilter.h"
-#include "swapyv12buffer.h"
-#include "g_common.h"
-#include "threading.h"
-#include "decoderthreading.h"
-#include <stdio.h>
-#include "segmentation_common.h"
-#include "quant_common.h"
-#include "vpx_scale/vpxscale.h"
-#include "systemdependent.h"
-#include "vpx_ports/vpx_timer.h"
-
-
-#ifndef VPX_NO_GLOBALS
-static int init_ct = 0;
-#else
-# include "vpx_global_handling.h"
-# define init_ct ((int)vpxglobalm(onyxd,init_ct))
-#endif
-
-extern void vp8_init_loop_filter(VP8_COMMON *cm);
-
-extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
-extern void init_detokenizer(VP8D_COMP *dx);
-
-// DEBUG code
-void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
-{
-    FILE *yuv_file = fopen((char *)name, "ab");
-    unsigned char *src = s->y_buffer;
-    int h = s->y_height;
-
-    do
-    {
-        fwrite(src, s->y_width, 1,  yuv_file);
-        src += s->y_stride;
-    }
-    while (--h);
-
-    src = s->u_buffer;
-    h = s->uv_height;
-
-    do
-    {
-        fwrite(src, s->uv_width, 1,  yuv_file);
-        src += s->uv_stride;
-    }
-    while (--h);
-
-    src = s->v_buffer;
-    h = s->uv_height;
-
-    do
-    {
-        fwrite(src, s->uv_width, 1, yuv_file);
-        src += s->uv_stride;
-    }
-    while (--h);
-
-    fclose(yuv_file);
-}
-
-void vp8dx_initialize()
-{
-    if (!init_ct++)
-    {
-        vp8_initialize_common();
-        vp8_scale_machine_specific_config();
-    }
-}
-
-void vp8dx_shutdown()
-{
-    if (!--init_ct)
-    {
-        vp8_shutdown_common();
-    }
-}
-
-
-VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
-{
-    VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
-
-    if (!pbi)
-        return NULL;
-
-    vpx_memset(pbi, 0, sizeof(VP8D_COMP));
-
-    vp8dx_initialize();
-
-    vp8_create_common(&pbi->common);
-    vp8_dmachine_specific_config(pbi);
-
-    pbi->common.current_video_frame = 0;
-    pbi->ready_for_new_data = 1;
-
-    pbi->CPUFreq = 0; //vp8_get_processor_freq();
-    pbi->max_threads = oxcf->max_threads;
-    vp8_decoder_create_threads(pbi);
-
-    //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
-    // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
-    vp8cx_init_de_quantizer(pbi);
-
-    {
-        VP8_COMMON *cm = &pbi->common;
-
-        vp8_init_loop_filter(cm);
-        cm->last_frame_type = KEY_FRAME;
-        cm->last_filter_type = cm->filter_type;
-        cm->last_sharpness_level = cm->sharpness_level;
-    }
-
-    init_detokenizer(pbi);
-
-    return (VP8D_PTR) pbi;
-}
-void vp8dx_remove_decompressor(VP8D_PTR ptr)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-
-    if (!pbi)
-        return;
-
-    vp8_decoder_remove_threads(pbi);
-    vp8_remove_common(&pbi->common);
-    vpx_free(pbi);
-    vp8dx_shutdown();
-
-}
-
-void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
-    (void) pbi;
-    (void) x;
-
-    switch (oxst)
-    {
-    case VP8D_OK:
-        break;
-    }
-}
-
-int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
-    (void) pbi;
-
-    switch (oxst)
-    {
-    case VP8D_OK:
-        break;
-    }
-
-    return -1;
-}
-
-int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-    VP8_COMMON *cm = &pbi->common;
-
-    if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
-    else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
-    else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
-    else
-        return -1;
-
-    return 0;
-}
-int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-    VP8_COMMON *cm = &pbi->common;
-
-    if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
-    else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
-    else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
-    else
-        return -1;
-
-    return 0;
-}
-int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, char *source, INT64 time_stamp)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-    VP8_COMMON *cm = &pbi->common;
-    int retcode = 0;
-
-    struct vpx_usec_timer timer;
-    (void) size;
-
-//  if(pbi->ready_for_new_data == 0)
-//      return -1;
-
-    vpx_usec_timer_start(&timer);
-
-    if (ptr == 0)
-    {
-        return -1;
-    }
-
-    //cm->current_video_frame++;
-    pbi->Source = source;
-
-    retcode = vp8_decode_frame(pbi);
-
-    if (retcode < 0)
-        return retcode;
-
-    // Update the GF useage maps.
-    vp8_update_gf_useage_maps(cm, &pbi->mb);
-
-    if (pbi->b_multithreaded)
-        vp8_stop_lfthread(pbi);
-
-    if (cm->refresh_last_frame)
-    {
-        vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-
-        cm->frame_to_show = &cm->last_frame;
-    }
-    else
-    {
-        cm->frame_to_show = &cm->new_frame;
-    }
-
-    if (!pbi->b_multithreaded)
-    {
-        struct vpx_usec_timer lpftimer;
-        vpx_usec_timer_start(&lpftimer);
-        // Apply the loop filter if appropriate.
-
-        if (cm->filter_level > 0)
-        {
-            vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
-            cm->last_frame_type = cm->frame_type;
-            cm->last_filter_type = cm->filter_type;
-            cm->last_sharpness_level = cm->sharpness_level;
-
-        }
-
-        vpx_usec_timer_mark(&lpftimer);
-        pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
-    }
-
-    vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
-
-#if 0
-    // DEBUG code
-    //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
-    if (cm->current_video_frame <= 5)
-        write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
-#endif
-
-    // If any buffer copy / swaping is signalled it should be done here.
-    if (cm->copy_buffer_to_arf)
-    {
-        if (cm->copy_buffer_to_arf == 1)
-        {
-            if (cm->refresh_last_frame)
-                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
-            else
-                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
-        }
-        else if (cm->copy_buffer_to_arf == 2)
-            vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
-    }
-
-    if (cm->copy_buffer_to_gf)
-    {
-        if (cm->copy_buffer_to_gf == 1)
-        {
-            if (cm->refresh_last_frame)
-                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
-            else
-                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
-        }
-        else if (cm->copy_buffer_to_gf == 2)
-            vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
-    }
-
-    // Should the golden or alternate reference frame be refreshed?
-    if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
-    {
-        if (cm->refresh_golden_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-
-        if (cm->refresh_alt_ref_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-
-        //vpx_log("Decoder: recovery frame received \n");
-
-        // Update data structures that monitors GF useage
-        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
-    }
-
-    vp8_clear_system_state();
-
-    vpx_usec_timer_mark(&timer);
-    pbi->decode_microseconds = vpx_usec_timer_elapsed(&timer);
-
-    pbi->time_decoding += pbi->decode_microseconds;
-
-//  vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
-
-    cm->current_video_frame++;
-    pbi->ready_for_new_data = 0;
-    pbi->last_time_stamp = time_stamp;
-
-    {
-        int i;
-        INT64 earliest_time = pbi->dr[0].time_stamp;
-        INT64 latest_time = pbi->dr[0].time_stamp;
-        INT64 time_diff = 0;
-        int bytes = 0;
-
-        pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;;
-        pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp;
-
-        for (i = 0; i < 16; i++)
-        {
-
-            bytes += pbi->dr[i].size;
-
-            if (pbi->dr[i].time_stamp < earliest_time)
-                earliest_time = pbi->dr[i].time_stamp;
-
-            if (pbi->dr[i].time_stamp > latest_time)
-                latest_time = pbi->dr[i].time_stamp;
-        }
-
-        time_diff = latest_time - earliest_time;
-
-        if (time_diff > 0)
-        {
-            pbi->common.bitrate = 80000.00 * bytes / time_diff  ;
-            pbi->common.framerate = 160000000.00 / time_diff ;
-        }
-
-    }
-    return retcode;
-}
-int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level,  int noise_level, int flags)
-{
-    int ret = -1;
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-
-    if (pbi->ready_for_new_data == 1)
-        return ret;
-
-    // ie no raw frame to show!!!
-    if (pbi->common.show_frame == 0)
-        return ret;
-
-    pbi->ready_for_new_data = 1;
-    *time_stamp = pbi->last_time_stamp;
-    *time_end_stamp = 0;
-
-    sd->clrtype = pbi->common.clr_type;
-    ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
-    vp8_clear_system_state();
-    return ret;
-}
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index fa4fa48..ad21ae3 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -47,21 +48,20 @@
 
 typedef struct
 {
-    int *scan;
-    UINT8 *ptr_onyxblock2context_leftabove;
-    vp8_tree_index *vp8_coef_tree_ptr;  //onyx_coef_tree_ptr; ???
-    TOKENEXTRABITS *teb_base_ptr;
+    int const *scan;
+    UINT8 const *ptr_block2leftabove;
+    vp8_tree_index const *vp8_coef_tree_ptr;
+    TOKENEXTRABITS const *teb_base_ptr;
     unsigned char *norm_ptr;
-//  UINT16 *ptr_onyx_coef_bands_x;
-    UINT8 *ptr_onyx_coef_bands_x;
+    UINT8 *ptr_coef_bands_x;
 
-    ENTROPY_CONTEXT   **A;
-    ENTROPY_CONTEXT(*L)[4];
+    ENTROPY_CONTEXT_PLANES *A;
+    ENTROPY_CONTEXT_PLANES *L;
 
     INT16 *qcoeff_start_ptr;
     BOOL_DECODER *current_bc;
 
-    UINT8 *coef_probs[4];
+    vp8_prob const *coef_probs[4];
 
     UINT8 eob[25];
 
@@ -94,20 +94,22 @@
     int current_mb_col_main;
     int decoding_thread_count;
     int allocated_decoding_thread_count;
+    int *current_mb_col;                  //Each row remembers its already decoded column.
+    int mt_baseline_filter_level[MAX_MB_SEGMENTS];
 
     // variable for threading
     DECLARE_ALIGNED(16, MACROBLOCKD, lpfmb);
 #if CONFIG_MULTITHREAD
-    pthread_t           h_thread_lpf;         // thread for postprocessing
-    sem_t               h_event_lpf;          // Event for post_proc completed
-    sem_t               h_event_start_lpf;
+    //pthread_t           h_thread_lpf;         // thread for postprocessing
+    sem_t               h_event_end_lpf;          // Event for post_proc completed
+    sem_t               *h_event_start_lpf;
 #endif
     MB_ROW_DEC           *mb_row_di;
     DECODETHREAD_DATA   *de_thread_data;
 #if CONFIG_MULTITHREAD
     pthread_t           *h_decoding_thread;
-    sem_t               *h_event_mbrdecoding;
-    sem_t               h_event_main;
+    sem_t               *h_event_start_decoding;
+    sem_t               h_event_end_decoding;
     // end of threading data
 #endif
     vp8_reader *mbc;
@@ -123,6 +125,12 @@
     struct vp8_dboolhuff_rtcd_vtable dboolhuff;
 #endif
 
+
+    vp8_prob prob_intra;
+    vp8_prob prob_last;
+    vp8_prob prob_gf;
+    vp8_prob prob_skip_false;
+
 } VP8D_COMP;
 
 int vp8_decode_frame(VP8D_COMP *cpi);
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index e35d175..a77552c 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -1,16 +1,20 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #ifndef WIN32
 # include <unistd.h>
 #endif
+#ifdef __APPLE__
+#include <mach/mach_init.h>
+#endif
 #include "onyxd_int.h"
 #include "vpx_mem/vpx_mem.h"
 #include "threading.h"
@@ -19,6 +23,8 @@
 #include "extend.h"
 #include "vpx_ports/vpx_timer.h"
 
+#define MAX_ROWS 256
+
 extern void vp8_decode_mb_row(VP8D_COMP *pbi,
                               VP8_COMMON *pc,
                               int mb_row,
@@ -27,11 +33,10 @@
 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
 extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
 
+void vp8_thread_loop_filter(VP8D_COMP *pbi, MB_ROW_DEC *mbrd, int ithread);
+
 void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
 {
-
-
-
 #if CONFIG_MULTITHREAD
     VP8_COMMON *const pc = & pbi->common;
     int i, j;
@@ -42,15 +47,11 @@
 #if CONFIG_RUNTIME_CPU_DETECT
         mbd->rtcd = xd->rtcd;
 #endif
-
-
         mbd->subpixel_predict        = xd->subpixel_predict;
         mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
-        mbd->gf_active_ptr            = xd->gf_active_ptr;
 
-        mbd->mode_info        = pc->mi - 1;
         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
         mbd->mode_info_stride  = pc->mode_info_stride;
 
@@ -58,11 +59,8 @@
         mbd->frames_since_golden      = pc->frames_since_golden;
         mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
 
-        mbd->pre = pc->last_frame;
-        mbd->dst = pc->new_frame;
-
-
-
+        mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
+        mbd->dst = pc->yv12_fb[pc->new_fb_idx];
 
         vp8_setup_block_dptrs(mbd);
         vp8_build_block_doffsets(mbd);
@@ -70,9 +68,6 @@
         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
 
-        mbd->mbmi.mode = DC_PRED;
-        mbd->mbmi.uv_mode = DC_PRED;
-
         mbd->current_bc = &pbi->bc2;
 
         for (j = 0; j < 25; j++)
@@ -81,6 +76,8 @@
         }
     }
 
+    for (i=0; i< pc->mb_rows; i++)
+        pbi->current_mb_col[i]=-1;
 #else
     (void) pbi;
     (void) xd;
@@ -89,6 +86,68 @@
 #endif
 }
 
+void vp8_setup_loop_filter_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
+{
+#if CONFIG_MULTITHREAD
+    VP8_COMMON *const pc = & pbi->common;
+    int i, j;
+
+    for (i = 0; i < count; i++)
+    {
+        MACROBLOCKD *mbd = &mbrd[i].mbd;
+//#if CONFIG_RUNTIME_CPU_DETECT
+//        mbd->rtcd = xd->rtcd;
+//#endif
+
+        //mbd->subpixel_predict        = xd->subpixel_predict;
+        //mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
+        //mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
+        //mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
+
+        mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
+        mbd->mode_info_stride  = pc->mode_info_stride;
+
+        //mbd->frame_type = pc->frame_type;
+        //mbd->frames_since_golden      = pc->frames_since_golden;
+        //mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
+
+        //mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
+        //mbd->dst = pc->yv12_fb[pc->new_fb_idx];
+
+        //vp8_setup_block_dptrs(mbd);
+        //vp8_build_block_doffsets(mbd);
+        mbd->segmentation_enabled    = xd->segmentation_enabled;  //
+        mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;  //
+        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));   //
+
+        //signed char ref_lf_deltas[MAX_REF_LF_DELTAS];
+        vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
+        //signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];
+        vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
+        //unsigned char mode_ref_lf_delta_enabled;
+        //unsigned char mode_ref_lf_delta_update;
+        mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
+        mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
+
+        //mbd->mbmi.mode = DC_PRED;
+        //mbd->mbmi.uv_mode = DC_PRED;
+        //mbd->current_bc = &pbi->bc2;
+
+        //for (j = 0; j < 25; j++)
+        //{
+        //    mbd->block[j].dequant = xd->block[j].dequant;
+        //}
+    }
+
+    for (i=0; i< pc->mb_rows; i++)
+        pbi->current_mb_col[i]=-1;
+#else
+    (void) pbi;
+    (void) xd;
+    (void) mbrd;
+    (void) count;
+#endif
+}
 
 THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 {
@@ -96,156 +155,146 @@
     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
-    ENTROPY_CONTEXT mb_row_left_context[4][4];
+    ENTROPY_CONTEXT_PLANES mb_row_left_context;
 
     while (1)
     {
+        int current_filter_level = 0;
+
         if (pbi->b_multithreaded_rd == 0)
             break;
 
-        //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
-        if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
+        //if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)
+        if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
         {
             if (pbi->b_multithreaded_rd == 0)
                 break;
             else
             {
                 VP8_COMMON *pc = &pbi->common;
-                int mb_row       = mbrd->mb_row;
                 MACROBLOCKD *xd = &mbrd->mbd;
 
-                //printf("ithread:%d mb_row %d\n", ithread, mb_row);
-                int i;
-                int recon_yoffset, recon_uvoffset;
-                int mb_col;
-                int recon_y_stride = pc->last_frame.y_stride;
-                int recon_uv_stride = pc->last_frame.uv_stride;
-
+                int mb_row;
+                int num_part = 1 << pbi->common.multi_token_partition;
                 volatile int *last_row_current_mb_col;
 
-                if (ithread > 0)
-                    last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
-                else
-                    last_row_current_mb_col = &pbi->current_mb_col_main;
-
-                recon_yoffset = mb_row * recon_y_stride * 16;
-                recon_uvoffset = mb_row * recon_uv_stride * 8;
-                // reset above block coeffs
-
-                xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
-                xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
-                xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
-                xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
-                xd->left_context = mb_row_left_context;
-                vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
-                xd->up_available = (mb_row != 0);
-
-                xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-                xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-
-                for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+                for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
                 {
+                    int i;
+                    int recon_yoffset, recon_uvoffset;
+                    int mb_col;
+                    int ref_fb_idx = pc->lst_fb_idx;
+                    int dst_fb_idx = pc->new_fb_idx;
+                    int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+                    int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
-                    while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
+                    pbi->mb_row_di[ithread].mb_row = mb_row;
+                    pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];
+
+                    last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
+
+                    recon_yoffset = mb_row * recon_y_stride * 16;
+                    recon_uvoffset = mb_row * recon_uv_stride * 8;
+                    // reset above block coeffs
+
+                    xd->above_context = pc->above_context;
+                    xd->left_context = &mb_row_left_context;
+                    vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
+                    xd->up_available = (mb_row != 0);
+
+                    xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+                    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+
+                    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                     {
-                        x86_pause_hint();
-                        thread_sleep(0);
-                    }
-
-                    // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
-                    vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
-
-                    if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
-                    {
-                        for (i = 0; i < 16; i++)
+                        if ((mb_col & 7) == 0)
                         {
-                            BLOCKD *d = &xd->block[i];
-                            vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+                            while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
+                            {
+                                x86_pause_hint();
+                                thread_sleep(0);
+                            }
                         }
+
+                        if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
+                        {
+                            for (i = 0; i < 16; i++)
+                            {
+                                BLOCKD *d = &xd->block[i];
+                                vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+                            }
+                        }
+
+                        // Distance of Mb to the various image edges.
+                        // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                        xd->mb_to_left_edge = -((mb_col * 16) << 3);
+                        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+
+                        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+                        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+                        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+                        xd->left_available = (mb_col != 0);
+
+                        // Select the appropriate reference frame for this MB
+                        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+                            ref_fb_idx = pc->lst_fb_idx;
+                        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+                            ref_fb_idx = pc->gld_fb_idx;
+                        else
+                            ref_fb_idx = pc->alt_fb_idx;
+
+                        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+                        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+                        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+                        vp8_build_uvmvs(xd, pc->full_pixel);
+
+                        vp8_decode_macroblock(pbi, xd);
+
+                        recon_yoffset += 16;
+                        recon_uvoffset += 8;
+
+                        ++xd->mode_info_context;  /* next mb */
+
+                        xd->above_context++;
+
+                        //pbi->mb_row_di[ithread].current_mb_col = mb_col;
+                        pbi->current_mb_col[mb_row] = mb_col;
                     }
 
-                    // Distance of Mb to the various image edges.
-                    // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-                    xd->mb_to_left_edge = -((mb_col * 16) << 3);
-                    xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-
-                    xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
-                    xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
-                    xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
-
-                    xd->left_available = (mb_col != 0);
-
-                    // Select the appropriate reference frame for this MB
-                    if (xd->mbmi.ref_frame == LAST_FRAME)
-                    {
-                        xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
-                        xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
-                        xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
-                    }
-                    else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
-                    {
-                        // Golden frame reconstruction buffer
-                        xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
-                        xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
-                        xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
-                    }
-                    else
-                    {
-                        // Alternate reference frame reconstruction buffer
-                        xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
-                        xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
-                        xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
-                    }
-
-                    vp8_build_uvmvs(xd, pc->full_pixel);
-
-                    vp8dx_bool_decoder_fill(xd->current_bc);
-                    vp8_decode_macroblock(pbi, xd);
-
-
-                    recon_yoffset += 16;
-                    recon_uvoffset += 8;
-
-                    ++xd->mode_info_context;  /* next mb */
-
-                    xd->gf_active_ptr++;      // GF useage flag for next MB
-
-                    xd->above_context[Y1CONTEXT] += 4;
-                    xd->above_context[UCONTEXT ] += 2;
-                    xd->above_context[VCONTEXT ] += 2;
-                    xd->above_context[Y2CONTEXT] ++;
-                    pbi->mb_row_di[ithread].current_mb_col = mb_col;
-
-                }
-
-                // adjust to the next row of mbs
-                vp8_extend_mb_row(
-                    &pc->new_frame,
+                    // adjust to the next row of mbs
+                    vp8_extend_mb_row(
+                    &pc->yv12_fb[dst_fb_idx],
                     xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
-                );
+                    );
 
-                ++xd->mode_info_context;      /* skip prediction column */
+                    ++xd->mode_info_context;      /* skip prediction column */
 
-                // since we have multithread
-                xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+                    // since we have multithread
+                    xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
 
-                //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
-                if ((mb_row & 1) == 1)
-                {
                     pbi->last_mb_row_decoded = mb_row;
-                    //printf("S%d", pbi->last_mb_row_decoded);
-                }
-
-                if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
-                {
-                    //SetEvent(pbi->h_event_main);
-                    sem_post(&pbi->h_event_main);
 
                 }
             }
         }
-    }
 
+        // If |pbi->common.filter_level| is 0 the value can change in-between
+        // the sem_post and the check to call vp8_thread_loop_filter.
+        current_filter_level = pbi->common.filter_level;
+
+        //  add this to each frame
+        if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
+        {
+            //SetEvent(pbi->h_event_end_decoding);
+            sem_post(&pbi->h_event_end_decoding);
+        }
+
+        if ((pbi->b_multithreaded_lf) && (current_filter_level))
+            vp8_thread_loop_filter(pbi, mbrd, ithread);
+
+    }
 #else
     (void) p_data;
 #endif
@@ -253,93 +302,60 @@
     return 0 ;
 }
 
-THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
+
+void vp8_thread_loop_filter(VP8D_COMP *pbi, MB_ROW_DEC *mbrd, int ithread)
 {
 #if CONFIG_MULTITHREAD
-    VP8D_COMP *pbi = (VP8D_COMP *)p_data;
 
-    while (1)
-    {
-        if (pbi->b_multithreaded_lf == 0)
-            break;
-
-        //printf("before waiting for start_lpf\n");
-
-        //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
-        if (sem_wait(&pbi->h_event_start_lpf) == 0)
+        if (sem_wait(&pbi->h_event_start_lpf[ithread]) == 0)
         {
-            if (pbi->b_multithreaded_lf == 0) // we're shutting down
-                break;
-            else
+           // if (pbi->b_multithreaded_lf == 0) // we're shutting down      ????
+           //     break;
+           // else
             {
-
                 VP8_COMMON *cm  = &pbi->common;
-                MACROBLOCKD *mbd = &pbi->lpfmb;
+                MACROBLOCKD *mbd = &mbrd->mbd;
                 int default_filt_lvl = pbi->common.filter_level;
 
-                YV12_BUFFER_CONFIG *post = &cm->new_frame;
+                YV12_BUFFER_CONFIG *post = cm->frame_to_show;
                 loop_filter_info *lfi = cm->lf_info;
+                //int frame_type = cm->frame_type;
 
                 int mb_row;
                 int mb_col;
 
-
-                int baseline_filter_level[MAX_MB_SEGMENTS];
                 int filter_level;
                 int alt_flt_enabled = mbd->segmentation_enabled;
 
                 int i;
                 unsigned char *y_ptr, *u_ptr, *v_ptr;
 
-                volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;
-
-                //MODE_INFO * this_mb_mode_info = cm->mi;
-                mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
-
-                // Note the baseline filter values for each segment
-                if (alt_flt_enabled)
-                {
-                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
-                    {
-                        if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
-                            baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                        else
-                        {
-                            baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                            baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
-                        }
-                    }
-                }
-                else
-                {
-                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
-                        baseline_filter_level[i] = default_filt_lvl;
-                }
-
-                // Initialize the loop filter for this frame.
-                vp8_init_loop_filter(cm);
+                volatile int *last_row_current_mb_col;
 
                 // Set up the buffer pointers
-                y_ptr = post->y_buffer;
-                u_ptr = post->u_buffer;
-                v_ptr = post->v_buffer;
+                y_ptr = post->y_buffer + post->y_stride  * 16 * (ithread +1);
+                u_ptr = post->u_buffer + post->uv_stride *  8 * (ithread +1);
+                v_ptr = post->v_buffer + post->uv_stride *  8 * (ithread +1);
 
                 // vp8_filter each macro block
-                for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+                for (mb_row = ithread+1; mb_row < cm->mb_rows; mb_row+= (pbi->decoding_thread_count + 1))
                 {
+                    last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
 
-                    while (mb_row >= *last_mb_row_decoded)
-                    {
-                        x86_pause_hint();
-                        thread_sleep(0);
-                    }
-
-                    //printf("R%d", mb_row);
                     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
                     {
                         int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
 
-                        filter_level = baseline_filter_level[Segment];
+                        if ((mb_col & 7) == 0)
+                        {
+                            while (mb_col > (*last_row_current_mb_col-8) && *last_row_current_mb_col != cm->mb_cols - 1)
+                            {
+                                x86_pause_hint();
+                                thread_sleep(0);
+                            }
+                        }
+
+                        filter_level = pbi->mt_baseline_filter_level[Segment];
 
                         // Apply any context driven MB level adjustment
                         vp8_adjust_mb_lf_value(mbd, &filter_level);
@@ -365,29 +381,28 @@
                         v_ptr += 8;
 
                         mbd->mode_info_context++;     // step to next MB
-
+                        pbi->current_mb_col[mb_row] = mb_col;
                     }
 
-                    y_ptr += post->y_stride  * 16 - post->y_width;
-                    u_ptr += post->uv_stride *  8 - post->uv_width;
-                    v_ptr += post->uv_stride *  8 - post->uv_width;
-
                     mbd->mode_info_context++;         // Skip border mb
+
+                    y_ptr += post->y_stride  * 16 * (pbi->decoding_thread_count + 1) - post->y_width;
+                    u_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
+                    v_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
+
+                    mbd->mode_info_context += pbi->decoding_thread_count * mbd->mode_info_stride;         // Skip border mb
                 }
-
-                //printf("R%d\n", mb_row);
-                // When done, signal main thread that ME is finished
-                //SetEvent(pbi->h_event_lpf);
-                sem_post(&pbi->h_event_lpf);
             }
-
         }
-    }
 
+        //  add this to each frame
+        if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
+        {
+          sem_post(&pbi->h_event_end_lpf);
+        }
 #else
-    (void) p_data;
+    (void) pbi;
 #endif
-    return 0;
 }
 
 void vp8_decoder_create_threads(VP8D_COMP *pbi)
@@ -399,39 +414,38 @@
     pbi->b_multithreaded_rd = 0;
     pbi->b_multithreaded_lf = 0;
     pbi->allocated_decoding_thread_count = 0;
-    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads; //vp8_get_proc_core_count();
-    if (core_count > 1)
-    {
-        sem_init(&pbi->h_event_lpf, 0, 0);
-        sem_init(&pbi->h_event_start_lpf, 0, 0);
-        pbi->b_multithreaded_lf = 1;
-        pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));
-    }
+    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
 
     if (core_count > 1)
     {
         pbi->b_multithreaded_rd = 1;
-        pbi->decoding_thread_count = core_count - 1;
+        pbi->b_multithreaded_lf = 1;  // this can be merged with pbi->b_multithreaded_rd ?
+        pbi->decoding_thread_count = core_count -1;
 
         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
-        CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
+        CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
         CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
         vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
         CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
 
+        CHECK_MEM_ERROR(pbi->current_mb_col, vpx_malloc(sizeof(int) * MAX_ROWS));  // pc->mb_rows));
+        CHECK_MEM_ERROR(pbi->h_event_start_lpf, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
+
         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
         {
-            sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0);
+            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
+            sem_init(&pbi->h_event_start_lpf[ithread], 0, 0);
 
             pbi->de_thread_data[ithread].ithread  = ithread;
             pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
             pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];
 
             pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
-
         }
 
-        sem_init(&pbi->h_event_main, 0, 0);
+        sem_init(&pbi->h_event_end_decoding, 0, 0);
+        sem_init(&pbi->h_event_end_lpf, 0, 0);
+
         pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
     }
 
@@ -446,39 +460,35 @@
 
     if (pbi->b_multithreaded_lf)
     {
+        int i;
         pbi->b_multithreaded_lf = 0;
-        sem_post(&pbi->h_event_start_lpf);
-        pthread_join(pbi->h_thread_lpf, 0);
-        sem_destroy(&pbi->h_event_start_lpf);
+
+        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+            sem_destroy(&pbi->h_event_start_lpf[i]);
+
+        sem_destroy(&pbi->h_event_end_lpf);
     }
 
     //shutdown MB Decoding thread;
     if (pbi->b_multithreaded_rd)
     {
+        int i;
+
         pbi->b_multithreaded_rd = 0;
+
         // allow all threads to exit
+        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
         {
-            int i;
-
-            for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
-            {
-
-                sem_post(&pbi->h_event_mbrdecoding[i]);
-                pthread_join(pbi->h_decoding_thread[i], NULL);
-            }
-        }
-        {
-
-            int i;
-            for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
-            {
-                sem_destroy(&pbi->h_event_mbrdecoding[i]);
-            }
-
-
+            sem_post(&pbi->h_event_start_decoding[i]);
+            pthread_join(pbi->h_decoding_thread[i], NULL);
         }
 
-        sem_destroy(&pbi->h_event_main);
+        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+        {
+            sem_destroy(&pbi->h_event_start_decoding[i]);
+        }
+
+        sem_destroy(&pbi->h_event_end_decoding);
 
         if (pbi->h_decoding_thread)
         {
@@ -486,10 +496,16 @@
             pbi->h_decoding_thread = NULL;
         }
 
-        if (pbi->h_event_mbrdecoding)
+        if (pbi->h_event_start_decoding)
         {
-            vpx_free(pbi->h_event_mbrdecoding);
-            pbi->h_event_mbrdecoding = NULL;
+            vpx_free(pbi->h_event_start_decoding);
+            pbi->h_event_start_decoding = NULL;
+        }
+
+        if (pbi->h_event_start_lpf)
+        {
+            vpx_free(pbi->h_event_start_lpf);
+            pbi->h_event_start_lpf = NULL;
         }
 
         if (pbi->mb_row_di)
@@ -503,8 +519,13 @@
             vpx_free(pbi->de_thread_data);
             pbi->de_thread_data = NULL;
         }
-    }
 
+        if (pbi->current_mb_col)
+        {
+            vpx_free(pbi->current_mb_col);
+            pbi->current_mb_col = NULL ;
+        }
+    }
 #else
     (void) pbi;
 #endif
@@ -514,9 +535,12 @@
 void vp8_start_lfthread(VP8D_COMP *pbi)
 {
 #if CONFIG_MULTITHREAD
+  /*
     memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
     pbi->last_mb_row_decoded = 0;
     sem_post(&pbi->h_event_start_lpf);
+    */
+    (void) pbi;
 #else
     (void) pbi;
 #endif
@@ -525,14 +549,17 @@
 void vp8_stop_lfthread(VP8D_COMP *pbi)
 {
 #if CONFIG_MULTITHREAD
+  /*
     struct vpx_usec_timer timer;
 
     vpx_usec_timer_start(&timer);
 
-    sem_wait(&pbi->h_event_lpf);
+    sem_wait(&pbi->h_event_end_lpf);
 
     vpx_usec_timer_mark(&timer);
     pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
+    */
+    (void) pbi;
 #else
     (void) pbi;
 #endif
@@ -548,49 +575,246 @@
 
     int ibc = 0;
     int num_part = 1 << pbi->common.multi_token_partition;
+    int i;
+    volatile int *last_row_current_mb_col = NULL;
 
     vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
 
+    for (i = 0; i < pbi->decoding_thread_count; i++)
+        sem_post(&pbi->h_event_start_decoding[i]);
+
     for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
     {
         int i;
-        pbi->current_mb_col_main = -1;
 
-        xd->current_bc = &pbi->mbc[ibc];
-        ibc++ ;
+        xd->current_bc = &pbi->mbc[mb_row%num_part];
 
-        if (ibc == num_part)
-            ibc = 0;
-
-        for (i = 0; i < pbi->decoding_thread_count; i++)
+        //vp8_decode_mb_row(pbi, pc, mb_row, xd);
         {
-            if ((mb_row + i + 1) >= pc->mb_rows)
-                break;
+            int i;
+            int recon_yoffset, recon_uvoffset;
+            int mb_col;
+            int ref_fb_idx = pc->lst_fb_idx;
+            int dst_fb_idx = pc->new_fb_idx;
+            int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+            int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
-            pbi->mb_row_di[i].mb_row = mb_row + i + 1;
-            pbi->mb_row_di[i].mbd.current_bc =  &pbi->mbc[ibc];
-            ibc++;
+           // volatile int *last_row_current_mb_col = NULL;
+            if (mb_row > 0)
+                last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
 
-            if (ibc == num_part)
-                ibc = 0;
+            vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
+            recon_yoffset = mb_row * recon_y_stride * 16;
+            recon_uvoffset = mb_row * recon_uv_stride * 8;
+            // reset above block coeffs
 
-            pbi->mb_row_di[i].current_mb_col = -1;
-            sem_post(&pbi->h_event_mbrdecoding[i]);
+            xd->above_context = pc->above_context;
+            xd->up_available = (mb_row != 0);
+
+            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+
+            for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+            {
+                if ( mb_row > 0 && (mb_col & 7) == 0){
+                    while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
+                    {
+                        x86_pause_hint();
+                        thread_sleep(0);
+                    }
+                }
+
+                if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
+                {
+                    for (i = 0; i < 16; i++)
+                    {
+                        BLOCKD *d = &xd->block[i];
+                        vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+                    }
+                }
+
+                // Distance of Mb to the various image edges.
+                // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                xd->mb_to_left_edge = -((mb_col * 16) << 3);
+                xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+
+                xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+                xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+                xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+                xd->left_available = (mb_col != 0);
+
+                // Select the appropriate reference frame for this MB
+                if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+                    ref_fb_idx = pc->lst_fb_idx;
+                else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+                    ref_fb_idx = pc->gld_fb_idx;
+                else
+                    ref_fb_idx = pc->alt_fb_idx;
+
+                xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+                xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+                xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+                vp8_build_uvmvs(xd, pc->full_pixel);
+
+                vp8_decode_macroblock(pbi, xd);
+
+                recon_yoffset += 16;
+                recon_uvoffset += 8;
+
+                ++xd->mode_info_context;  /* next mb */
+
+                xd->above_context++;
+
+                //pbi->current_mb_col_main = mb_col;
+                pbi->current_mb_col[mb_row] = mb_col;
+            }
+
+            // adjust to the next row of mbs
+            vp8_extend_mb_row(
+                &pc->yv12_fb[dst_fb_idx],
+                xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
+            );
+
+            ++xd->mode_info_context;      /* skip prediction column */
+
+            pbi->last_mb_row_decoded = mb_row;
         }
-
-        vp8_decode_mb_row(pbi, pc, mb_row, xd);
-
         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
-
-        if (mb_row < pc->mb_rows - 1)
-        {
-            sem_wait(&pbi->h_event_main);
-        }
     }
 
-    pbi->last_mb_row_decoded = mb_row;
+    sem_wait(&pbi->h_event_end_decoding);   // add back for each frame
 #else
     (void) pbi;
     (void) xd;
 #endif
 }
+
+
+void vp8_mt_loop_filter_frame( VP8D_COMP *pbi)
+{
+#if CONFIG_MULTITHREAD
+    VP8_COMMON *cm  = &pbi->common;
+    MACROBLOCKD *mbd = &pbi->mb;
+    int default_filt_lvl = pbi->common.filter_level;
+
+    YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+    loop_filter_info *lfi = cm->lf_info;
+    int frame_type = cm->frame_type;
+
+    int mb_row;
+    int mb_col;
+
+    int filter_level;
+    int alt_flt_enabled = mbd->segmentation_enabled;
+
+    int i;
+    unsigned char *y_ptr, *u_ptr, *v_ptr;
+
+    volatile int *last_row_current_mb_col=NULL;
+
+    vp8_setup_loop_filter_thread_data(pbi, mbd, pbi->mb_row_di, pbi->decoding_thread_count);
+
+    mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
+
+    // Note the baseline filter values for each segment
+    if (alt_flt_enabled)
+    {
+        for (i = 0; i < MAX_MB_SEGMENTS; i++)
+        {
+            // Abs value
+            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+                pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+            // Delta Value
+            else
+            {
+                pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+            }
+        }
+    }
+    else
+    {
+        for (i = 0; i < MAX_MB_SEGMENTS; i++)
+            pbi->mt_baseline_filter_level[i] = default_filt_lvl;
+    }
+
+    // Initialize the loop filter for this frame.
+    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
+        vp8_init_loop_filter(cm);
+    else if (frame_type != cm->last_frame_type)
+        vp8_frame_init_loop_filter(lfi, frame_type);
+
+    for (i = 0; i < pbi->decoding_thread_count; i++)
+        sem_post(&pbi->h_event_start_lpf[i]);
+        // sem_post(&pbi->h_event_start_lpf);
+
+    // Set up the buffer pointers
+    y_ptr = post->y_buffer;
+    u_ptr = post->u_buffer;
+    v_ptr = post->v_buffer;
+
+    // vp8_filter each macro block
+    for (mb_row = 0; mb_row < cm->mb_rows; mb_row+= (pbi->decoding_thread_count + 1))
+    {
+        if (mb_row > 0)
+            last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
+
+        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+        {
+            int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
+
+            if ( mb_row > 0 && (mb_col & 7) == 0){
+            // if ( mb_row > 0 ){
+                while (mb_col > (*last_row_current_mb_col-8) && *last_row_current_mb_col != cm->mb_cols - 1)
+                {
+                    x86_pause_hint();
+                    thread_sleep(0);
+                }
+            }
+
+            filter_level = pbi->mt_baseline_filter_level[Segment];
+
+            // Distance of Mb to the various image edges.
+            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+            // Apply any context driven MB level adjustment
+            vp8_adjust_mb_lf_value(mbd, &filter_level);
+
+            if (filter_level)
+            {
+                if (mb_col > 0)
+                    cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+
+                if (mbd->mode_info_context->mbmi.dc_diff > 0)
+                    cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+
+                // don't apply across umv border
+                if (mb_row > 0)
+                    cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+
+                if (mbd->mode_info_context->mbmi.dc_diff > 0)
+                    cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+            }
+
+            y_ptr += 16;
+            u_ptr += 8;
+            v_ptr += 8;
+
+            mbd->mode_info_context++;     // step to next MB
+            pbi->current_mb_col[mb_row] = mb_col;
+        }
+        mbd->mode_info_context++;         // Skip border mb
+
+        //update for multi-thread
+        y_ptr += post->y_stride  * 16 * (pbi->decoding_thread_count + 1) - post->y_width;
+        u_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
+        v_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
+        mbd->mode_info_context += pbi->decoding_thread_count * mbd->mode_info_stride;
+    }
+
+    sem_wait(&pbi->h_event_end_lpf);
+#else
+    (void) pbi;
+#endif
+}
diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h
index eb10e24..2778428 100644
--- a/vp8/decoder/treereader.h
+++ b/vp8/decoder/treereader.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 02be487..150d090 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -49,12 +50,12 @@
     ret
 
 
-;void dequant_idct_mmx(short *input, short *dq, short *output, int pitch)
-global sym(vp8_dequant_idct_mmx)
-sym(vp8_dequant_idct_mmx):
+;void dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
+global sym(vp8_dequant_idct_add_mmx)
+sym(vp8_dequant_idct_add_mmx):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 4
+    SHADOW_ARGS_TO_STACK 6
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -76,7 +77,8 @@
         movq        mm3,    [rax+24]
         pmullw      mm3,    [rdx+24]
 
-        mov         rdx,    arg(2) ;output
+        mov         rdx,    arg(3) ;dest
+        mov         rsi,    arg(2) ;pred
         pxor        mm7,    mm7
 
 
@@ -87,7 +89,8 @@
         movq        [rax+24],mm7
 
 
-        movsxd      rax,            dword ptr arg(3) ;pitch
+        movsxd      rax,            dword ptr arg(4) ;pitch
+        movsxd      rdi,            dword ptr arg(5) ;stride
 
         psubw       mm0,            mm2             ; b1= 0-2
         paddw       mm2,            mm2             ;
@@ -206,13 +209,34 @@
         punpckldq   mm2,            mm4             ; 32 22 12 02
         punpckhdq   mm5,            mm4             ; 33 23 13 03
 
-        movq        [rdx],          mm0
+        pxor        mm7,            mm7
 
-        movq        [rdx+rax],      mm1
-        movq        [rdx+rax*2],    mm2
+        movd        mm4,            [rsi]
+        punpcklbw   mm4,            mm7
+        paddsw      mm0,            mm4
+        packuswb    mm0,            mm7
+        movd        [rdx],          mm0
 
-        add         rdx,            rax
-        movq        [rdx+rax*2],    mm5
+        movd        mm4,            [rsi+rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm1,            mm4
+        packuswb    mm1,            mm7
+        movd        [rdx+rdi],      mm1
+
+        movd        mm4,            [rsi+2*rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm2,            mm4
+        packuswb    mm2,            mm7
+        movd        [rdx+rdi*2],    mm2
+
+        add         rdx,            rdi
+        add         rsi,            rax
+
+        movd        mm4,            [rsi+2*rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm5,            mm4
+        packuswb    mm5,            mm7
+        movd        [rdx+rdi*2],    mm5
 
     ; begin epilog
     pop rdi
@@ -223,12 +247,12 @@
     ret
 
 
-;void dequant_dc_idct_mmx(short *input, short *dq, short *output, int pitch, int Dc)
-global sym(vp8_dequant_dc_idct_mmx)
-sym(vp8_dequant_dc_idct_mmx):
+;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
+global sym(vp8_dequant_dc_idct_add_mmx)
+sym(vp8_dequant_dc_idct_add_mmx):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
+    SHADOW_ARGS_TO_STACK 7
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -237,8 +261,6 @@
         mov         rax,    arg(0) ;input
         mov         rdx,    arg(1) ;dq
 
-        movsxd      rcx,    dword ptr arg(4) ;Dc
-
         movq        mm0,    [rax   ]
         pmullw      mm0,    [rdx]
 
@@ -251,7 +273,8 @@
         movq        mm3,    [rax+24]
         pmullw      mm3,    [rdx+24]
 
-        mov         rdx,    arg(2) ;output
+        mov         rdx,    arg(3) ;dest
+        mov         rsi,    arg(2) ;pred
         pxor        mm7,    mm7
 
 
@@ -261,8 +284,15 @@
         movq        [rax+16],mm7
         movq        [rax+24],mm7
 
-        pinsrw      mm0,    rcx,  0
-        movsxd      rax,            dword ptr arg(3) ;pitch
+        ; move lower word of Dc to lower word of mm0
+        psrlq       mm0,    16
+        movzx       rcx,    word ptr arg(6) ;Dc
+        psllq       mm0,    16
+        movd        mm7,    rcx
+        por         mm0,    mm7
+
+        movsxd      rax,            dword ptr arg(4) ;pitch
+        movsxd      rdi,            dword ptr arg(5) ;stride
 
         psubw       mm0,            mm2             ; b1= 0-2
         paddw       mm2,            mm2             ;
@@ -381,13 +411,34 @@
         punpckldq   mm2,            mm4             ; 32 22 12 02
         punpckhdq   mm5,            mm4             ; 33 23 13 03
 
-        movq        [rdx],          mm0
+        pxor        mm7,            mm7
 
-        movq        [rdx+rax],      mm1
-        movq        [rdx+rax*2],    mm2
+        movd        mm4,            [rsi]
+        punpcklbw   mm4,            mm7
+        paddsw      mm0,            mm4
+        packuswb    mm0,            mm7
+        movd        [rdx],          mm0
 
-        add         rdx,            rax
-        movq        [rdx+rax*2],    mm5
+        movd        mm4,            [rsi+rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm1,            mm4
+        packuswb    mm1,            mm7
+        movd        [rdx+rdi],      mm1
+
+        movd        mm4,            [rsi+2*rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm2,            mm4
+        packuswb    mm2,            mm7
+        movd        [rdx+rdi*2],    mm2
+
+        add         rdx,            rdi
+        add         rsi,            rax
+
+        movd        mm4,            [rsi+2*rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm5,            mm4
+        packuswb    mm5,            mm7
+        movd        [rdx+rdi*2],    mm5
 
     ; begin epilog
     pop rdi
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 5def406..dc68daa 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -20,19 +21,48 @@
  */
 #if HAVE_MMX
 extern prototype_dequant_block(vp8_dequantize_b_mmx);
-extern prototype_dequant_idct(vp8_dequant_idct_mmx);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_mmx);
-
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_mmx);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_mmx);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_mmx
 
-#undef  vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_mmx
+#undef  vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
 
-#undef  vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_mmx
+#undef  vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_mmx
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_mmx
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_mmx
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_mmx
+
+#endif
+#endif
+
+#if HAVE_SSE2
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_sse2);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_sse2);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_sse2
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_sse2
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_sse2
 
 #endif
 #endif
diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c
new file mode 100644
index 0000000..78c91d3
--- /dev/null
+++ b/vp8/decoder/x86/idct_blk_mmx.c
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_mmx
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_dc_idct_add_mmx (q, dq, pre, dst, 16, stride, dc[0]);
+        else
+            vp8_dc_only_idct_add_mmx (dc[0], pre, dst, 16, stride);
+
+        if (eobs[1] > 1)
+            vp8_dequant_dc_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+        else
+            vp8_dc_only_idct_add_mmx (dc[1], pre+4, dst+4, 16, stride);
+
+        if (eobs[2] > 1)
+            vp8_dequant_dc_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+        else
+            vp8_dc_only_idct_add_mmx (dc[2], pre+8, dst+8, 16, stride);
+
+        if (eobs[3] > 1)
+            vp8_dequant_dc_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+        else
+            vp8_dc_only_idct_add_mmx (dc[3], pre+12, dst+12, 16, stride);
+
+        q    += 64;
+        dc   += 4;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_mmx
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_mmx (q, dq, pre, dst, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dst, 16, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dst+4, 16, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        if (eobs[2] > 1)
+            vp8_dequant_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[32]*dq[0], pre+8, dst+8, 16, stride);
+            ((int *)(q+32))[0] = 0;
+        }
+
+        if (eobs[3] > 1)
+            vp8_dequant_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[48]*dq[0], pre+12, dst+12, 16, stride);
+            ((int *)(q+48))[0] = 0;
+        }
+
+        q    += 64;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_mmx
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_mmx (q, dq, pre, dstu, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstu, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstu+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstu += 4*stride;
+        eobs += 2;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_mmx (q, dq, pre, dstv, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstv, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstv+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstv += 4*stride;
+        eobs += 2;
+    }
+}
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
new file mode 100644
index 0000000..0273d6e
--- /dev/null
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void idct_dequant_dc_0_2x_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int dst_stride, short *dc);
+void idct_dequant_dc_full_2x_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int dst_stride, short *dc);
+
+void idct_dequant_0_2x_sse2
+            (short *q, short *dq ,unsigned char *pre,
+             unsigned char *dst, int dst_stride, int blk_stride);
+void idct_dequant_full_2x_sse2
+            (short *q, short *dq ,unsigned char *pre,
+             unsigned char *dst, int dst_stride, int blk_stride);
+
+void vp8_dequant_dc_idct_add_y_block_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (((short *)(eobs))[0] & 0xfefe)
+            idct_dequant_dc_full_2x_sse2 (q, dq, pre, dst, stride, dc);
+        else
+            idct_dequant_dc_0_2x_sse2 (q, dq, pre, dst, stride, dc);
+
+        if (((short *)(eobs))[1] & 0xfefe)
+            idct_dequant_dc_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+        else
+            idct_dequant_dc_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+
+        q    += 64;
+        dc   += 4;
+        pre  += 64;
+        dst  += stride*4;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (((short *)(eobs))[0] & 0xfefe)
+            idct_dequant_full_2x_sse2 (q, dq, pre, dst, stride, 16);
+        else
+            idct_dequant_0_2x_sse2 (q, dq, pre, dst, stride, 16);
+
+        if (((short *)(eobs))[1] & 0xfefe)
+            idct_dequant_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+        else
+            idct_dequant_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+
+        q    += 64;
+        pre  += 64;
+        dst  += stride*4;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    if (((short *)(eobs))[0] & 0xfefe)
+        idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+    else
+        idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+
+    q    += 32;
+    pre  += 32;
+    dstu += stride*4;
+
+    if (((short *)(eobs))[1] & 0xfefe)
+        idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+    else
+        idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+
+    q    += 32;
+    pre  += 32;
+
+    if (((short *)(eobs))[2] & 0xfefe)
+        idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+    else
+        idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+
+    q    += 32;
+    pre  += 32;
+    dstv += stride*4;
+
+    if (((short *)(eobs))[3] & 0xfefe)
+        idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+    else
+        idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+}
diff --git a/vp8/decoder/x86/onyxdxv.c b/vp8/decoder/x86/onyxdxv.c
index 75a676a..50293c7 100644
--- a/vp8/decoder/x86/onyxdxv.c
+++ b/vp8/decoder/x86/onyxdxv.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 6d7cc36..47e346d 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -38,14 +39,24 @@
 #if CONFIG_RUNTIME_CPU_DETECT
     /* Override default functions with fastest ones for this CPU. */
 #if HAVE_MMX
-
     if (flags & HAS_MMX)
     {
-        pbi->dequant.block   = vp8_dequantize_b_mmx;
-        pbi->dequant.idct    = vp8_dequant_idct_mmx;
-        pbi->dequant.idct_dc = vp8_dequant_dc_idct_mmx;
+        pbi->dequant.block               = vp8_dequantize_b_mmx;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_mmx;
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_mmx;
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_mmx;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_mmx;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_mmx;
     }
-
 #endif
+#if HAVE_SSE2
+    if (flags & HAS_SSE2)
+    {
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_sse2;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_sse2;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_sse2;
+    }
+#endif
+
 #endif
 }
diff --git a/vp8/decoder/xprintf.c b/vp8/decoder/xprintf.c
index cb2221c..e3b953e 100644
--- a/vp8/decoder/xprintf.c
+++ b/vp8/decoder/xprintf.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/xprintf.h b/vp8/decoder/xprintf.h
index 2f175e9..f83dd39 100644
--- a/vp8/decoder/xprintf.h
+++ b/vp8/decoder/xprintf.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/armv6/walsh_v6.asm b/vp8/encoder/arm/armv6/walsh_v6.asm
index 608c9ae..61ffdb3 100644
--- a/vp8/encoder/arm/armv6/walsh_v6.asm
+++ b/vp8/encoder/arm/armv6/walsh_v6.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
     EXPORT |vp8_short_walsh4x4_armv6|
diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c
index e70b3ad..fe8e70c 100644
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ b/vp8/encoder/arm/boolhuff_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
index 0039796..8d70d63 100644
--- a/vp8/encoder/arm/csystemdependent.c
+++ b/vp8/encoder/arm/csystemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -62,7 +63,11 @@
     cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
 
     cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;
+    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
+    /* The neon quantizer has not been updated to match the new exact
+     * quantizer introduced in commit e04e2935
+     */
+    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
 #elif HAVE_ARMV6
     cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
     cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h
index a671862..774599b 100644
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c
index 3f1d053..cc9e014 100644
--- a/vp8/encoder/arm/encodemb_arm.c
+++ b/vp8/encoder/arm/encodemb_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h
index 28f9e5c..eb69943 100644
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
index 07f2186..4e95c47 100644
--- a/vp8/encoder/arm/mcomp_arm.c
+++ b/vp8/encoder/arm/mcomp_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/neon/boolhuff_armv7.asm
index 9a5f366..9c4823c 100644
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/neon/boolhuff_armv7.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
index d5dec44..8c191a7 100644
--- a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
index de1c254..ca351a1 100644
--- a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
index 1107037..ca1ea9c 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/sad16_neon.asm b/vp8/encoder/arm/neon/sad16_neon.asm
index 6169f10..d7c590e 100644
--- a/vp8/encoder/arm/neon/sad16_neon.asm
+++ b/vp8/encoder/arm/neon/sad16_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/sad8_neon.asm b/vp8/encoder/arm/neon/sad8_neon.asm
index 28604dd..23ba6df 100644
--- a/vp8/encoder/arm/neon/sad8_neon.asm
+++ b/vp8/encoder/arm/neon/sad8_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm
index 26bc0d0..5af5cb8 100644
--- a/vp8/encoder/arm/neon/shortfdct_neon.asm
+++ b/vp8/encoder/arm/neon/shortfdct_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index 8781ca0..3ea00f8 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/variance_neon.asm b/vp8/encoder/arm/neon/variance_neon.asm
index 64b83ca..e1a4686 100644
--- a/vp8/encoder/arm/neon/variance_neon.asm
+++ b/vp8/encoder/arm/neon/variance_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index f26b4d7..b0450e5 100644
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
index f535967..6af4e87 100644
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
index 9c52c52..c19ac82 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
index 92b0989..0756455 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
index 6d5f882..10a3d98 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
index 5269c0a..ba3decf 100644
--- a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
index aec716e..1b09cfe 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 3d02d7c..1c1441c 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
index bd56761..cf4da62 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/picklpf_arm.c b/vp8/encoder/arm/picklpf_arm.c
index 0586e55..b2d8f2b 100644
--- a/vp8/encoder/arm/picklpf_arm.c
+++ b/vp8/encoder/arm/picklpf_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 46906d3..50f58bf 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/quantize_arm.h b/vp8/encoder/arm/quantize_arm.h
index e93f0fe..5f9155e 100644
--- a/vp8/encoder/arm/quantize_arm.h
+++ b/vp8/encoder/arm/quantize_arm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -14,8 +15,11 @@
 #if HAVE_ARMV7
 extern prototype_quantize_block(vp8_fast_quantize_b_neon);
 
-#undef  vp8_quantize_fastquantb
-#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon
+/* The neon quantizer has not been updated to match the new exact
+ * quantizer introduced in commit e04e2935
+ */
+//#undef  vp8_quantize_fastquantb
+//#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon
 
 #endif
 
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index d9fc9b3..859e43f 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
index 8cdf079..c595ca3 100644
--- a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
+++ b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 31ad56a..929c178 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -791,7 +792,8 @@
 
     assert(NEARESTMV <= m  &&  m <= SPLITMV);
 
-    vp8_write_token(w, vp8_mv_ref_tree, p, VP8_MVREFENCODINGS + m);
+    vp8_write_token(w, vp8_mv_ref_tree, p,
+                    vp8_mv_ref_encoding_array - NEARESTMV + m);
 }
 
 static void write_sub_mv_ref
@@ -801,7 +803,8 @@
 {
     assert(LEFT4X4 <= m  &&  m <= NEW4X4);
 
-    vp8_write_token(w, vp8_sub_mv_ref_tree, p, VP8_SUBMVREFENCODINGS + m);
+    vp8_write_token(w, vp8_sub_mv_ref_tree, p,
+                    vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
 }
 
 static void write_mv
@@ -869,6 +872,8 @@
     int prob_skip_false = 0;
     ms = pc->mi - 1;
 
+    cpi->mb.partition_info = cpi->mb.pi;
+
     // Calculate the probabilities to be used to code the reference frame based on actual useage this frame
     if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter)))
         cpi->prob_intra_coded = 1;
@@ -1017,7 +1022,7 @@
 
                     do
                     {
-                        const B_MODE_INFO *const b = mi->partition_bmi + j;
+                        const B_MODE_INFO *const b = cpi->mb.partition_info->bmi + j;
                         const int *const  L = vp8_mbsplits [mi->partitioning];
                         int k = -1;  /* first block in subset j */
                         int mv_contz;
@@ -1039,7 +1044,7 @@
                             write_mv(w, &b->mv.as_mv, &best_mv, (const MV_CONTEXT *) mvc);
                         }
                     }
-                    while (++j < mi->partition_count);
+                    while (++j < cpi->mb.partition_info->count);
                 }
                 break;
                 default:
@@ -1048,9 +1053,11 @@
             }
 
             ++m;
+            cpi->mb.partition_info++;
         }
 
         ++m;  /* skip L prediction border */
+        cpi->mb.partition_info++;
     }
 }
 
@@ -1385,8 +1392,6 @@
     // every keyframe send startcode, width, height, scale factor, clamp and color type
     if (oh.type == KEY_FRAME)
     {
-        int w, h, hs, vs;
-
         // Start / synch code
         cx_data[0] = 0x9D;
         cx_data[1] = 0x01;
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index ee69f66..5596313 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index cc4cbe0..ffb8890 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -32,6 +33,7 @@
 
     // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
     short(*quant)[4];
+    short(*quant_shift)[4];
     short(*zbin)[4];
     short(*zrun_zbin_boost);
     short(*round)[4];
@@ -50,6 +52,12 @@
 
 typedef struct
 {
+    int count;
+    B_MODE_INFO bmi[16];
+} PARTITION_INFO;
+
+typedef struct
+{
     DECLARE_ALIGNED(16, short, src_diff[400]);       // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
     DECLARE_ALIGNED(16, short, coeff[400]);     // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
 
@@ -59,6 +67,9 @@
     YV12_BUFFER_CONFIG src;
 
     MACROBLOCKD e_mbd;
+    PARTITION_INFO *partition_info; /* work pointer */
+    PARTITION_INFO *pi;   /* Corresponds to upper left visible macroblock */
+    PARTITION_INFO *pip;  /* Base of allocated array */
 
     search_site *ss;
     int ss_count;
@@ -91,6 +102,9 @@
 
     int encode_breakout;
 
+    //char * gf_active_ptr;
+    signed char *gf_active_ptr;
+
     unsigned char *active_ptr;
     MV_CONTEXT *mvc;
 
@@ -99,15 +113,8 @@
 
     void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
     void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
-    void (*short_fdct4x4rd)(short *input, short *output, int pitch);
-    void (*short_fdct8x4rd)(short *input, short *output, int pitch);
-    void (*vp8_short_fdct4x4_ptr)(short *input, short *output, int pitch);
     void (*short_walsh4x4)(short *input, short *output, int pitch);
-
     void (*quantize_b)(BLOCK *b, BLOCKD *d);
-    void (*quantize_brd)(BLOCK *b, BLOCKD *d);
-
-
 
 } MACROBLOCK;
 
diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c
index c101384..82006b1 100644
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 0d929f0..f723da3 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index 5207e39..b5a11ae 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -1,172 +1,64 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #include <math.h>
 
-
-static const short dct_matrix2[4][4] =
-{
-    { 23170,  30274,  23170, 12540 },
-    { 23170,  12540, -23170, -30274 },
-    { 23170, -12540, -23170, 30274 },
-    { 23170, -30274,  23170, -12540 }
-};
-
-static const short dct_matrix1[4][4] =
-{
-    { 23170,  23170,  23170,  23170 },
-    { 30274,  12540, -12540, -30274 },
-    { 23170, -23170, -23170,  23170 },
-    { 12540, -30274,  30274, -12540 }
-};
-
-
-#define _1STSTAGESHIFT           14
-#define _1STSTAGEROUNDING        (1<<( _1STSTAGESHIFT-1))
-#define _2NDSTAGESHIFT           16
-#define _2NDSTAGEROUNDING        (1<<( _2NDSTAGESHIFT-1))
-
-// using matrix multiply
 void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
 {
-    int i, j, k;
-    short temp[4][4];
-    int sumtemp;
-    pitch >>= 1;
-
-    for (i = 0; i < 4; i++)
-    {
-        for (j = 0; j < 4; j++)
-        {
-            sumtemp = 0;
-
-            for (k = 0; k < 4; k++)
-            {
-                sumtemp += input[i*pitch+k] * dct_matrix2[k][j];
-
-            }
-
-            temp[i][j] = (short)((sumtemp + _1STSTAGEROUNDING) >> _1STSTAGESHIFT);
-        }
-    }
-
-
-    for (i = 0; i < 4; i++)
-    {
-        for (j = 0; j < 4; j++)
-        {
-            sumtemp = 0;
-
-            for (k = 0; k < 4; k++)
-            {
-                sumtemp += dct_matrix1[i][ k] * temp[k][ j];
-            }
-
-            output[i*4+j] = (short)((sumtemp + _2NDSTAGEROUNDING) >> _2NDSTAGESHIFT);
-        }
-    }
-
-}
-
-
-void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
-{
-    vp8_short_fdct4x4_c(input,   output,    pitch);
-    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
-}
-
-
-static const signed short x_c1 = 60547;
-static const signed short x_c2 = 46341;
-static const signed short x_c3 = 25080;
-
-void vp8_fast_fdct4x4_c(short *input, short *output, int pitch)
-{
     int i;
     int a1, b1, c1, d1;
-    int a2, b2, c2, d2;
     short *ip = input;
-
     short *op = output;
-    int temp1, temp2;
 
     for (i = 0; i < 4; i++)
     {
-        a1 = (ip[0] + ip[3]) * 2;
-        b1 = (ip[1] + ip[2]) * 2;
-        c1 = (ip[1] - ip[2]) * 2;
-        d1 = (ip[0] - ip[3]) * 2;
+        a1 = ((ip[0] + ip[3])<<3);
+        b1 = ((ip[1] + ip[2])<<3);
+        c1 = ((ip[1] - ip[2])<<3);
+        d1 = ((ip[0] - ip[3])<<3);
 
-        temp1 = a1 + b1;
-        temp2 = a1 - b1;
+        op[0] = a1 + b1;
+        op[2] = a1 - b1;
 
-        op[0] = ((temp1 * x_c2) >> 16) + temp1;
-        op[2] = ((temp2 * x_c2) >> 16) + temp2;
-
-        temp1 = (c1 * x_c3) >> 16;
-        temp2 = ((d1 * x_c1) >> 16) + d1;
-
-        op[1] = temp1 + temp2;
-
-        temp1 = (d1 * x_c3) >> 16;
-        temp2 = ((c1 * x_c1) >> 16) + c1;
-
-        op[3] = temp1 - temp2;
+        op[1] = (c1 * 2217 + d1 * 5352 +  14500)>>12;
+        op[3] = (d1 * 2217 - c1 * 5352 +   7500)>>12;
 
         ip += pitch / 2;
         op += 4;
-    }
 
+    }
     ip = output;
     op = output;
-
     for (i = 0; i < 4; i++)
     {
-
         a1 = ip[0] + ip[12];
         b1 = ip[4] + ip[8];
         c1 = ip[4] - ip[8];
         d1 = ip[0] - ip[12];
 
+        op[0]  = ( a1 + b1 + 7)>>4;
+        op[8]  = ( a1 - b1 + 7)>>4;
 
-        temp1 = a1 + b1;
-        temp2 = a1 - b1;
-
-        a2 = ((temp1 * x_c2) >> 16) + temp1;
-        c2 = ((temp2 * x_c2) >> 16) + temp2;
-
-        temp1 = (c1 * x_c3) >> 16;
-        temp2 = ((d1 * x_c1) >> 16) + d1;
-
-        b2 = temp1 + temp2;
-
-        temp1 = (d1 * x_c3) >> 16;
-        temp2 = ((c1 * x_c1) >> 16) + c1;
-
-        d2 = temp1 - temp2;
-
-
-        op[0]   = (a2 + 1) >> 1;
-        op[4]   = (b2 + 1) >> 1;
-        op[8]   = (c2 + 1) >> 1;
-        op[12]  = (d2 + 1) >> 1;
+        op[4]  =((c1 * 2217 + d1 * 5352 +  12000)>>16) + (d1!=0);
+        op[12] = (d1 * 2217 - c1 * 5352 +  51000)>>16;
 
         ip++;
         op++;
     }
 }
 
-void vp8_fast_fdct8x4_c(short *input, short *output, int pitch)
+void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
 {
-    vp8_fast_fdct4x4_c(input,   output,    pitch);
-    vp8_fast_fdct4x4_c(input + 4, output + 16, pitch);
+    vp8_short_fdct4x4_c(input,   output,    pitch);
+    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
 }
 
 void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
@@ -177,17 +69,18 @@
     short *ip = input;
     short *op = output;
 
+
     for (i = 0; i < 4; i++)
     {
-        a1 = ip[0] + ip[3];
-        b1 = ip[1] + ip[2];
-        c1 = ip[1] - ip[2];
-        d1 = ip[0] - ip[3];
+        a1 = ((ip[0] + ip[2])<<2);
+        d1 = ((ip[1] + ip[3])<<2);
+        c1 = ((ip[1] - ip[3])<<2);
+        b1 = ((ip[0] - ip[2])<<2);
 
-        op[0] = a1 + b1;
-        op[1] = c1 + d1;
-        op[2] = a1 - b1;
-        op[3] = d1 - c1;
+        op[0] = a1 + d1 + (a1!=0);
+        op[1] = b1 + c1;
+        op[2] = b1 - c1;
+        op[3] = a1 - d1;
         ip += pitch / 2;
         op += 4;
     }
@@ -197,25 +90,25 @@
 
     for (i = 0; i < 4; i++)
     {
-        a1 = ip[0] + ip[12];
-        b1 = ip[4] + ip[8];
-        c1 = ip[4] - ip[8];
-        d1 = ip[0] - ip[12];
+        a1 = ip[0] + ip[8];
+        d1 = ip[4] + ip[12];
+        c1 = ip[4] - ip[12];
+        b1 = ip[0] - ip[8];
 
-        a2 = a1 + b1;
-        b2 = c1 + d1;
-        c2 = a1 - b1;
-        d2 = d1 - c1;
+        a2 = a1 + d1;
+        b2 = b1 + c1;
+        c2 = b1 - c1;
+        d2 = a1 - d1;
 
-        a2 += (a2 > 0);
-        b2 += (b2 > 0);
-        c2 += (c2 > 0);
-        d2 += (d2 > 0);
+        a2 += a2<0;
+        b2 += b2<0;
+        c2 += c2<0;
+        d2 += d2<0;
 
-        op[0] = (a2) >> 1;
-        op[4] = (b2) >> 1;
-        op[8] = (c2) >> 1;
-        op[12] = (d2) >> 1;
+        op[0] = (a2+3) >> 3;
+        op[4] = (b2+3) >> 3;
+        op[8] = (c2+3) >> 3;
+        op[12]= (d2+3) >> 3;
 
         ip++;
         op++;
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index fb307cf..fec3b4c 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -31,15 +32,14 @@
 #endif
 extern prototype_fdct(vp8_fdct_short8x4);
 
+// There is no fast4x4 (for now)
 #ifndef vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4  vp8_fast_fdct4x4_c
+#define vp8_fdct_fast4x4  vp8_short_fdct4x4_c
 #endif
-extern prototype_fdct(vp8_fdct_fast4x4);
 
 #ifndef vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4  vp8_fast_fdct8x4_c
+#define vp8_fdct_fast8x4  vp8_short_fdct8x4_c
 #endif
-extern prototype_fdct(vp8_fdct_fast8x4);
 
 #ifndef vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4  vp8_short_walsh4x4_c
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index a4e3772..d8a76d5 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -16,7 +17,7 @@
 #include "extend.h"
 #include "entropymode.h"
 #include "quant_common.h"
-#include "segmentation_common.h"
+#include "segmentation.h"
 #include "setupintrarecon.h"
 #include "encodeintra.h"
 #include "reconinter.h"
@@ -59,10 +60,9 @@
 unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 #endif
 
-// The first four entries are dummy values
 static const int qrounding_factors[129] =
 {
-    56, 56, 56, 56, 56, 56, 56, 56,
+    56, 56, 56, 56, 48, 48, 56, 56,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
@@ -83,7 +83,7 @@
 
 static const int qzbin_factors[129] =
 {
-    64, 64, 64, 64, 80, 80, 80, 80,
+    72, 72, 72, 72, 80, 80, 72, 72,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
@@ -102,6 +102,132 @@
     80,
 };
 
+static const int qrounding_factors_y2[129] =
+{
+    56, 56, 56, 56, 48, 48, 56, 56,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48,
+};
+
+static const int qzbin_factors_y2[129] =
+{
+    72, 72, 72, 72, 80, 80, 72, 72,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80,
+};
+
+//#define EXACT_QUANT
+#ifdef EXACT_QUANT
+static void vp8cx_invert_quant(short *quant, short *shift, short d)
+{
+    unsigned t;
+    int l;
+    t = d;
+    for(l = 0; t > 1; l++)
+        t>>=1;
+    t = 1 + (1<<(16+l))/d;
+    *quant = (short)(t - (1<<16));
+    *shift = l;
+}
+
+void vp8cx_init_quantizer(VP8_COMP *cpi)
+{
+    int r, c;
+    int i;
+    int quant_val;
+    int Q;
+
+    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
+
+    for (Q = 0; Q < QINDEX_RANGE; Q++)
+    {
+        // dc values
+        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
+        vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0,
+                           cpi->Y1quant_shift[Q][0] + 0, quant_val);
+        cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][0][0] = quant_val;
+        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
+        vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,
+                           cpi->Y2quant_shift[Q][0] + 0, quant_val);
+        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][0][0] = quant_val;
+        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
+        vp8cx_invert_quant(cpi->UVquant[Q][0] + 0,
+                           cpi->UVquant_shift[Q][0] + 0, quant_val);
+        cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
+        cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][0][0] = quant_val;
+        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        // all the ac values = ;
+        for (i = 1; i < 16; i++)
+        {
+            int rc = vp8_default_zig_zag1d[i];
+            r = (rc >> 2);
+            c = (rc & 3);
+
+            quant_val = vp8_ac_yquant(Q);
+            vp8cx_invert_quant(cpi->Y1quant[Q][r] + c,
+                               cpi->Y1quant_shift[Q][r] + c, quant_val);
+            cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.Y1dequant[Q][r][c] = quant_val;
+            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+
+            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+            vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,
+                               cpi->Y2quant_shift[Q][r] + c, quant_val);
+            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+            cpi->common.Y2dequant[Q][r][c] = quant_val;
+            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+
+            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+            vp8cx_invert_quant(cpi->UVquant[Q][r] + c,
+                               cpi->UVquant_shift[Q][r] + c, quant_val);
+            cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.UVdequant[Q][r][c] = quant_val;
+            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+        }
+    }
+}
+#else
 void vp8cx_init_quantizer(VP8_COMP *cpi)
 {
     int r, c;
@@ -123,8 +249,8 @@
 
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
         cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
         cpi->common.Y2dequant[Q][0][0] = quant_val;
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
@@ -151,8 +277,8 @@
 
             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
             cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
             cpi->common.Y2dequant[Q][r][c] = quant_val;
             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
@@ -165,13 +291,12 @@
         }
     }
 }
-
+#endif
 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
 {
     int i;
     int QIndex;
     MACROBLOCKD *xd = &x->e_mbd;
-    MB_MODE_INFO *mbmi = &xd->mbmi;
     int zbin_extra;
 
     // Select the baseline MB Q index.
@@ -179,12 +304,12 @@
     {
         // Abs Value
         if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
-            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
 
+            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
         // Delta Value
         else
         {
-            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
+            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
             QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
         }
     }
@@ -197,6 +322,7 @@
     for (i = 0; i < 16; i++)
     {
         x->block[i].quant = cpi->Y1quant[QIndex];
+        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
         x->block[i].zbin = cpi->Y1zbin[QIndex];
         x->block[i].round = cpi->Y1round[QIndex];
         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
@@ -210,6 +336,7 @@
     for (i = 16; i < 24; i++)
     {
         x->block[i].quant = cpi->UVquant[QIndex];
+        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
         x->block[i].zbin = cpi->UVzbin[QIndex];
         x->block[i].round = cpi->UVround[QIndex];
         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
@@ -220,6 +347,7 @@
     // Y2
     zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
     x->block[24].quant = cpi->Y2quant[QIndex];
+    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
     x->block[24].zbin = cpi->Y2zbin[QIndex];
     x->block[24].round = cpi->Y2round[QIndex];
     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
@@ -255,16 +383,15 @@
     int i;
     int recon_yoffset, recon_uvoffset;
     int mb_col;
-    int recon_y_stride = cm->last_frame.y_stride;
-    int recon_uv_stride = cm->last_frame.uv_stride;
+    int ref_fb_idx = cm->lst_fb_idx;
+    int dst_fb_idx = cm->new_fb_idx;
+    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
     int seg_map_index = (mb_row * cpi->common.mb_cols);
 
 
     // reset above block coeffs
-    xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT];
-    xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ];
-    xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
-    xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
+    xd->above_context = cm->above_context;
 
     xd->up_available = (mb_row != 0);
     recon_yoffset = (mb_row * recon_y_stride * 16);
@@ -289,9 +416,9 @@
         x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
         x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
 
-        xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset;
-        xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset;
-        xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset;
+        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
         xd->left_available = (mb_col != 0);
 
         // Is segmentation enabled
@@ -300,14 +427,14 @@
         {
             // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
             if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
-                xd->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
+                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
             else
-                xd->mbmi.segment_id = 0;
+                xd->mode_info_context->mbmi.segment_id = 0;
 
             vp8cx_mb_init_quantizer(cpi, x);
         }
         else
-            xd->mbmi.segment_id = 0;         // Set to Segment 0 by default
+            xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default
 
         x->active_ptr = cpi->active_map + seg_map_index + mb_col;
 
@@ -331,14 +458,14 @@
 
                 for (b = 0; b < xd->mbmi.partition_count; b++)
                 {
-                    inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++;
+                    inter_b_modes[x->partition->bmi[b].mode] ++;
                 }
             }
 
 #endif
 
             // Count of last ref frame 0,0 useage
-            if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME))
+            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                 cpi->inter_zz_count ++;
 
             // Special case code for cyclic refresh
@@ -346,14 +473,14 @@
             // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
             if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
             {
-                cpi->segmentation_map[seg_map_index+mb_col] = xd->mbmi.segment_id;
+                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
 
                 // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
                 // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
                 // else mark it as dirty (1).
-                if (xd->mbmi.segment_id)
+                if (xd->mode_info_context->mbmi.segment_id)
                     cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
-                else if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME))
+                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                 {
                     if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                         cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
@@ -366,10 +493,7 @@
 
         cpi->tplist[mb_row].stop = *tp;
 
-        xd->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
-
-        // store macroblock mode info into context array
-        vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi));
+        x->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
 
         for (i = 0; i < 16; i++)
             vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
@@ -383,27 +507,26 @@
         recon_uvoffset += 8;
 
         // Keep track of segment useage
-        segment_counts[xd->mbmi.segment_id] ++;
+        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
 
         // skip to next mb
         xd->mode_info_context++;
+        x->partition_info++;
 
-        xd->above_context[Y1CONTEXT] += 4;
-        xd->above_context[UCONTEXT ] += 2;
-        xd->above_context[VCONTEXT ] += 2;
-        xd->above_context[Y2CONTEXT] ++;
+        xd->above_context++;
         cpi->current_mb_col_main = mb_col;
     }
 
     //extend the recon for intra prediction
     vp8_extend_mb_row(
-        &cm->new_frame,
+        &cm->yv12_fb[dst_fb_idx],
         xd->dst.y_buffer + 16,
         xd->dst.u_buffer + 8,
         xd->dst.v_buffer + 8);
 
     // this is to account for the border
     xd->mode_info_context++;
+    x->partition_info++;
 }
 
 
@@ -448,7 +571,7 @@
     //}
 
 
-    xd->gf_active_ptr = (signed char *)cm->gf_active_flags;     // Point to base of GF active flags data structure
+    x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure
 
     x->vector_range = 32;
 
@@ -467,13 +590,13 @@
 
 #if 0
     // Experimental code
-    cpi->frame_distortion = 0;     
+    cpi->frame_distortion = 0;
     cpi->last_mb_distortion = 0;
 #endif
 
     totalrate = 0;
 
-    xd->mode_info = cm->mi - 1;
+    x->partition_info = x->pi;
 
     xd->mode_info_context = cm->mi;
     xd->mode_info_stride = cm->mode_info_stride;
@@ -509,12 +632,12 @@
     // Copy data over into macro block data sturctures.
 
     x->src = * cpi->Source;
-    xd->pre = cm->last_frame;
-    xd->dst = cm->new_frame;
+    xd->pre = cm->yv12_fb[cm->lst_fb_idx];
+    xd->dst = cm->yv12_fb[cm->new_fb_idx];
 
     // set up frame new frame for intra coded blocks
 
-    vp8_setup_intra_recon(&cm->new_frame);
+    vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
 
     vp8_build_block_offsets(x);
 
@@ -539,10 +662,10 @@
     //x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 ));
 #endif
 
-    xd->mbmi.mode = DC_PRED;
-    xd->mbmi.uv_mode = DC_PRED;
+    xd->mode_info_context->mbmi.mode = DC_PRED;
+    xd->mode_info_context->mbmi.uv_mode = DC_PRED;
 
-    xd->left_context = cm->left_context;
+    xd->left_context = &cm->left_context;
 
     vp8_zero(cpi->count_mb_ref_frame_usage)
     vp8_zero(cpi->ymode_count)
@@ -550,17 +673,7 @@
 
     x->mvc = cm->fc.mvc;
 
-    // vp8_zero( entropy_stats)
-    {
-        ENTROPY_CONTEXT **p = cm->above_context;
-        const size_t L = cm->mb_cols;
-
-        vp8_zero_array(p [Y1CONTEXT], L * 4)
-        vp8_zero_array(p [ UCONTEXT], L * 2)
-        vp8_zero_array(p [ VCONTEXT], L * 2)
-        vp8_zero_array(p [Y2CONTEXT], L)
-    }
-
+    vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
 
     {
         struct vpx_usec_timer  emr_timer;
@@ -619,6 +732,7 @@
                 x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 
                 xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
+                x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;
 
                 if (mb_row < cm->mb_rows - 1)
                     //WaitForSingleObject(cpi->h_event_main, INFINITE);
@@ -894,8 +1008,8 @@
 static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
 {
     const MACROBLOCKD *xd = & x->e_mbd;
-    const MB_PREDICTION_MODE m = xd->mbmi.mode;
-    const MB_PREDICTION_MODE uvm = xd->mbmi.uv_mode;
+    const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
+    const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
 
 #ifdef MODE_STATS
     const int is_key = cpi->common.frame_type == KEY_FRAME;
@@ -933,7 +1047,7 @@
     int rateuv_tokenonly = 0;
     int i;
 
-    x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
 #if !(CONFIG_REALTIME_ONLY)
 
@@ -949,7 +1063,7 @@
 
         error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
 
-        x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
 
         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
         rate += rateuv;
@@ -957,7 +1071,7 @@
         if (Error4x4 < Error16x16)
         {
             rate += rate4x4;
-            x->e_mbd.mbmi.mode = B_PRED;
+            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
 
             // get back the intra block modes
             for (i = 0; i < 16; i++)
@@ -997,7 +1111,7 @@
 
         for (mode = DC_PRED; mode <= TM_PRED; mode ++)
         {
-            x->e_mbd.mbmi.mode = mode;
+            x->e_mbd.mode_info_context->mbmi.mode = mode;
             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
             rate2  = x->mbmode_cost[x->e_mbd.frame_type][mode];
@@ -1017,17 +1131,17 @@
         else
             Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
 
-        x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
 
         if (Error4x4 < Error16x16)
         {
-            x->e_mbd.mbmi.mode = B_PRED;
+            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
             cpi->prediction_error += Error4x4;
         }
         else
         {
-            x->e_mbd.mbmi.mode = best_mode;
+            x->e_mbd.mode_info_context->mbmi.mode = best_mode;
             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
             cpi->prediction_error += Error16x16;
         }
@@ -1044,7 +1158,7 @@
 extern int cnt_pm;
 #endif
 
-extern void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x);
+extern void vp8_fix_contexts(MACROBLOCKD *x);
 
 int vp8cx_encode_inter_macroblock
 (
@@ -1061,7 +1175,7 @@
     x->skip = 0;
 
     if (xd->segmentation_enabled)
-        x->encode_breakout = cpi->segment_encode_breakout[xd->mbmi.segment_id];
+        x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
     else
         x->encode_breakout = cpi->oxcf.encode_breakout;
 
@@ -1092,17 +1206,17 @@
         if (cpi->cyclic_refresh_mode_enabled)
         {
             // Clear segment_id back to 0 if not coded (last frame 0,0)
-            if ((xd->mbmi.segment_id == 1) &&
-                ((xd->mbmi.ref_frame != LAST_FRAME) || (xd->mbmi.mode != ZEROMV)))
+            if ((xd->mode_info_context->mbmi.segment_id == 1) &&
+                ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
             {
-                xd->mbmi.segment_id = 0;
+                xd->mode_info_context->mbmi.segment_id = 0;
             }
         }
 
         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
         if (cpi->zbin_mode_boost_enabled)
         {
-            if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame != LAST_FRAME))
+            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME))
                 cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
             else
                 cpi->zbin_mode_boost = 0;
@@ -1111,15 +1225,15 @@
         vp8cx_mb_init_quantizer(cpi,  x);
     }
 
-    cpi->count_mb_ref_frame_usage[xd->mbmi.ref_frame] ++;
+    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
 
-    if (xd->mbmi.ref_frame == INTRA_FRAME)
+    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
-        x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
 
         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
 
-        if (xd->mbmi.mode == B_PRED)
+        if (xd->mode_info_context->mbmi.mode == B_PRED)
         {
             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
         }
@@ -1135,36 +1249,25 @@
         MV best_ref_mv;
         MV nearest, nearby;
         int mdcounts[4];
+        int ref_fb_idx;
 
         vp8_find_near_mvs(xd, xd->mode_info_context,
-                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
+                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
 
         vp8_build_uvmvs(xd, cpi->common.full_pixel);
 
-        // store motion vectors in our motion vector list
-        if (xd->mbmi.ref_frame == LAST_FRAME)
-        {
-            // Set up pointers for this macro block into the previous frame recon buffer
-            xd->pre.y_buffer = cpi->common.last_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = cpi->common.last_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = cpi->common.last_frame.v_buffer + recon_uvoffset;
-        }
-        else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
-        {
-            // Set up pointers for this macro block into the golden frame recon buffer
-            xd->pre.y_buffer = cpi->common.golden_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = cpi->common.golden_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = cpi->common.golden_frame.v_buffer + recon_uvoffset;
-        }
+        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+            ref_fb_idx = cpi->common.lst_fb_idx;
+        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+            ref_fb_idx = cpi->common.gld_fb_idx;
         else
-        {
-            // Set up pointers for this macro block into the alternate reference frame recon buffer
-            xd->pre.y_buffer = cpi->common.alt_ref_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset;
-        }
+            ref_fb_idx = cpi->common.alt_fb_idx;
 
-        if (xd->mbmi.mode == SPLITMV)
+        xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+        xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+        xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+        if (xd->mode_info_context->mbmi.mode == SPLITMV)
         {
             int i;
 
@@ -1177,19 +1280,19 @@
                 }
             }
         }
-        else if (xd->mbmi.mode == NEWMV)
+        else if (xd->mode_info_context->mbmi.mode == NEWMV)
         {
             cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
             cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
         }
 
-        if (!x->skip && !x->e_mbd.mbmi.force_no_skip)
+        if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
         {
             vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
 
             // Clear mb_skip_coeff if mb_no_coeff_skip is not set
             if (!cpi->common.mb_no_coeff_skip)
-                xd->mbmi.mb_skip_coeff = 0;
+                xd->mode_info_context->mbmi.mb_skip_coeff = 0;
 
         }
         else
@@ -1202,19 +1305,19 @@
     {
         if (cpi->common.mb_no_coeff_skip)
         {
-            if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
-                xd->mbmi.dc_diff = 0;
+            if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
+                xd->mode_info_context->mbmi.dc_diff = 0;
             else
-                xd->mbmi.dc_diff = 1;
+                xd->mode_info_context->mbmi.dc_diff = 1;
 
-            xd->mbmi.mb_skip_coeff = 1;
+            xd->mode_info_context->mbmi.mb_skip_coeff = 1;
             cpi->skip_true_count ++;
-            vp8_fix_contexts(cpi, xd);
+            vp8_fix_contexts(xd);
         }
         else
         {
             vp8_stuff_mb(cpi, xd, t);
-            xd->mbmi.mb_skip_coeff = 0;
+            xd->mode_info_context->mbmi.mb_skip_coeff = 0;
             cpi->skip_false_count ++;
         }
     }
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 403d020..af80857 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -52,7 +53,7 @@
 
     x->quantize_b(be, b);
 
-    x->e_mbd.mbmi.mb_skip_coeff &= (!b->eob);
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!b->eob);
 
     vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
 
@@ -65,11 +66,11 @@
 
     ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
 
-    x->short_fdct4x4rd(be->src_diff, be->coeff, 32);
+    x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
 
-    x->quantize_brd(be, b);
+    x->quantize_b(be, b);
 
-    x->e_mbd.mbmi.mb_skip_coeff &= (!b->eob);
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!b->eob);
 
     IDCT_INVOKE(&rtcd->common->idct, idct16)(b->dqcoeff, b->diff, 32);
 
@@ -108,7 +109,6 @@
 
 #if !(CONFIG_REALTIME_ONLY)
 #if 1
-
     if (x->optimize && x->rddiv > 1)
         vp8_optimize_mby(x, rtcd);
 
@@ -124,7 +124,7 @@
     {
         BLOCKD *d = &x->e_mbd.block[b];
 
-        switch (x->e_mbd.mbmi.mode)
+        switch (x->e_mbd.mode_info_context->mbmi.mode)
         {
 
         case DC_PRED:
@@ -155,12 +155,11 @@
 
     ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
 
-    vp8_transform_intra_mbyrd(x);
+    vp8_transform_intra_mby(x);
 
-    x->e_mbd.mbmi.mb_skip_coeff = 1;
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = 1;
 
-    vp8_quantize_mbyrd(x);
-
+    vp8_quantize_mby(x);
 
     vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
@@ -171,7 +170,7 @@
     {
         BLOCKD *d = &x->e_mbd.block[b];
 
-        switch (x->e_mbd.mbmi.mode)
+        switch (x->e_mbd.mode_info_context->mbmi.mode)
         {
 
         case DC_PRED:
@@ -224,11 +223,9 @@
 
     ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
 
-    vp8_transform_mbuvrd(x);
+    vp8_transform_mbuv(x);
 
-    vp8_quantize_mbuvrd(x);
-
-
+    vp8_quantize_mbuv(x);
 
     vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h
index 4a43ab2..5be23d1 100644
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index d825133..e10b515 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -12,6 +13,7 @@
 #include "encodemb.h"
 #include "reconinter.h"
 #include "quantize.h"
+#include "tokenize.h"
 #include "invtrans.h"
 #include "recon.h"
 #include "reconintra.h"
@@ -119,19 +121,11 @@
 
     for (i = 16; i < 24; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 16);
     }
 }
 
-void vp8_transform_mbuvrd(MACROBLOCK *x)
-{
-    int i;
-
-    for (i = 16; i < 24; i += 2)
-    {
-        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
-    }
-}
 
 void vp8_transform_intra_mby(MACROBLOCK *x)
 {
@@ -139,32 +133,19 @@
 
     for (i = 0; i < 16; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 32);
     }
 
     // build dc block from 16 y dc values
     vp8_build_dcblock(x);
 
     // do 2nd order transform on the dc block
-    x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+    x->short_walsh4x4(&x->block[24].src_diff[0],
+        &x->block[24].coeff[0], 8);
 
 }
 
-void vp8_transform_intra_mbyrd(MACROBLOCK *x)
-{
-    int i;
-
-    for (i = 0; i < 16; i += 2)
-    {
-        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
-    }
-
-    // build dc block from 16 y dc values
-    vp8_build_dcblock(x);
-
-    // do 2nd order transform on the dc block
-    x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
-}
 
 void vp8_transform_mb(MACROBLOCK *x)
 {
@@ -172,21 +153,24 @@
 
     for (i = 0; i < 16; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 32);
     }
 
     // build dc block from 16 y dc values
-    if (x->e_mbd.mbmi.mode != SPLITMV)
+    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
         vp8_build_dcblock(x);
 
     for (i = 16; i < 24; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 16);
     }
 
     // do 2nd order transform on the dc block
-    if (x->e_mbd.mbmi.mode != SPLITMV)
-        x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
+        x->short_walsh4x4(&x->block[24].src_diff[0],
+        &x->block[24].coeff[0], 8);
 
 }
 
@@ -196,39 +180,19 @@
 
     for (i = 0; i < 16; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 32);
     }
 
     // build dc block from 16 y dc values
-    if (x->e_mbd.mbmi.mode != SPLITMV)
+    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
     {
         vp8_build_dcblock(x);
-        x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+        x->short_walsh4x4(&x->block[24].src_diff[0],
+            &x->block[24].coeff[0], 8);
     }
 }
 
-void vp8_transform_mbrd(MACROBLOCK *x)
-{
-    int i;
-
-    for (i = 0; i < 16; i += 2)
-    {
-        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
-    }
-
-    // build dc block from 16 y dc values
-    if (x->e_mbd.mbmi.mode != SPLITMV)
-        vp8_build_dcblock(x);
-
-    for (i = 16; i < 24; i += 2)
-    {
-        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
-    }
-
-    // do 2nd order transform on the dc block
-    if (x->e_mbd.mbmi.mode != SPLITMV)
-        x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
-}
 
 void vp8_stuff_inter16x16(MACROBLOCK *x)
 {
@@ -265,792 +229,411 @@
 }
 
 #if !(CONFIG_REALTIME_ONLY)
-extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
-extern int vp8_dct_value_cost[DCT_MAX_VALUE*2];
-extern int *vp8_dct_value_cost_ptr;
+#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
+#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
 
-static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
+typedef struct vp8_token_state vp8_token_state;
+
+struct vp8_token_state{
+  int           rate;
+  int           error;
+  signed char   next;
+  signed char   token;
+  short         qc;
+};
+
+void vp8_optimize_b(MACROBLOCK *mb, int i, int type,
+                    ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+                    const VP8_ENCODER_RTCD *rtcd)
 {
-    int c = !type;              /* start at coef 0, unless Y with Y2 */
-    int eob = b->eob;
-    int pt ;    /* surrounding block/prev coef predictor */
-    int cost = 0;
-    short *qcoeff_ptr = b->qcoeff;
+    BLOCK *b;
+    BLOCKD *d;
+    vp8_token_state tokens[17][2];
+    unsigned best_mask[2];
+    const short *dequant_ptr;
+    const short *coeff_ptr;
+    short *qcoeff_ptr;
+    short *dqcoeff_ptr;
+    int eob;
+    int i0;
+    int rc;
+    int x;
+    int sz;
+    int next;
+    int path;
+    int rdmult;
+    int rddiv;
+    int final_eob;
+    int rd_cost0;
+    int rd_cost1;
+    int rate0;
+    int rate1;
+    int error0;
+    int error1;
+    int t0;
+    int t1;
+    int best;
+    int band;
+    int pt;
 
+    b = &mb->block[i];
+    d = &mb->e_mbd.block[i];
+
+    /* Enable this to test the effect of RDO as a replacement for the dynamic
+     *  zero bin instead of an augmentation of it.
+     */
+#if 0
+    vp8_strict_quantize_b(b, d);
+#endif
+
+    dequant_ptr = &d->dequant[0][0];
+    coeff_ptr = &b->coeff[0];
+    qcoeff_ptr = d->qcoeff;
+    dqcoeff_ptr = d->dqcoeff;
+    i0 = !type;
+    eob = d->eob;
+
+    /* Now set up a Viterbi trellis to evaluate alternative roundings. */
+    /* TODO: These should vary with the block type, since the quantizer does. */
+    rdmult = mb->rdmult << 2;
+    rddiv = mb->rddiv;
+    best_mask[0] = best_mask[1] = 0;
+    /* Initialize the sentinel node of the trellis. */
+    tokens[eob][0].rate = 0;
+    tokens[eob][0].error = 0;
+    tokens[eob][0].next = 16;
+    tokens[eob][0].token = DCT_EOB_TOKEN;
+    tokens[eob][0].qc = 0;
+    *(tokens[eob] + 1) = *(tokens[eob] + 0);
+    next = eob;
+    for (i = eob; i-- > i0;)
+    {
+        int base_bits;
+        int d2;
+        int dx;
+
+        rc = vp8_default_zig_zag1d[i];
+        x = qcoeff_ptr[rc];
+        /* Only add a trellis state for non-zero coefficients. */
+        if (x)
+        {
+            int shortcut=0;
+            error0 = tokens[next][0].error;
+            error1 = tokens[next][1].error;
+            /* Evaluate the first possibility for this state. */
+            rate0 = tokens[next][0].rate;
+            rate1 = tokens[next][1].rate;
+            t0 = (vp8_dct_value_tokens_ptr + x)->Token;
+            /* Consider both possible successor states. */
+            if (next < 16)
+            {
+                band = vp8_coef_bands[i + 1];
+                pt = vp8_prev_token_class[t0];
+                rate0 +=
+                    mb->token_costs[type][band][pt][tokens[next][0].token];
+                rate1 +=
+                    mb->token_costs[type][band][pt][tokens[next][1].token];
+            }
+            rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
+            rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
+            if (rd_cost0 == rd_cost1)
+            {
+                rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
+                rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
+            }
+            /* And pick the best. */
+            best = rd_cost1 < rd_cost0;
+            base_bits = *(vp8_dct_value_cost_ptr + x);
+            dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
+            d2 = dx*dx;
+            tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
+            tokens[i][0].error = d2 + (best ? error1 : error0);
+            tokens[i][0].next = next;
+            tokens[i][0].token = t0;
+            tokens[i][0].qc = x;
+            best_mask[0] |= best << i;
+            /* Evaluate the second possibility for this state. */
+            rate0 = tokens[next][0].rate;
+            rate1 = tokens[next][1].rate;
+
+            if((abs(x)*dequant_ptr[rc]>abs(coeff_ptr[rc])) &&
+               (abs(x)*dequant_ptr[rc]<abs(coeff_ptr[rc])+dequant_ptr[rc]))
+                shortcut = 1;
+            else
+                shortcut = 0;
+
+            if(shortcut)
+            {
+                sz = -(x < 0);
+                x -= 2*sz + 1;
+            }
+
+            /* Consider both possible successor states. */
+            if (!x)
+            {
+                /* If we reduced this coefficient to zero, check to see if
+                 *  we need to move the EOB back here.
+                 */
+                t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
+                    DCT_EOB_TOKEN : ZERO_TOKEN;
+                t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
+                    DCT_EOB_TOKEN : ZERO_TOKEN;
+            }
+            else
+            {
+                t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token;
+            }
+            if (next < 16)
+            {
+                band = vp8_coef_bands[i + 1];
+                if(t0!=DCT_EOB_TOKEN)
+                {
+                    pt = vp8_prev_token_class[t0];
+                    rate0 += mb->token_costs[type][band][pt][
+                        tokens[next][0].token];
+                }
+                if(t1!=DCT_EOB_TOKEN)
+                {
+                    pt = vp8_prev_token_class[t1];
+                    rate1 += mb->token_costs[type][band][pt][
+                        tokens[next][1].token];
+                }
+            }
+
+            rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
+            rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
+            if (rd_cost0 == rd_cost1)
+            {
+                rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
+                rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
+            }
+            /* And pick the best. */
+            best = rd_cost1 < rd_cost0;
+            base_bits = *(vp8_dct_value_cost_ptr + x);
+
+            if(shortcut)
+            {
+                dx -= (dequant_ptr[rc] + sz) ^ sz;
+                d2 = dx*dx;
+            }
+            tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
+            tokens[i][1].error = d2 + (best ? error1 : error0);
+            tokens[i][1].next = next;
+            tokens[i][1].token =best?t1:t0;
+            tokens[i][1].qc = x;
+            best_mask[1] |= best << i;
+            /* Finally, make this the new head of the trellis. */
+            next = i;
+        }
+        /* There's no choice to make for a zero coefficient, so we don't
+         *  add a new trellis node, but we do need to update the costs.
+         */
+        else
+        {
+            band = vp8_coef_bands[i + 1];
+            t0 = tokens[next][0].token;
+            t1 = tokens[next][1].token;
+            /* Update the cost of each path if we're past the EOB token. */
+            if (t0 != DCT_EOB_TOKEN)
+            {
+                tokens[next][0].rate += mb->token_costs[type][band][0][t0];
+                tokens[next][0].token = ZERO_TOKEN;
+            }
+            if (t1 != DCT_EOB_TOKEN)
+            {
+                tokens[next][1].rate += mb->token_costs[type][band][0][t1];
+                tokens[next][1].token = ZERO_TOKEN;
+            }
+            /* Don't update next, because we didn't add a new node. */
+        }
+    }
+
+    /* Now pick the best path through the whole trellis. */
+    band = vp8_coef_bands[i + 1];
     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-
-# define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
-
-    for (; c < eob; c++)
+    rate0 = tokens[next][0].rate;
+    rate1 = tokens[next][1].rate;
+    error0 = tokens[next][0].error;
+    error1 = tokens[next][1].error;
+    t0 = tokens[next][0].token;
+    t1 = tokens[next][1].token;
+    rate0 += mb->token_costs[type][band][pt][t0];
+    rate1 += mb->token_costs[type][band][pt][t1];
+    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
+    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
+    if (rd_cost0 == rd_cost1)
     {
-        int v = QC(c);
-        int t = vp8_dct_value_tokens_ptr[v].Token;
-        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
-        cost += vp8_dct_value_cost_ptr[v];
-        pt = vp8_prev_token_class[t];
+        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
+        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
     }
+    best = rd_cost1 < rd_cost0;
+    final_eob = i0 - 1;
+    for (i = next; i < eob; i = next)
+    {
+        x = tokens[i][best].qc;
+        if (x)
+            final_eob = i;
+        rc = vp8_default_zig_zag1d[i];
+        qcoeff_ptr[rc] = x;
+        dqcoeff_ptr[rc] = x * dequant_ptr[rc];
+        next = tokens[i][best].next;
+        best = (best_mask[best] >> i) & 1;
+    }
+    final_eob++;
 
-# undef QC
-
-    if (c < 16)
-        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
-
-    return cost;
+    d->eob = final_eob;
+    *a = *l = (d->eob != !type);
 }
 
-static int mbycost_coeffs(MACROBLOCK *mb)
-{
-    int cost = 0;
-    int b;
-    TEMP_CONTEXT t;
-    int type = 0;
-
-    MACROBLOCKD *x = &mb->e_mbd;
-
-    vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4);
-
-    if (x->mbmi.mode == SPLITMV)
-        type = 3;
-
-    for (b = 0; b < 16; b++)
-        cost += cost_coeffs(mb, x->block + b, type,
-                            t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
-
-    return cost;
-}
-
-#define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
-
-void vp8_optimize_b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
-{
-    BLOCK *b = &x->block[i];
-    BLOCKD *bd = &x->e_mbd.block[i];
-    short *dequant_ptr = &bd->dequant[0][0];
-    int nzpos[16] = {0};
-    short saved_qcoefs[16];
-    short saved_dqcoefs[16];
-    int baserate, baseerror, baserd;
-    int rate, error, thisrd;
-    int k;
-    int nzcoefcount = 0;
-    int nc, bestnc = 0;
-    int besteob;
-
-    // count potential coefficient to be optimized
-    for (k = !type; k < 16; k++)
-    {
-        int qcoef = abs(bd->qcoeff[k]);
-        int coef = abs(b->coeff[k]);
-        int dq   = dequant_ptr[k];
-
-        if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq))
-        {
-            nzpos[nzcoefcount] = k;
-            nzcoefcount++;
-        }
-    }
-
-    // if nothing here, do nothing for this block.
-    if (!nzcoefcount)
-    {
-        *a = *l = (bd->eob != !type);
-        return;
-    }
-
-    // save a copy of quantized coefficients
-    vpx_memcpy(saved_qcoefs, bd->qcoeff, 32);
-    vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32);
-
-    besteob   = bd->eob;
-    baserate  = cost_coeffs(x, bd, type, a, l);
-    baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
-    baserd    = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
-    for (nc = 1; nc < (1 << nzcoefcount); nc++)
-    {
-        //reset coefficients
-        vpx_memcpy(bd->qcoeff,  saved_qcoefs,  32);
-        vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int pos = nzpos[k];
-
-            if ((nc & (1 << k)))
-            {
-                int cur_qcoef = bd->qcoeff[pos];
-
-                if (cur_qcoef < 0)
-                {
-                    bd->qcoeff[pos]++;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    bd->qcoeff[pos]--;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-            }
-        }
-
-        {
-            int eob = -1;
-            int rc;
-            int m;
-
-            for (m = 0; m < 16; m++)
-            {
-                rc   = vp8_default_zig_zag1d[m];
-
-                if (bd->qcoeff[rc])
-                    eob = m;
-            }
-
-            bd->eob = eob + 1;
-        }
-
-        rate  = cost_coeffs(x, bd, type, a, l);
-        error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
-        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
-        if (thisrd < baserd)
-        {
-            baserd = thisrd;
-            bestnc = nc;
-            besteob = bd->eob;
-        }
-    }
-
-    //reset coefficients
-    vpx_memcpy(bd->qcoeff,  saved_qcoefs, 32);
-    vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
-    if (bestnc)
-    {
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int pos = nzpos[k];
-
-            if (bestnc & (1 << k))
-            {
-                int cur_qcoef = bd->qcoeff[pos];
-
-                if (cur_qcoef < 0)
-                {
-                    bd->qcoeff[pos]++;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    bd->qcoeff[pos]--;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-            }
-        }
-
-#if 0
-        {
-            int eob = -1;
-            int rc;
-            int m;
-
-            for (m = 0; m < 16; m++)
-            {
-                rc   = vp8_default_zig_zag1d[m];
-
-                if (bd->qcoeff[rc])
-                    eob = m;
-            }
-
-            bd->eob = eob + 1;
-        }
-#endif
-    }
-
-#if 1
-    bd->eob = besteob;
-#endif
-#if 0
-    {
-        int eob = -1;
-        int rc;
-        int m;
-
-        for (m = 0; m < 16; m++)
-        {
-            rc   = vp8_default_zig_zag1d[m];
-
-            if (bd->qcoeff[rc])
-                eob = m;
-        }
-
-        bd->eob = eob + 1;
-    }
-
-#endif
-    *a = *l = (bd->eob != !type);
-    return;
-}
-
-void vp8_optimize_bplus(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
-{
-    BLOCK *b = &x->block[i];
-    BLOCKD *bd = &x->e_mbd.block[i];
-    short *dequant_ptr = &bd->dequant[0][0];
-    int nzpos[16] = {0};
-    short saved_qcoefs[16];
-    short saved_dqcoefs[16];
-    int baserate, baseerror, baserd;
-    int rate, error, thisrd;
-    int k;
-    int nzcoefcount = 0;
-    int nc, bestnc = 0;
-    int besteob;
-
-    // count potential coefficient to be optimized
-    for (k = !type; k < 16; k++)
-    {
-        int qcoef = abs(bd->qcoeff[k]);
-        int coef = abs(b->coeff[k]);
-        int dq   = dequant_ptr[k];
-
-        if (qcoef && (qcoef * dq < coef) && (coef < (qcoef * dq + dq)))
-        {
-            nzpos[nzcoefcount] = k;
-            nzcoefcount++;
-        }
-    }
-
-    // if nothing here, do nothing for this block.
-    if (!nzcoefcount)
-    {
-        //do not update context, we need do the other half.
-        //*a = *l = (bd->eob != !type);
-        return;
-    }
-
-    // save a copy of quantized coefficients
-    vpx_memcpy(saved_qcoefs, bd->qcoeff, 32);
-    vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32);
-
-    besteob   = bd->eob;
-    baserate  = cost_coeffs(x, bd, type, a, l);
-    baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
-    baserd    = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
-    for (nc = 1; nc < (1 << nzcoefcount); nc++)
-    {
-        //reset coefficients
-        vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
-        vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int pos = nzpos[k];
-
-            if ((nc & (1 << k)))
-            {
-                int cur_qcoef = bd->qcoeff[pos];
-
-                if (cur_qcoef < 0)
-                {
-                    bd->qcoeff[pos]--;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    bd->qcoeff[pos]++;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-            }
-        }
-
-        {
-            int eob = -1;
-            int rc;
-            int m;
-
-            for (m = 0; m < 16; m++)
-            {
-                rc   = vp8_default_zig_zag1d[m];
-
-                if (bd->qcoeff[rc])
-                    eob = m;
-            }
-
-            bd->eob = eob + 1;
-        }
-
-        rate  = cost_coeffs(x, bd, type, a, l);
-        error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
-        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
-        if (thisrd < baserd)
-        {
-            baserd = thisrd;
-            bestnc = nc;
-            besteob = bd->eob;
-        }
-    }
-
-    //reset coefficients
-    vpx_memcpy(bd->qcoeff,  saved_qcoefs, 32);
-    vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
-    if (bestnc)
-    {
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int pos = nzpos[k];
-
-            if (bestnc & (1 << k))
-            {
-                int cur_qcoef = bd->qcoeff[pos];
-
-                if (cur_qcoef < 0)
-                {
-                    bd->qcoeff[pos]++;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    bd->qcoeff[pos]--;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-            }
-        }
-    }
-
-    bd->eob = besteob;
-    //do not update context, we need do the other half.
-    //*a = *l = (bd->eob != !type);
-    return;
-}
-
-void vp8_optimize_y2b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
-{
-
-    BLOCK *b = &x->block[i];
-    BLOCKD *bd = &x->e_mbd.block[i];
-    short *dequant_ptr = &bd->dequant[0][0];
-
-    int baserate, baseerror, baserd;
-    int rate, error, thisrd;
-    int k;
-
-    if (bd->eob == 0)
-        return;
-
-    baserate  = cost_coeffs(x, bd, type, a, l);
-    baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
-    baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
-    for (k = 0; k < 16; k++)
-    {
-        int cur_qcoef = bd->qcoeff[k];
-
-        if (!cur_qcoef)
-            continue;
-
-        if (cur_qcoef < 0)
-        {
-            bd->qcoeff[k]++;
-            bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k];
-        }
-        else
-        {
-            bd->qcoeff[k]--;
-            bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k];
-        }
-
-        if (bd->qcoeff[k] == 0)
-        {
-            int eob = -1;
-            int rc;
-            int l;
-
-            for (l = 0; l < 16; l++)
-            {
-                rc   = vp8_default_zig_zag1d[l];
-
-                if (bd->qcoeff[rc])
-                    eob = l;
-            }
-
-            bd->eob = eob + 1;
-        }
-
-        rate  =   cost_coeffs(x, bd, type, a, l);
-        error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
-        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
-        if (thisrd > baserd)
-        {
-            bd->qcoeff[k] = cur_qcoef;
-            bd->dqcoeff[k] = cur_qcoef * dequant_ptr[k];
-        }
-        else
-        {
-            baserd = thisrd;
-        }
-
-    }
-
-    {
-        int eob = -1;
-        int rc;
-
-        for (k = 0; k < 16; k++)
-        {
-            rc   = vp8_default_zig_zag1d[k];
-
-            if (bd->qcoeff[rc])
-                eob = k;
-        }
-
-        bd->eob = eob + 1;
-    }
-
-    return;
-}
-
-
 void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
-    int cost = 0;
     int b;
-    TEMP_CONTEXT t, t2;
-    int type = 0;
+    int type;
+    int has_2nd_order;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
 
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
+    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
-    if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
-        type = 3;
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
+
+    has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
+    type = has_2nd_order ? 0 : 3;
 
     for (b = 0; b < 16; b++)
     {
-        //vp8_optimize_bplus(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
-        vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+        vp8_optimize_b(x, b, type,
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
-    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
-
     for (b = 16; b < 20; b++)
     {
-        //vp8_optimize_bplus(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
-        vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+        vp8_optimize_b(x, b, vp8_block2type[b],
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
     for (b = 20; b < 24; b++)
     {
-        //vp8_optimize_bplus(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]);
-        vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+        vp8_optimize_b(x, b, vp8_block2type[b],
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
+
+
+    /*
+    if (has_2nd_order)
+    {
+        vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT],
+            x->e_mbd.left_context[Y2CONTEXT], 1);
+        vp8_optimize_b(x, 24, 1, t.a, t.l, rtcd);
+    }
+    */
 }
 
 
 
-void vp8_super_slow_yquant_optimization(MACROBLOCK *x, int type, const VP8_ENCODER_RTCD *rtcd)
-{
-    BLOCK  *b = &x->block[0];
-    BLOCKD *bd = &x->e_mbd.block[0];
-    short *dequant_ptr = &bd->dequant[0][0];
-    struct
-    {
-        int block;
-        int pos;
-    } nzpos[256];
-    short saved_qcoefs[256];
-    short saved_dqcoefs[256];
-    short *coef_ptr   = x->coeff;
-    short *qcoef_ptr  = x->e_mbd.qcoeff;
-    short *dqcoef_ptr = x->e_mbd.dqcoeff;
-
-    int baserate, baseerror, baserd;
-    int rate, error, thisrd;
-    int i, k;
-    int nzcoefcount = 0;
-    int nc, bestnc = 0;
-    int besteob;
-
-    //this code has assumption in macroblock coeff buffer layout
-    for (i = 0; i < 16; i++)
-    {
-        // count potential coefficient to be optimized
-        for (k = !type; k < 16; k++)
-        {
-            int qcoef = abs(qcoef_ptr[i*16 + k]);
-            int coef = abs(coef_ptr[i*16 + k]);
-            int dq   = dequant_ptr[k];
-
-            if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq))
-            {
-                nzpos[nzcoefcount].block = i;
-                nzpos[nzcoefcount].pos   = k;
-                nzcoefcount++;
-            }
-        }
-    }
-
-    // if nothing here, do nothing for this macro_block.
-    if (!nzcoefcount || nzcoefcount > 15)
-    {
-        return;
-    }
-
-    /******************************************************************************
-    looking from each coeffient's perspective, each identifed coefficent above could
-    have 2 values:roundeddown(x) and roundedup(x). Therefore the total number of
-    different states is less than 2**nzcoefcount.
-    ******************************************************************************/
-    // save the qunatized coefficents and dequantized coefficicents
-    vpx_memcpy(saved_qcoefs, x->e_mbd.qcoeff,  256);
-    vpx_memcpy(saved_dqcoefs, x->e_mbd.dqcoeff, 256);
-
-    baserate    = mbycost_coeffs(x);
-    baseerror   = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type);
-    baserd      = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
-    for (nc = 1; nc < (1 << nzcoefcount); nc++)
-    {
-        //reset coefficients
-        vpx_memcpy(x->e_mbd.qcoeff,  saved_qcoefs, 256);
-        vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256);
-
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int bk  = nzpos[k].block;
-            int pos = nzpos[k].pos;
-            int mbkpos  = bk * 16 + pos;
-
-            if ((nc & (1 << k)))
-            {
-                int cur_qcoef = x->e_mbd.qcoeff[mbkpos];
-
-                if (cur_qcoef < 0)
-                {
-                    x->e_mbd.qcoeff[mbkpos]++;
-                    x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    x->e_mbd.qcoeff[mbkpos]--;
-                    x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
-                }
-            }
-        }
-
-        for (i = 0; i < 16; i++)
-        {
-            BLOCKD *bd = &x->e_mbd.block[i];
-            {
-                int eob = -1;
-                int rc;
-                int l;
-
-                for (l = 0; l < 16; l++)
-                {
-                    rc   = vp8_default_zig_zag1d[l];
-
-                    if (bd->qcoeff[rc])
-                        eob = l;
-                }
-
-                bd->eob = eob + 1;
-            }
-        }
-
-        rate  = mbycost_coeffs(x);
-        error = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type);;
-        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
-        if (thisrd < baserd)
-        {
-            baserd = thisrd;
-            bestnc = nc;
-            besteob = bd->eob;
-        }
-    }
-
-    //reset coefficients
-    vpx_memcpy(x->e_mbd.qcoeff,  saved_qcoefs, 256);
-    vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256);
-
-    if (bestnc)
-    {
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int bk  = nzpos[k].block;
-            int pos = nzpos[k].pos;
-            int mbkpos  = bk * 16 + pos;
-
-            if ((nc & (1 << k)))
-            {
-                int cur_qcoef = x->e_mbd.qcoeff[mbkpos];
-
-                if (cur_qcoef < 0)
-                {
-                    x->e_mbd.qcoeff[mbkpos]++;
-                    x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    x->e_mbd.qcoeff[mbkpos]--;
-                    x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
-                }
-            }
-        }
-    }
-
-    for (i = 0; i < 16; i++)
-    {
-        BLOCKD *bd = &x->e_mbd.block[i];
-        {
-            int eob = -1;
-            int rc;
-            int l;
-
-            for (l = 0; l < 16; l++)
-            {
-                rc   = vp8_default_zig_zag1d[l];
-
-                if (bd->qcoeff[rc])
-                    eob = l;
-            }
-
-            bd->eob = eob + 1;
-        }
-    }
-
-    return;
-}
-
 static void vp8_find_mb_skip_coef(MACROBLOCK *x)
 {
     int i;
 
-    x->e_mbd.mbmi.mb_skip_coeff = 1;
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = 1;
 
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
+    if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
     {
         for (i = 0; i < 16; i++)
         {
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
+            x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
         }
 
         for (i = 16; i < 25; i++)
         {
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
+            x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
         }
     }
     else
     {
         for (i = 0; i < 24; i++)
         {
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
+            x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
         }
     }
 }
 
 
-void vp8_optimize_mb_slow(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
-{
-    int cost = 0;
-    int b;
-    TEMP_CONTEXT t, t2;
-    int type = 0;
-
-
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
-
-    if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
-        type = 3;
-
-    vp8_super_slow_yquant_optimization(x, type, rtcd);
-    /*
-    for(b=0;b<16;b++)
-    {
-        vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
-    }
-    */
-
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
-
-    for (b = 16; b < 20; b++)
-    {
-        vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
-    }
-
-    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
-
-    for (b = 20; b < 24; b++)
-    {
-        vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
-    }
-}
-
-
 void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
-    int cost = 0;
     int b;
-    TEMP_CONTEXT t;
-    int type = 0;
+    int type;
+    int has_2nd_order;
 
-    if (!x->e_mbd.above_context[Y1CONTEXT])
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
+
+    if (!x->e_mbd.above_context)
         return;
 
-    if (!x->e_mbd.left_context[Y1CONTEXT])
+    if (!x->e_mbd.left_context)
         return;
 
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
+    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
-    if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
-        type = 3;
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
+
+    has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
+    type = has_2nd_order ? 0 : 3;
 
     for (b = 0; b < 16; b++)
     {
-        vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+        vp8_optimize_b(x, b, type,
+        ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
+    /*
+    if (has_2nd_order)
+    {
+        vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT],
+            x->e_mbd.left_context[Y2CONTEXT], 1);
+        vp8_optimize_b(x, 24, 1, t.a, t.l, rtcd);
+    }
+    */
 }
 
 void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
-    int cost = 0;
     int b;
-    TEMP_CONTEXT t, t2;
-    int type = 0;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
 
-    if (!x->e_mbd.above_context[UCONTEXT])
+    if (!x->e_mbd.above_context)
         return;
 
-    if (!x->e_mbd.left_context[UCONTEXT])
+    if (!x->e_mbd.left_context)
         return;
 
-    if (!x->e_mbd.above_context[VCONTEXT])
-        return;
+    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
-    if (!x->e_mbd.left_context[VCONTEXT])
-        return;
-
-
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
-    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
     for (b = 16; b < 20; b++)
     {
         vp8_optimize_b(x, b, vp8_block2type[b],
-                       t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
-
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
     for (b = 20; b < 24; b++)
     {
         vp8_optimize_b(x, b, vp8_block2type[b],
-                       t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
 }
@@ -1067,15 +650,11 @@
     vp8_quantize_mb(x);
 
 #if !(CONFIG_REALTIME_ONLY)
-#if 1
-
     if (x->optimize && x->rddiv > 1)
     {
         vp8_optimize_mb(x, rtcd);
         vp8_find_mb_skip_coef(x);
     }
-
-#endif
 #endif
 
     vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
@@ -1122,8 +701,8 @@
     vp8_build_inter_predictors_mbuv(&x->e_mbd);
     ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
 
-    vp8_transform_mbuvrd(x);
+    vp8_transform_mbuv(x);
 
-    vp8_quantize_mbuvrd(x);
+    vp8_quantize_mbuv(x);
 
 }
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 91ca8f5..08f75c3 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -99,9 +100,7 @@
 void vp8_build_dcblock(MACROBLOCK *b);
 void vp8_transform_mb(MACROBLOCK *mb);
 void vp8_transform_mbuv(MACROBLOCK *x);
-void vp8_transform_mbuvrd(MACROBLOCK *x);
 void vp8_transform_intra_mby(MACROBLOCK *x);
-void vp8_transform_intra_mbyrd(MACROBLOCK *x);
 void Encode16x16Y(MACROBLOCK *x);
 void Encode16x16UV(MACROBLOCK *x);
 void vp8_encode_inter16x16uv(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index f287edc..cce7530 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -252,7 +253,7 @@
     vp8_writer *const w,
     struct mv_context *cur_mvc,
     const struct mv_context *default_mvc_,
-    const struct mv_context *update_mvc,       
+    const struct mv_context *update_mvc,
     const unsigned int events [MVvals],
     unsigned int rc,
     int *updated
@@ -282,8 +283,6 @@
 
     //j=0
     {
-        int j = 0;
-
         const int c = events [mv_max];
 
         is_short_ct [0] += c;     // Short vector
diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h
index 1c1f450..e4481bf 100644
--- a/vp8/encoder/encodemv.h
+++ b/vp8/encoder/encodemv.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index a0b50d2..962e741 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -27,7 +28,7 @@
     int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread;
     VP8_COMP *cpi   = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1);
     MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2);
-    ENTROPY_CONTEXT mb_row_left_context[4][4];
+    ENTROPY_CONTEXT_PLANES mb_row_left_context;
 
     //printf("Started thread %d\n", ithread);
 
@@ -55,8 +56,10 @@
                     int i;
                     int recon_yoffset, recon_uvoffset;
                     int mb_col;
-                    int recon_y_stride = cm->last_frame.y_stride;
-                    int recon_uv_stride = cm->last_frame.uv_stride;
+                    int ref_fb_idx = cm->lst_fb_idx;
+                    int dst_fb_idx = cm->new_fb_idx;
+                    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+                    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
                     volatile int *last_row_current_mb_col;
 
                     if (ithread > 0)
@@ -65,11 +68,8 @@
                         last_row_current_mb_col = &cpi->current_mb_col_main;
 
                     // reset above block coeffs
-                    xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT];
-                    xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ];
-                    xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
-                    xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
-                    xd->left_context = mb_row_left_context;
+                    xd->above_context = cm->above_context;
+                    xd->left_context = &mb_row_left_context;
 
                     vp8_zero(mb_row_left_context);
 
@@ -106,9 +106,9 @@
                         x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
                         x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
 
-                        xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset;
-                        xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset;
-                        xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset;
+                        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+                        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+                        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
                         xd->left_available = (mb_col != 0);
 
                         // Is segmentation enabled
@@ -117,14 +117,14 @@
                         {
                             // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
                             if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
-                                xd->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
+                                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
                             else
-                                xd->mbmi.segment_id = 0;
+                                xd->mode_info_context->mbmi.segment_id = 0;
 
                             vp8cx_mb_init_quantizer(cpi, x);
                         }
                         else
-                            xd->mbmi.segment_id = 0;         // Set to Segment 0 by default
+                            xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default
 
 
                         if (cm->frame_type == KEY_FRAME)
@@ -147,24 +147,21 @@
 
                                 for (b = 0; b < xd->mbmi.partition_count; b++)
                                 {
-                                    inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++;
+                                    inter_b_modes[x->partition->bmi[b].mode] ++;
                                 }
                             }
 
 #endif
 
                             // Count of last ref frame 0,0 useage
-                            if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME))
+                            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                                 cpi->inter_zz_count ++;
 
                         }
 
                         cpi->tplist[mb_row].stop = *tp;
 
-                        xd->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
-
-                        // store macroblock mode info into context array
-                        vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi));
+                        x->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
 
                         for (i = 0; i < 16; i++)
                             vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
@@ -178,15 +175,13 @@
                         recon_uvoffset += 8;
 
                         // Keep track of segment useage
-                        segment_counts[xd->mbmi.segment_id] ++;
+                        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
 
                         // skip to next mb
                         xd->mode_info_context++;
+                        x->partition_info++;
 
-                        xd->above_context[Y1CONTEXT] += 4;
-                        xd->above_context[UCONTEXT ] += 2;
-                        xd->above_context[VCONTEXT ] += 2;
-                        xd->above_context[Y2CONTEXT] ++;
+                        xd->above_context++;
 
                         cpi->mb_row_ei[ithread].current_mb_col = mb_col;
 
@@ -194,19 +189,21 @@
 
                     //extend the recon for intra prediction
                     vp8_extend_mb_row(
-                        &cm->new_frame,
+                        &cm->yv12_fb[dst_fb_idx],
                         xd->dst.y_buffer + 16,
                         xd->dst.u_buffer + 8,
                         xd->dst.v_buffer + 8);
 
                     // this is to account for the border
                     xd->mode_info_context++;
+                    x->partition_info++;
 
                     x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                     x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
                     x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 
                     xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
+                    x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
 
                     if (ithread == (cpi->encoding_thread_count - 1) || mb_row == cm->mb_rows - 1)
                     {
@@ -256,13 +253,8 @@
 
     z->vp8_short_fdct4x4     = x->vp8_short_fdct4x4;
     z->vp8_short_fdct8x4     = x->vp8_short_fdct8x4;
-    z->short_fdct4x4rd   = x->short_fdct4x4rd;
-    z->short_fdct8x4rd   = x->short_fdct8x4rd;
-    z->short_fdct8x4rd   = x->short_fdct8x4rd;
-    z->vp8_short_fdct4x4_ptr = x->vp8_short_fdct4x4_ptr;
     z->short_walsh4x4    = x->short_walsh4x4;
     z->quantize_b        = x->quantize_b;
-    z->quantize_brd      = x->quantize_brd;
 
     /*
     z->mvc              = x->mvc;
@@ -290,6 +282,7 @@
     for (i = 0; i < 25; i++)
     {
         z->block[i].quant           = x->block[i].quant;
+        z->block[i].quant_shift     = x->block[i].quant_shift;
         z->block[i].zbin            = x->block[i].zbin;
         z->block[i].zrun_zbin_boost   = x->block[i].zrun_zbin_boost;
         z->block[i].round           = x->block[i].round;
@@ -334,11 +327,6 @@
         zd->mb_segement_abs_delta      = xd->mb_segement_abs_delta;
         vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
 
-        /*
-        memcpy(zd->above_context,        xd->above_context, sizeof(xd->above_context));
-        memcpy(zd->mb_segment_tree_probs,  xd->mb_segment_tree_probs, sizeof(xd->mb_segment_tree_probs));
-        memcpy(zd->segment_feature_data,  xd->segment_feature_data, sizeof(xd->segment_feature_data));
-        */
         for (i = 0; i < 25; i++)
         {
             zd->block[i].dequant = xd->block[i].dequant;
@@ -372,14 +360,15 @@
 #if CONFIG_RUNTIME_CPU_DETECT
         mbd->rtcd                   = xd->rtcd;
 #endif
-        mbd->gf_active_ptr            = xd->gf_active_ptr;
+        mb->gf_active_ptr            = x->gf_active_ptr;
 
         mb->vector_range             = 32;
 
         vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
         mbr_ei[i].totalrate = 0;
 
-        mbd->mode_info        = cm->mi - 1;
+        mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);
+
         mbd->mode_info_context = cm->mi   + x->e_mbd.mode_info_stride * (i + 1);
         mbd->mode_info_stride  = cm->mode_info_stride;
 
@@ -389,8 +378,8 @@
         mbd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
 
         mb->src = * cpi->Source;
-        mbd->pre = cm->last_frame;
-        mbd->dst = cm->new_frame;
+        mbd->pre = cm->yv12_fb[cm->lst_fb_idx];
+        mbd->dst = cm->yv12_fb[cm->new_fb_idx];
 
         mb->src.y_buffer += 16 * x->src.y_stride * (i + 1);
         mb->src.u_buffer +=  8 * x->src.uv_stride * (i + 1);
@@ -406,10 +395,7 @@
         mb->rddiv = cpi->RDDIV;
         mb->rdmult = cpi->RDMULT;
 
-        mbd->mbmi.mode = DC_PRED;
-        mbd->mbmi.uv_mode = DC_PRED;
-
-        mbd->left_context = cm->left_context;
+        mbd->left_context = &cm->left_context;
         mb->mvc = cm->fc.mvc;
 
         setup_mbby_copy(&mbr_ei[i].mb, x);
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index c519080..a65bce6 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -77,9 +78,9 @@
 
     if (use_dc_pred)
     {
-        x->e_mbd.mbmi.mode = DC_PRED;
-        x->e_mbd.mbmi.uv_mode = DC_PRED;
-        x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+        x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
+        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
         vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
     }
@@ -175,7 +176,6 @@
 double vp8_simple_weight(YV12_BUFFER_CONFIG *source)
 {
     int i, j;
-    int Total = 0;
 
     unsigned char *src = source->y_buffer;
     unsigned char value;
@@ -217,7 +217,7 @@
     // If we are running below the optimal level then we need to gradually tighten up on max_bits.
     if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
     {
-        double buffer_fullness_ratio = (double)DOUBLE_DIVIDE_CHECK(cpi->buffer_level) / (double)cpi->oxcf.optimal_buffer_level;
+        double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level);
 
         // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user
         max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
@@ -536,8 +536,11 @@
 
     int col_blocks = 4 * cm->mb_cols;
     int recon_yoffset, recon_uvoffset;
-    int recon_y_stride = cm->last_frame.y_stride;
-    int recon_uv_stride = cm->last_frame.uv_stride;
+    YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
+    YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
+    YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
+    int recon_y_stride = lst_yv12->y_stride;
+    int recon_uv_stride = lst_yv12->uv_stride;
     int intra_error = 0;
     int coded_error = 0;
 
@@ -559,8 +562,12 @@
     vp8_clear_system_state();  //__asm emms;
 
     x->src = * cpi->Source;
-    xd->pre = cm->last_frame;
-    xd->dst = cm->new_frame;
+    xd->pre = *lst_yv12;
+    xd->dst = *new_yv12;
+
+    x->partition_info = x->pi;
+
+    xd->mode_info_context = cm->mi;
 
     vp8_build_block_offsets(x);
 
@@ -569,7 +576,7 @@
     vp8_setup_block_ptrs(x);
 
     // set up frame new frame for intra coded blocks
-    vp8_setup_intra_recon(&cm->new_frame);
+    vp8_setup_intra_recon(new_yv12);
     vp8cx_frame_init_quantizer(cpi);
 
     // Initialise the MV cost table to the defaults
@@ -599,9 +606,9 @@
             int gf_motion_error = INT_MAX;
             int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
 
-            xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset;
-            xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset;
-            xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset;
+            xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
+            xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset;
+            xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
             xd->left_available = (mb_col != 0);
 
             // do intra 16x16 prediction
@@ -635,18 +642,22 @@
                 int motion_error = INT_MAX;
 
                 // Simple 0,0 motion with no mv overhead
-                vp8_zz_motion_search( cpi, x, &cm->last_frame, &motion_error, recon_yoffset );
+                vp8_zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset );
                 d->bmi.mv.as_mv.row = 0;
                 d->bmi.mv.as_mv.col = 0;
 
-                // Test last reference frame using the previous best mv as the starting point (best reference) for the search
-                vp8_first_pass_motion_search(cpi, x, &best_ref_mv, &d->bmi.mv.as_mv, &cm->last_frame, &motion_error, recon_yoffset);
+                // Test last reference frame using the previous best mv as the
+                // starting point (best reference) for the search
+                vp8_first_pass_motion_search(cpi, x, &best_ref_mv,
+                                        &d->bmi.mv.as_mv, lst_yv12,
+                                        &motion_error, recon_yoffset);
 
                 // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well
                 if ((best_ref_mv.col != 0) || (best_ref_mv.row != 0))
                 {
                    tmp_err = INT_MAX;
-                   vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, &cm->last_frame, &motion_error, recon_yoffset);
+                   vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
+                                     lst_yv12, &tmp_err, recon_yoffset);
 
                    if ( tmp_err < motion_error )
                    {
@@ -660,7 +671,7 @@
                 // Experimental search in a second reference frame ((0,0) based only)
                 if (cm->current_video_frame > 1)
                 {
-                    vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, &cm->golden_frame, &gf_motion_error, recon_yoffset);
+                    vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset);
 
                     if ((gf_motion_error < motion_error) && (gf_motion_error < this_error))
                     {
@@ -678,9 +689,9 @@
 
 
                     // Reset to last frame as reference buffer
-                    xd->pre.y_buffer = cm->last_frame.y_buffer + recon_yoffset;
-                    xd->pre.u_buffer = cm->last_frame.u_buffer + recon_uvoffset;
-                    xd->pre.v_buffer = cm->last_frame.v_buffer + recon_uvoffset;
+                    xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
+                    xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
+                    xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
                 }
 
                 if (motion_error <= this_error)
@@ -772,14 +783,13 @@
         x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 
         //extend the recon for intra prediction
-        vp8_extend_mb_row(&cm->new_frame, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+        vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
         vp8_clear_system_state();  //__asm emms;
     }
 
     vp8_clear_system_state();  //__asm emms;
     {
         double weight = 0.0;
-        double weigth2 = 0.0;
 
         FIRSTPASS_STATS fps;
 
@@ -839,17 +849,17 @@
         (cpi->this_frame_stats.pcnt_inter > 0.20) &&
         ((cpi->this_frame_stats.intra_error / cpi->this_frame_stats.coded_error) > 2.0))
     {
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+        vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
     }
 
     // swap frame pointers so last frame refers to the frame we just compressed
-    vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-    vp8_yv12_extend_frame_borders(&cm->last_frame);
+    vp8_swap_yv12_buffer(lst_yv12, new_yv12);
+    vp8_yv12_extend_frame_borders(lst_yv12);
 
     // Special case for the first frame. Copy into the GF buffer as a second reference.
     if (cm->current_video_frame == 0)
     {
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+        vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
     }
 
 
@@ -865,7 +875,7 @@
         else
             recon_file = fopen(filename, "ab");
 
-        fwrite(cm->last_frame.buffer_alloc, cm->last_frame.frame_size, 1, recon_file);
+        fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
         fclose(recon_file);
     }
 
@@ -1193,8 +1203,9 @@
     FIRSTPASS_STATS next_frame;
     FIRSTPASS_STATS *start_pos;
     int i;
-    int count = 0;
-    int image_size = cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height;
+    int y_width  = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_width;
+    int y_height = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_height;
+    int image_size = y_width  * y_height;
     double boost_score = 0.0;
     double old_boost_score = 0.0;
     double gf_group_err = 0.0;
@@ -1234,6 +1245,8 @@
 
     start_pos = cpi->stats_in;
 
+    vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+
     // Preload the stats for the next frame.
     mod_frame_err = calculate_modified_err(cpi, this_frame);
 
@@ -1390,6 +1403,7 @@
 
     // Should we use the alternate refernce frame
     if (cpi->oxcf.play_alternate &&
+        cpi->oxcf.lag_in_frames &&
         (i >= MIN_GF_INTERVAL) &&
         (i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) &&          // dont use ARF very near next kf
         (((next_frame.pcnt_inter > 0.75) &&
@@ -1568,26 +1582,36 @@
         // Calculate the number of bits to be spent on the gf or arf based on the boost number
         cpi->gf_bits = (int)((double)Boost * (cpi->gf_group_bits / (double)allocation_chunks));
 
-        // If the frame that is to be boosted is simpler than the average for the gf/arf group then use an alternative calculation
+        // If the frame that is to be boosted is simpler than the average for
+        // the gf/arf group then use an alternative calculation
         // based on the error score of the frame itself
         if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval)
         {
             double  alt_gf_grp_bits;
             int     alt_gf_bits;
 
-            alt_gf_grp_bits = ((double)cpi->kf_group_bits  * (mod_frame_err * (double)cpi->baseline_gf_interval) / (double)cpi->kf_group_error_left) ;
-            alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits / (double)allocation_chunks));
+            alt_gf_grp_bits =
+                (double)cpi->kf_group_bits  *
+                (mod_frame_err * (double)cpi->baseline_gf_interval) /
+                DOUBLE_DIVIDE_CHECK((double)cpi->kf_group_error_left);
+
+            alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits /
+                                                 (double)allocation_chunks));
 
             if (cpi->gf_bits > alt_gf_bits)
             {
                 cpi->gf_bits = alt_gf_bits;
             }
         }
-        // Else if it is harder than other frames in the group make sure it at least receives an allocation in keeping with
-        // its relative error score, otherwise it may be worse off than an "un-boosted" frame
+        // Else if it is harder than other frames in the group make sure it at
+        // least receives an allocation in keeping with its relative error
+        // score, otherwise it may be worse off than an "un-boosted" frame
         else
         {
-            int alt_gf_bits = (int)((double)cpi->kf_group_bits * (mod_frame_err / (double)cpi->kf_group_error_left));
+            int alt_gf_bits =
+                (int)((double)cpi->kf_group_bits *
+                      mod_frame_err /
+                      DOUBLE_DIVIDE_CHECK((double)cpi->kf_group_error_left));
 
             if (alt_gf_bits > cpi->gf_bits)
             {
@@ -1871,6 +1895,18 @@
         }
     }
 
+    // Keep a globally available copy of this and the next frame's iiratio.
+    cpi->this_iiratio = this_frame_intra_error /
+                        DOUBLE_DIVIDE_CHECK(this_frame_coded_error);
+    {
+        FIRSTPASS_STATS next_frame;
+        if ( lookup_next_frame_stats(cpi, &next_frame) != EOF )
+        {
+            cpi->next_iiratio = next_frame.intra_error /
+                                DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
+        }
+    }
+
     // Set nominal per second bandwidth for this frame
     cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate;
     if (cpi->target_bandwidth < 0)
@@ -2028,6 +2064,8 @@
     double kf_group_coded_err = 0.0;
     double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
 
+    vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+
     vp8_clear_system_state();  //__asm emms;
     start_position = cpi->stats_in;
 
@@ -2044,7 +2082,7 @@
     // Take a copy of the initial frame details
     vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame));
 
-    cpi->kf_group_bits = 0;       // Estimate of total bits avaialable to kf group
+    cpi->kf_group_bits = 0;        // Total bits avaialable to kf group
     cpi->kf_group_error_left = 0;  // Group modified error score.
 
     kf_mod_err = calculate_modified_err(cpi, this_frame);
@@ -2060,33 +2098,34 @@
         kf_group_intra_err += this_frame->intra_error;
         kf_group_coded_err += this_frame->coded_error;
 
+        // load a the next frame's stats
         vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
+        vp8_input_stats(cpi, this_frame);
 
         // Provided that we are not at the end of the file...
-        if (EOF != vp8_input_stats(cpi, this_frame))
+        if (cpi->oxcf.auto_key
+            && lookup_next_frame_stats(cpi, &next_frame) != EOF)
         {
-            if (lookup_next_frame_stats(cpi, &next_frame) != EOF)
-            {
-                if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
-                    break;
-            }
-        }
+            if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
+                break;
 
-        // Step on to the next frame
-        cpi->frames_to_key ++;
+            // Step on to the next frame
+            cpi->frames_to_key ++;
 
-        // If we don't have a real key frame within the next two
-        // forcekeyframeevery intervals then break out of the loop.
-        if (cpi->frames_to_key >= 2 *(int)cpi->key_frame_frequency)
-            break;
-
+            // If we don't have a real key frame within the next two
+            // forcekeyframeevery intervals then break out of the loop.
+            if (cpi->frames_to_key >= 2 *(int)cpi->key_frame_frequency)
+                break;
+        } else
+            cpi->frames_to_key ++;
     }
 
     // If there is a max kf interval set by the user we must obey it.
     // We already breakout of the loop above at 2x max.
     // This code centers the extra kf if the actual natural
     // interval is between 1x and 2x
-    if ( cpi->frames_to_key > (int)cpi->key_frame_frequency )
+    if (cpi->oxcf.auto_key
+        && cpi->frames_to_key > (int)cpi->key_frame_frequency )
     {
         cpi->frames_to_key /= 2;
 
@@ -2111,39 +2150,64 @@
     // Calculate the number of bits that should be assigned to the kf group.
     if ((cpi->bits_left > 0) && ((int)cpi->modified_total_error_left > 0))
     {
-        int max_bits = frame_max_bits(cpi);    // Max for a single normal frame (not key frame)
+        // Max for a single normal frame (not key frame)
+        int max_bits = frame_max_bits(cpi);
 
-        // Default allocation based on bits left and relative complexity of the section
-        cpi->kf_group_bits = (int)(cpi->bits_left * (kf_group_err / cpi->modified_total_error_left));
+        // Maximum bits for the kf group
+        long long max_grp_bits;
+
+        // Default allocation based on bits left and relative
+        // complexity of the section
+        cpi->kf_group_bits = (long long)( cpi->bits_left *
+                                          ( kf_group_err /
+                                            cpi->modified_total_error_left ));
 
         // Clip based on maximum per frame rate defined by the user.
-        if (cpi->kf_group_bits > max_bits * cpi->frames_to_key)
-            cpi->kf_group_bits = max_bits * cpi->frames_to_key;
+        max_grp_bits = (long long)max_bits * (long long)cpi->frames_to_key;
+        if (cpi->kf_group_bits > max_grp_bits)
+            cpi->kf_group_bits = max_grp_bits;
 
         // Additional special case for CBR if buffer is getting full.
         if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
         {
-            // If the buffer is near or above the optimal and this kf group is not being allocated much
-            // then increase the allocation a bit.
-            if (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level)
+            int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level;
+            int buffer_lvl = cpi->buffer_level;
+
+            // If the buffer is near or above the optimal and this kf group is
+            // not being allocated much then increase the allocation a bit.
+            if (buffer_lvl >= opt_buffer_lvl)
             {
-                int high_water_mark = (cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1;
-                int min_group_bits;
+                int high_water_mark = (opt_buffer_lvl +
+                                       cpi->oxcf.maximum_buffer_size) >> 1;
+
+                long long av_group_bits;
+
+                // Av bits per frame * number of frames
+                av_group_bits = (long long)cpi->av_per_frame_bandwidth *
+                                (long long)cpi->frames_to_key;
 
                 // We are at or above the maximum.
                 if (cpi->buffer_level >= high_water_mark)
                 {
-                    min_group_bits = (cpi->av_per_frame_bandwidth * cpi->frames_to_key) + (cpi->buffer_level - high_water_mark);
+                    long long min_group_bits;
+
+                    min_group_bits = av_group_bits +
+                                     (long long)(buffer_lvl -
+                                                 high_water_mark);
 
                     if (cpi->kf_group_bits < min_group_bits)
                         cpi->kf_group_bits = min_group_bits;
                 }
                 // We are above optimal but below the maximum
-                else if (cpi->kf_group_bits < (cpi->av_per_frame_bandwidth * cpi->frames_to_key))
+                else if (cpi->kf_group_bits < av_group_bits)
                 {
-                    int bits_below_av = (cpi->av_per_frame_bandwidth * cpi->frames_to_key) - cpi->kf_group_bits;
-                    cpi->kf_group_bits += (int)((double)bits_below_av * (double)(cpi->buffer_level - cpi->oxcf.optimal_buffer_level) /
-                                                (double)(high_water_mark - cpi->oxcf.optimal_buffer_level));
+                    long long bits_below_av = av_group_bits -
+                                              cpi->kf_group_bits;
+
+                    cpi->kf_group_bits +=
+                       (long long)((double)bits_below_av *
+                                   (double)(buffer_lvl - opt_buffer_lvl) /
+                                   (double)(high_water_mark - opt_buffer_lvl));
                 }
             }
         }
@@ -2269,7 +2333,7 @@
         int allocation_chunks;
         int Counter = cpi->frames_to_key;
         int alt_kf_bits;
-
+        YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
         // Min boost based on kf interval
 #if 0
 
@@ -2289,10 +2353,10 @@
         }
 
         // bigger frame sizes need larger kf boosts, smaller frames smaller boosts...
-        if ((cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height) > (320 * 240))
-            kf_boost += 2 * (cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height) / (320 * 240);
-        else if ((cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height) < (320 * 240))
-            kf_boost -= 4 * (320 * 240) / (cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height);
+        if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240))
+            kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240);
+        else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240))
+            kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height);
 
         kf_boost = (int)((double)kf_boost * 100.0) >> 4;                          // Scale 16 to 100
 
@@ -2328,23 +2392,34 @@
                 cpi->kf_bits = (3 * cpi->buffer_level) >> 2;
         }
 
-        // If the key frame is actually easier than the average for the kf group (which does sometimes happen... eg a blank intro frame)
-        // Then use an alternate calculation based on the kf error score which should give a smaller key frame.
+        // If the key frame is actually easier than the average for the
+        // kf group (which does sometimes happen... eg a blank intro frame)
+        // Then use an alternate calculation based on the kf error score
+        // which should give a smaller key frame.
         if (kf_mod_err < kf_group_err / cpi->frames_to_key)
         {
-            double  alt_kf_grp_bits = ((double)cpi->bits_left * (kf_mod_err * (double)cpi->frames_to_key) / cpi->modified_total_error_left) ;
+            double  alt_kf_grp_bits =
+                        ((double)cpi->bits_left *
+                         (kf_mod_err * (double)cpi->frames_to_key) /
+                         DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left));
 
-            alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks));
+            alt_kf_bits = (int)((double)kf_boost *
+                                (alt_kf_grp_bits / (double)allocation_chunks));
 
             if (cpi->kf_bits > alt_kf_bits)
             {
                 cpi->kf_bits = alt_kf_bits;
             }
         }
-        // Else if it is much harder than other frames in the group make sure it at least receives an allocation in keeping with its relative error score
+        // Else if it is much harder than other frames in the group make sure
+        // it at least receives an allocation in keeping with its relative
+        // error score
         else
         {
-            alt_kf_bits = (int)((double)cpi->bits_left * (kf_mod_err / cpi->modified_total_error_left));
+            alt_kf_bits =
+                (int)((double)cpi->bits_left *
+                      (kf_mod_err /
+                       DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left)));
 
             if (alt_kf_bits > cpi->kf_bits)
             {
diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h
index d7b52f3..c7f3e0e 100644
--- a/vp8/encoder/firstpass.h
+++ b/vp8/encoder/firstpass.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 52aab66..1acb73d 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -67,8 +68,8 @@
 
     cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
     cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
+    cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_c;
+    cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_c;
     cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
 
     cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index d80059d..b89354e 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -996,7 +997,7 @@
     int tot_steps;
     MV this_mv;
 
-    unsigned int bestsad = UINT_MAX;
+    int bestsad = INT_MAX;
     int best_site = 0;
     int last_site = 0;
 
@@ -1137,7 +1138,6 @@
 }
 
 
-#if !(CONFIG_REALTIME_ONLY)
 int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
 {
     unsigned char *what = (*(b->base_src) + b->src);
@@ -1238,7 +1238,7 @@
     unsigned char *bestaddress;
     MV *best_mv = &d->bmi.mv.as_mv;
     MV this_mv;
-    unsigned int bestsad = UINT_MAX;
+    int bestsad = INT_MAX;
     int r, c;
 
     unsigned char *check_here;
@@ -1350,7 +1350,6 @@
     else
         return INT_MAX;
 }
-#endif
 
 #ifdef ENTROPY_STATS
 void print_mode_context(void)
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 921206f..7cc9242 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c
index 73170cf..d23c97e 100644
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/modecosts.h b/vp8/encoder/modecosts.h
index 5ade265..99ef119 100644
--- a/vp8/encoder/modecosts.h
+++ b/vp8/encoder/modecosts.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 7662720..56bac0a 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -20,7 +21,7 @@
 #include "extend.h"
 #include "ratectrl.h"
 #include "quant_common.h"
-#include "segmentation_common.h"
+#include "segmentation.h"
 #include "g_common.h"
 #include "vpx_scale/yv12extend.h"
 #include "postproc.h"
@@ -28,6 +29,8 @@
 #include "swapyv12buffer.h"
 #include "threading.h"
 #include "vpx_ports/vpx_timer.h"
+#include "vpxerrors.h"
+
 #include <math.h>
 #include <stdio.h>
 #include <limits.h>
@@ -136,8 +139,6 @@
 
 extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
 extern void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
-extern void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
-extern void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
 
 extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
 
@@ -225,6 +226,17 @@
     vpx_free(cpi->tok);
     cpi->tok = 0;
 
+    // Structure used to minitor GF useage
+    if (cpi->gf_active_flags != 0)
+        vpx_free(cpi->gf_active_flags);
+
+    cpi->gf_active_flags = 0;
+
+    if(cpi->mb.pip)
+        vpx_free(cpi->mb.pip);
+
+    cpi->mb.pip = 0;
+
 }
 
 static void enable_segmentation(VP8_PTR ptr)
@@ -281,7 +293,6 @@
 
     unsigned char *seg_map;
     signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
-    int i, j;
 
     // Create a temporary map for segmentation data.
     CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
@@ -545,7 +556,8 @@
             sf->thresh_mult[THR_NEWG     ] = INT_MAX;
             sf->thresh_mult[THR_SPLITG   ] = INT_MAX;
         }
-        else if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
+
+        if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
         {
             sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
             sf->thresh_mult[THR_ZEROA    ] = INT_MAX;
@@ -597,7 +609,8 @@
             sf->thresh_mult[THR_NEARMV   ] = INT_MAX;
             sf->thresh_mult[THR_SPLITMV  ] = INT_MAX;
         }
-        else if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
+
+        if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
         {
             sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
             sf->thresh_mult[THR_ZEROG    ] = INT_MAX;
@@ -605,7 +618,8 @@
             sf->thresh_mult[THR_NEWG     ] = INT_MAX;
             sf->thresh_mult[THR_SPLITG   ] = INT_MAX;
         }
-        else if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
+
+        if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
         {
             sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
             sf->thresh_mult[THR_ZEROA    ] = INT_MAX;
@@ -763,7 +777,7 @@
                 cpi->mode_check_freq[THR_NEWA] = 4;
             }
 
-            if (cpi->ref_frame_flags & VP8_LAST_FLAG & VP8_GOLD_FLAG)
+            if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
             {
                 sf->thresh_mult[THR_NEARESTG ] = 2000;
                 sf->thresh_mult[THR_ZEROG    ] = 2000;
@@ -771,7 +785,7 @@
                 sf->thresh_mult[THR_NEWG     ] = 4000;
             }
 
-            if (cpi->ref_frame_flags & VP8_LAST_FLAG & VP8_ALT_FLAG)
+            if (cpi->ref_frame_flags & VP8_ALT_FLAG)
             {
                 sf->thresh_mult[THR_NEARESTA ] = 2000;
                 sf->thresh_mult[THR_ZEROA    ] = 2000;
@@ -811,7 +825,7 @@
         sf->full_freq[1] = 31;
         sf->search_method = NSTEP;
 
-        if (!cpi->ref_frame_flags & VP8_LAST_FLAG)
+        if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
         {
             sf->thresh_mult[THR_NEWMV    ] = INT_MAX;
             sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
@@ -820,7 +834,7 @@
             sf->thresh_mult[THR_SPLITMV  ] = INT_MAX;
         }
 
-        if (!cpi->ref_frame_flags & VP8_GOLD_FLAG)
+        if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
         {
             sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
             sf->thresh_mult[THR_ZEROG    ] = INT_MAX;
@@ -829,7 +843,7 @@
             sf->thresh_mult[THR_SPLITG   ] = INT_MAX;
         }
 
-        if (!cpi->ref_frame_flags & VP8_ALT_FLAG)
+        if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
         {
             sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
             sf->thresh_mult[THR_ZEROA    ] = INT_MAX;
@@ -1122,40 +1136,33 @@
 
     if (cpi->sf.search_method == NSTEP)
     {
-        vp8_init3smotion_compensation(&cpi->mb, cm->last_frame.y_stride);
+        vp8_init3smotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride);
     }
     else if (cpi->sf.search_method == DIAMOND)
     {
-        vp8_init_dsmotion_compensation(&cpi->mb, cm->last_frame.y_stride);
+        vp8_init_dsmotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride);
     }
 
     if (cpi->sf.improved_dct)
     {
         cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
         cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
-        cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
-        cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
     }
     else
     {
         cpi->mb.vp8_short_fdct8x4   = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
         cpi->mb.vp8_short_fdct4x4   = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
-        cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
-        cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
     }
 
-    cpi->mb.vp8_short_fdct4x4_ptr = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
     cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4);
 
     if (cpi->sf.improved_quant)
     {
         cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
-        cpi->mb.quantize_brd  = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
     }
     else
     {
         cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
-        cpi->mb.quantize_brd    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
     }
 
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -1221,6 +1228,20 @@
 
     cpi->source_buffer_count = 0;
 }
+
+static int vp8_alloc_partition_data(VP8_COMP *cpi)
+{
+    cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) *
+                                (cpi->common.mb_rows + 1),
+                                sizeof(PARTITION_INFO));
+    if(!cpi->mb.pip)
+        return ALLOC_FAILURE;
+
+    cpi->mb.pi = cpi->mb.pip + cpi->common.mode_info_stride + 1;
+
+    return 0;
+}
+
 void vp8_alloc_compressor_data(VP8_COMP *cpi)
 {
     VP8_COMMON *cm = & cpi->common;
@@ -1232,6 +1253,11 @@
         vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                            "Failed to allocate frame buffers");
 
+    if (vp8_alloc_partition_data(cpi))
+        vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate partition data");
+
+
     if ((width & 0xf) != 0)
         width += 16 - (width & 0xf);
 
@@ -1262,6 +1288,15 @@
     cpi->inter_zz_count = 0;
     cpi->gf_bad_count = 0;
     cpi->gf_update_recommended = 0;
+
+
+    // Structures used to minitor GF usage
+    if (cpi->gf_active_flags != 0)
+        vpx_free(cpi->gf_active_flags);
+
+    CHECK_MEM_ERROR(cpi->gf_active_flags, vpx_calloc(1, cm->mb_rows * cm->mb_cols));
+
+    cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
 }
 
 
@@ -1309,14 +1344,26 @@
         cpi->max_gf_interval = 12;
 
 
-    // Special conditions when altr ref frame enabled
-    if (cpi->oxcf.play_alternate)
+    // Special conditions when altr ref frame enabled in lagged compress mode
+    if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames)
     {
         if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1)
             cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1;
     }
 }
 
+
+static int
+rescale(int val, int num, int denom)
+{
+    int64_t llnum = num;
+    int64_t llden = denom;
+    int64_t llval = val;
+
+    return llval * llnum / llden;
+}
+
+
 void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
 {
     VP8_COMP *cpi = (VP8_COMP *)(ptr);
@@ -1344,9 +1391,9 @@
         cpi->oxcf.worst_allowed_q           = MAXQ;
 
         cpi->oxcf.end_usage                = USAGE_STREAM_FROM_SERVER;
-        cpi->oxcf.starting_buffer_level     =   4;
-        cpi->oxcf.optimal_buffer_level      =   5;
-        cpi->oxcf.maximum_buffer_size       =   6;
+        cpi->oxcf.starting_buffer_level     =   4000;
+        cpi->oxcf.optimal_buffer_level      =   5000;
+        cpi->oxcf.maximum_buffer_size       =   6000;
         cpi->oxcf.under_shoot_pct           =  90;
         cpi->oxcf.allow_df                 =   0;
         cpi->oxcf.drop_frames_water_mark     =  20;
@@ -1495,26 +1542,32 @@
     // local file playback mode == really big buffer
     if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
     {
-        cpi->oxcf.starting_buffer_level   = 60;
-        cpi->oxcf.optimal_buffer_level    = 60;
-        cpi->oxcf.maximum_buffer_size     = 240;
+        cpi->oxcf.starting_buffer_level   = 60000;
+        cpi->oxcf.optimal_buffer_level    = 60000;
+        cpi->oxcf.maximum_buffer_size     = 240000;
 
     }
 
 
     // Convert target bandwidth from Kbit/s to Bit/s
     cpi->oxcf.target_bandwidth       *= 1000;
-    cpi->oxcf.starting_buffer_level   *= cpi->oxcf.target_bandwidth;
+    cpi->oxcf.starting_buffer_level =
+        rescale(cpi->oxcf.starting_buffer_level,
+                cpi->oxcf.target_bandwidth, 1000);
 
     if (cpi->oxcf.optimal_buffer_level == 0)
         cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
     else
-        cpi->oxcf.optimal_buffer_level *= cpi->oxcf.target_bandwidth;
+        cpi->oxcf.optimal_buffer_level =
+            rescale(cpi->oxcf.optimal_buffer_level,
+                    cpi->oxcf.target_bandwidth, 1000);
 
     if (cpi->oxcf.maximum_buffer_size == 0)
         cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
     else
-        cpi->oxcf.maximum_buffer_size     *= cpi->oxcf.target_bandwidth;
+        cpi->oxcf.maximum_buffer_size =
+            rescale(cpi->oxcf.maximum_buffer_size,
+                    cpi->oxcf.target_bandwidth, 1000);
 
     cpi->buffer_level                = cpi->oxcf.starting_buffer_level;
     cpi->bits_off_target              = cpi->oxcf.starting_buffer_level;
@@ -1570,9 +1623,9 @@
         cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
     }
 
-    if (((cm->Width + 15) & 0xfffffff0) != cm->last_frame.y_width ||
-        ((cm->Height + 15) & 0xfffffff0) != cm->last_frame.y_height ||
-        cm->last_frame.y_width == 0)
+    if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+        ((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
+        cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
     {
         alloc_raw_frame_buffers(cpi);
         vp8_alloc_compressor_data(cpi);
@@ -1599,13 +1652,6 @@
     else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
         cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
 
-    // force play_alternate to 0 if allow_lag is 0, lag_in_frames is too small, Mode is real time or one pass compress enabled.
-    if (cpi->oxcf.allow_lag == 0 || cpi->oxcf.lag_in_frames <= 5 || (cpi->oxcf.Mode < MODE_SECONDPASS))
-    {
-        cpi->oxcf.play_alternate = 0;
-        cpi->ref_frame_flags = cpi->ref_frame_flags & ~VP8_ALT_FLAG;
-    }
-
     // YX Temp
     cpi->last_alt_ref_sei    = -1;
     cpi->is_src_frame_alt_ref = 0;
@@ -1774,26 +1820,32 @@
     // local file playback mode == really big buffer
     if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
     {
-        cpi->oxcf.starting_buffer_level   = 60;
-        cpi->oxcf.optimal_buffer_level    = 60;
-        cpi->oxcf.maximum_buffer_size     = 240;
+        cpi->oxcf.starting_buffer_level   = 60000;
+        cpi->oxcf.optimal_buffer_level    = 60000;
+        cpi->oxcf.maximum_buffer_size     = 240000;
 
     }
 
     // Convert target bandwidth from Kbit/s to Bit/s
     cpi->oxcf.target_bandwidth       *= 1000;
 
-    cpi->oxcf.starting_buffer_level   *= cpi->oxcf.target_bandwidth;
+    cpi->oxcf.starting_buffer_level =
+        rescale(cpi->oxcf.starting_buffer_level,
+                cpi->oxcf.target_bandwidth, 1000);
 
     if (cpi->oxcf.optimal_buffer_level == 0)
         cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
     else
-        cpi->oxcf.optimal_buffer_level *= cpi->oxcf.target_bandwidth;
+        cpi->oxcf.optimal_buffer_level =
+            rescale(cpi->oxcf.optimal_buffer_level,
+                    cpi->oxcf.target_bandwidth, 1000);
 
     if (cpi->oxcf.maximum_buffer_size == 0)
         cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
     else
-        cpi->oxcf.maximum_buffer_size     *= cpi->oxcf.target_bandwidth;
+        cpi->oxcf.maximum_buffer_size =
+            rescale(cpi->oxcf.maximum_buffer_size,
+                    cpi->oxcf.target_bandwidth, 1000);
 
     cpi->buffer_level                = cpi->oxcf.starting_buffer_level;
     cpi->bits_off_target              = cpi->oxcf.starting_buffer_level;
@@ -1849,9 +1901,9 @@
         cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
     }
 
-    if (((cm->Width + 15) & 0xfffffff0) != cm->last_frame.y_width ||
-        ((cm->Height + 15) & 0xfffffff0) != cm->last_frame.y_height ||
-        cm->last_frame.y_width == 0)
+    if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+        ((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
+        cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
     {
         alloc_raw_frame_buffers(cpi);
         vp8_alloc_compressor_data(cpi);
@@ -1878,13 +1930,6 @@
     else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
         cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
 
-    // force play_alternate to 0 if allow_lag is 0, lag_in_frames is too small, Mode is real time or one pass compress enabled.
-    if (cpi->oxcf.allow_lag == 0 || cpi->oxcf.lag_in_frames <= 5 || (cpi->oxcf.Mode < MODE_SECONDPASS))
-    {
-        cpi->oxcf.play_alternate = 0;
-        cpi->ref_frame_flags = cpi->ref_frame_flags & ~VP8_ALT_FLAG;
-    }
-
     // YX Temp
     cpi->last_alt_ref_sei    = -1;
     cpi->is_src_frame_alt_ref = 0;
@@ -1925,7 +1970,7 @@
         VP8_COMP *cpi;
         VP8_PTR   ptr;
     } ctx;
-    
+
     VP8_COMP *cpi;
     VP8_COMMON *cm;
 
@@ -2247,12 +2292,13 @@
 
             if (cpi->b_calculate_psnr)
             {
-                double samples = 3.0 / 2 * cpi->count * cpi->common.last_frame.y_width * cpi->common.last_frame.y_height;
+                YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+                double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height;
                 double total_psnr = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error);
                 double total_psnr2 = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error2);
                 double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
 
-                fprintf(f, "Bitrate\AVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\tVPXSSIM\t  Time(us)\n");
+                fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\tVPXSSIM\t  Time(us)\n");
                 fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f %8.0f\n",
                         dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim,
                         total_encode_time);
@@ -2586,19 +2632,19 @@
 {
     VP8_COMP *cpi = (VP8_COMP *)(ptr);
     VP8_COMMON *cm = &cpi->common;
+    int ref_fb_idx;
 
     if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
+        ref_fb_idx = cm->lst_fb_idx;
     else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
+        ref_fb_idx = cm->gld_fb_idx;
     else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
+        ref_fb_idx = cm->alt_fb_idx;
     else
         return -1;
 
+    vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd);
+
     return 0;
 }
 int vp8_set_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
@@ -2606,18 +2652,19 @@
     VP8_COMP *cpi = (VP8_COMP *)(ptr);
     VP8_COMMON *cm = &cpi->common;
 
+    int ref_fb_idx;
+
     if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
+        ref_fb_idx = cm->lst_fb_idx;
     else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
+        ref_fb_idx = cm->gld_fb_idx;
     else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
+        ref_fb_idx = cm->alt_fb_idx;
     else
         return -1;
 
+    vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]);
+
     return 0;
 }
 int vp8_update_entropy(VP8_PTR comp, int update)
@@ -2692,8 +2739,8 @@
 #endif
     }
     // we may need to copy to a buffer so we can extend the image...
-    else if (cm->Width != cm->last_frame.y_width ||
-             cm->Height != cm->last_frame.y_height)
+    else if (cm->Width != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+             cm->Height != cm->yv12_fb[cm->lst_fb_idx].y_height)
     {
         //vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
 #if HAVE_ARMV7
@@ -2783,24 +2830,18 @@
         cm->frame_type = KEY_FRAME;
 
     }
-    // Auto key frames (Only two pass will enter here)
+    // Special case for forced key frames
+    // The frame sizing here is still far from ideal for 2 pass.
+    else if (cm->frame_flags & FRAMEFLAGS_KEY)
+    {
+        cm->frame_type = KEY_FRAME;
+        resize_key_frame(cpi);
+        vp8_calc_iframe_target_size(cpi);
+    }
     else if (cm->frame_type == KEY_FRAME)
     {
         vp8_calc_auto_iframe_target_size(cpi);
     }
-    // Forced key frames (by interval or an external signal)
-    else if ((cm->frame_flags & FRAMEFLAGS_KEY) ||
-             (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0)))
-    {
-        // Key frame from VFW/auto-keyframe/first frame
-        cm->frame_type = KEY_FRAME;
-
-        resize_key_frame(cpi);
-
-        // Compute target frame size
-        if (cpi->pass != 2)
-            vp8_calc_iframe_target_size(cpi);
-    }
     else
     {
         // INTER frame: compute target frame size
@@ -2846,7 +2887,7 @@
     VP8_COMMON *cm = &cpi->common;
 
     // Update the golden frame buffer
-    vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
+    vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
 
     // Select an interval before next GF or altref
     if (!cpi->auto_gold)
@@ -2866,8 +2907,8 @@
     }
 
     // Update data structure that monitors level of reference to last GF
-    vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-    cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+    vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+    cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
     // this frame refreshes means next frames don't unless specified by user
 
     cpi->common.frames_since_golden = 0;
@@ -2888,7 +2929,7 @@
     if (cm->refresh_golden_frame)
     {
         // Update the golden frame buffer
-        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
+        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
 
         // Select an interval before next GF
         if (!cpi->auto_gold)
@@ -2914,8 +2955,8 @@
         }
 
         // Update data structure that monitors level of reference to last GF
-        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+        vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+        cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
 
         // this frame refreshes means next frames don't unless specified by user
         cm->refresh_golden_frame = 0;
@@ -3220,10 +3261,31 @@
 }
 #endif
 // return of 0 means drop frame
-
+#define USE_FILTER_LUT 1
 #if VP8_TEMPORAL_ALT_REF
+
+#if USE_FILTER_LUT
+static int modifier_lut[7][19] =
+{
+    // Strength=0
+    {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=1
+    {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=2
+    {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=3
+    {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=4
+    {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=5
+    {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
+    // Strength=6
+    {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
+};
+#endif
 static void vp8cx_temp_blur1_c
 (
+    VP8_COMP *cpi,
     unsigned char **frames,
     int frame_count,
     unsigned char *src,
@@ -3237,22 +3299,24 @@
     unsigned char block_size
 )
 {
-    int byte = 0;           // Buffer offset for the current pixel value being filtered
+    int byte = 0;         // Buffer offset for current pixel being filtered
     int frame = 0;
     int modifier = 0;
     int i, j, k;
     int block_ofset;
-    int Cols, Rows;
+    int cols;
     unsigned char Shift = (block_size == 16) ? 4 : 3;
+#if USE_FILTER_LUT
+    int *lut = modifier_lut[strength];
+#endif
 
-    Cols = width / block_size;
-    Rows = height / block_size;
+    cols = cpi->common.mb_cols;
 
     for (i = 0; i < height; i++)
     {
-        block_ofset = (i >> Shift) * Cols;
+        block_ofset = (i >> Shift) * cols;
 
-        for (j = 0; j < Cols; j ++)
+        for (j = 0; j < cols; j ++)
         {
             if (motion_map_ptr[block_ofset] > 2)
             {
@@ -3270,9 +3334,14 @@
                     for (frame = 0; frame < frame_count; frame++)
                     {
                         // get current frame pixel value
-                        int pixel_value = frames[frame][byte];       // int pixel_value = *frameptr;
-
-                        modifier   = src_byte;                       // modifier   = s[byte];
+                        int pixel_value = frames[frame][byte];
+#if USE_FILTER_LUT
+                        // LUT implementation --
+                        // improves precision of filter
+                        modifier = abs(src_byte-pixel_value);
+                        modifier = modifier>18 ? 0 : lut[modifier];
+#else
+                        modifier   = src_byte;
                         modifier  -= pixel_value;
                         modifier  *= modifier;
                         modifier >>= strength;
@@ -3282,17 +3351,17 @@
                             modifier = 16;
 
                         modifier = 16 - modifier;
-
+#endif
                         accumulator += modifier * pixel_value;
 
                         count += modifier;
                     }
 
                     accumulator += (count >> 1);
-                    accumulator *= fixed_divide[count];          // accumulator *= ppi->fixed_divide[count];
+                    accumulator *= fixed_divide[count];
                     accumulator >>= 16;
 
-                    dst[byte] = accumulator;        // d[byte] = accumulator;
+                    dst[byte] = accumulator;
 
                     // move to next pixel
                     byte++;
@@ -3398,7 +3467,8 @@
         {
             if ((frames_to_blur_backward + frames_to_blur_forward) >= max_frames)
             {
-                frames_to_blur_backward = max_frames - frames_to_blur_forward - 1;
+                frames_to_blur_backward
+                    = max_frames - frames_to_blur_forward - 1;
             }
         }
         else
@@ -3441,6 +3511,7 @@
 
     // Blur Y
     vp8cx_temp_blur1_c(
+        cpi,
         cpi->frames,
         frames_to_blur,
         temp_source_buffer->y_buffer,  // cpi->Source->y_buffer,
@@ -3455,7 +3526,7 @@
 
     for (frame = 0; frame < frames_to_blur; frame++)
     {
-        int which_buffer =  cpi->last_alt_ref_sei - frame;
+        int which_buffer =  start_frame - frame;
 
         if (which_buffer < 0)
             which_buffer += cpi->oxcf.lag_in_frames;
@@ -3465,6 +3536,7 @@
 
     // Blur U
     vp8cx_temp_blur1_c(
+        cpi,
         cpi->frames,
         frames_to_blur,
         temp_source_buffer->u_buffer,
@@ -3479,7 +3551,7 @@
 
     for (frame = 0; frame < frames_to_blur; frame++)
     {
-        int which_buffer =  cpi->last_alt_ref_sei - frame;
+        int which_buffer =  start_frame - frame;
 
         if (which_buffer < 0)
             which_buffer += cpi->oxcf.lag_in_frames;
@@ -3489,6 +3561,7 @@
 
     // Blur V
     vp8cx_temp_blur1_c(
+        cpi,
         cpi->frames,
         frames_to_blur,
         temp_source_buffer->v_buffer,
@@ -4277,7 +4350,7 @@
 
     // Update the GF useage maps.
     // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
-    vp8_update_gf_useage_maps(cm, &cpi->mb.e_mbd);
+    vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
 
     if (cm->frame_type == KEY_FRAME)
         cm->refresh_last_frame = 1;
@@ -4285,7 +4358,7 @@
     if (0)
     {
         FILE *f = fopen("gfactive.stt", "a");
-        fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
+        fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
         fclose(f);
     }
 
@@ -4298,11 +4371,11 @@
 
     if (cm->refresh_last_frame)
     {
-        vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-        cm->frame_to_show = &cm->last_frame;
+        vp8_swap_yv12_buffer(&cm->yv12_fb[cm->lst_fb_idx], &cm->yv12_fb[cm->new_fb_idx]);
+        cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx];
     }
     else
-        cm->frame_to_show = &cm->new_frame;
+        cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
 
 
 
@@ -4352,43 +4425,48 @@
         }
     }
 
-
-    // At this point the new frame has been encoded coded.
-    // If any buffer copy / swaping is signalled it should be done here.
-    if (cm->frame_type == KEY_FRAME)
     {
-        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-    }
-    else    // For non key frames
-    {
-        // Code to copy between reference buffers
-        if (cm->copy_buffer_to_arf)
+        YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
+        YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
+        YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
+        YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx];
+        // At this point the new frame has been encoded coded.
+        // If any buffer copy / swaping is signalled it should be done here.
+        if (cm->frame_type == KEY_FRAME)
         {
-            if (cm->copy_buffer_to_arf == 1)
-            {
-                if (cm->refresh_last_frame)
-                    // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
-                    vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
-                else
-                    vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
-            }
-            else if (cm->copy_buffer_to_arf == 2)
-                vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12);
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12);
         }
-
-        if (cm->copy_buffer_to_gf)
+        else    // For non key frames
         {
-            if (cm->copy_buffer_to_gf == 1)
+            // Code to copy between reference buffers
+            if (cm->copy_buffer_to_arf)
             {
-                if (cm->refresh_last_frame)
-                    // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
-                    vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
-                else
-                    vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+                if (cm->copy_buffer_to_arf == 1)
+                {
+                    if (cm->refresh_last_frame)
+                        // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
+                        vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12);
+                    else
+                        vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12);
+                }
+                else if (cm->copy_buffer_to_arf == 2)
+                    vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12);
             }
-            else if (cm->copy_buffer_to_gf == 2)
-                vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
+
+            if (cm->copy_buffer_to_gf)
+            {
+                if (cm->copy_buffer_to_gf == 1)
+                {
+                    if (cm->refresh_last_frame)
+                        // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
+                        vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12);
+                    else
+                        vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
+                }
+                else if (cm->copy_buffer_to_gf == 2)
+                    vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12);
+            }
         }
     }
 
@@ -4591,23 +4669,23 @@
     cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
 
     if (cpi->gold_is_last)
-        cpi->ref_frame_flags &= !VP8_GOLD_FLAG;
+        cpi->ref_frame_flags &= ~VP8_GOLD_FLAG;
 
     if (cpi->alt_is_last)
-        cpi->ref_frame_flags &= !VP8_ALT_FLAG;
+        cpi->ref_frame_flags &= ~VP8_ALT_FLAG;
 
     if (cpi->gold_is_alt)
-        cpi->ref_frame_flags &= !VP8_ALT_FLAG;
+        cpi->ref_frame_flags &= ~VP8_ALT_FLAG;
 
 
     if (cpi->oxcf.error_resilient_mode)
     {
         // Is this an alternate reference update
         if (cpi->common.refresh_alt_ref_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
 
         if (cpi->common.refresh_golden_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
     }
     else
     {
@@ -4659,7 +4737,8 @@
         FILE *recon_file;
         sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame);
         recon_file = fopen(filename, "wb");
-        fwrite(cm->last_frame.buffer_alloc, cm->last_frame.frame_size, 1, recon_file);
+        fwrite(cm->yv12_fb[cm->lst_fb_idx].buffer_alloc,
+               cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file);
         fclose(recon_file);
     }
 
@@ -4683,7 +4762,7 @@
 void vp8_check_gf_quality(VP8_COMP *cpi)
 {
     VP8_COMMON *cm = &cpi->common;
-    int gf_active_pct = (100 * cm->gf_active_count) / (cm->mb_rows * cm->mb_cols);
+    int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols);
     int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols);
     int last_ref_zz_useage = (cpi->inter_zz_count * 100) / (cm->mb_rows * cm->mb_cols);
 
@@ -4739,7 +4818,6 @@
 #if !(CONFIG_REALTIME_ONLY)
 static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned int *frame_flags)
 {
-    double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
 
     if (!cpi->common.refresh_alt_ref_frame)
         vp8_second_pass(cpi);
@@ -4748,7 +4826,11 @@
     cpi->bits_left -= 8 * *size;
 
     if (!cpi->common.refresh_alt_ref_frame)
+    {
+        double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
+            *cpi->oxcf.two_pass_vbrmin_section / 100);
         cpi->bits_left += (long long)(two_pass_min_rate / cpi->oxcf.frame_rate);
+    }
 }
 #endif
 
@@ -4959,20 +5041,7 @@
             {
                 if (cpi->source_encode_index ==  cpi->last_alt_ref_sei)
                 {
-#if VP8_TEMPORAL_ALT_REF
-
-                    if (cpi->oxcf.arnr_max_frames == 0)
-                    {
-                        cpi->is_src_frame_alt_ref = 1; // copy alt ref
-                    }
-                    else
-                    {
-                        cpi->is_src_frame_alt_ref = 0;
-                    }
-
-#else
                     cpi->is_src_frame_alt_ref = 1;
-#endif
                     cpi->last_alt_ref_sei    = -1;
                 }
                 else
@@ -5140,8 +5209,6 @@
     {
 
         // return to normal state
-        cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
-
         cm->refresh_entropy_probs = 1;
         cm->refresh_alt_ref_frame = 0;
         cm->refresh_golden_frame = 0;
@@ -5402,7 +5469,7 @@
     {
         for (j = 0; j < source->y_width; j += 16)
         {
-            unsigned int sse, sse2, sum2;
+            unsigned int sse;
             VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
 
             if (sse < 8096)
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 29b120e..211f659 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -25,7 +26,7 @@
 #include "entropy.h"
 #include "threading.h"
 #include "vpx_ports/mem.h"
-#include "vpx_codec/internal/vpx_codec_internal.h"
+#include "vpx/internal/vpx_codec_internal.h"
 #include "mcomp.h"
 
 #define INTRARDOPT
@@ -233,14 +234,17 @@
 {
 
     DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);
 
     DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);
 
     DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);
 
@@ -355,9 +359,14 @@
     int gf_bits;                     // Bits for the golden frame or ARF - 2 pass only
     int mid_gf_extra_bits;             // A few extra bits for the frame half way between two gfs.
 
-    int kf_group_bits;                // Projected total bits available for a key frame group of frames
-    int kf_group_error_left;           // Error score of frames still to be coded in kf group
-    int kf_bits;                     // Bits for the key frame in a key frame group - 2 pass only
+    // Projected total bits available for a key frame group of frames
+    long long kf_group_bits;
+
+    // Error score of frames still to be coded in kf group
+    long long kf_group_error_left;
+
+    // Bits for the key frame in a key frame group - 2 pass only
+    int kf_bits;
 
     int non_gf_bitrate_adjustment;     // Used in the few frames following a GF to recover the extra bits spent in that GF
     int initial_gf_use;               // percentage use of gf 2 frames after gf
@@ -522,8 +531,8 @@
     int motion_lvl;
     int motion_speed;
     int motion_var;
-    int next_iiratio;
-    int this_iiratio;
+    unsigned int next_iiratio;
+    unsigned int this_iiratio;
     int this_frame_modified_error;
 
     double norm_intra_err_per_mb;
@@ -637,6 +646,12 @@
     int b_calculate_ssimg;
 #endif
     int b_calculate_psnr;
+
+
+    unsigned char *gf_active_flags;   // Record of which MBs still refer to last golden frame either directly or through 0,0
+    int gf_active_count;
+
+
 } VP8_COMP;
 
 void control_data_rate(VP8_COMP *cpi);
diff --git a/vp8/encoder/parms.cpp b/vp8/encoder/parms.cpp
index 66fdafb..6cc4501 100644
--- a/vp8/encoder/parms.cpp
+++ b/vp8/encoder/parms.cpp
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index d61e2ce..eeeddcc 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -219,13 +220,20 @@
 {
     MACROBLOCKD *const xd = &mb->e_mbd;
     int i;
-    TEMP_CONTEXT t;
     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
     int error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, 0); // Rd estimate for the cost of the block prediction mode
     int distortion = 0;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
+
+    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
     vp8_intra_prediction_down_copy(xd);
-    vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4);
 
     for (i = 0; i < 16; i++)
     {
@@ -238,8 +246,8 @@
 
         error += pick_intra4x4block(rtcd,
                                     mb, mb->block + i, xd->block + i, &best_mode, A, L,
-                                    t.a + vp8_block2above[i],
-                                    t.l + vp8_block2left[i], &r, &d);
+                                    ta + vp8_block2above[i],
+                                    tl + vp8_block2left[i], &r, &d);
 
         cost += r;
         distortion += d;
@@ -409,7 +417,7 @@
     }
 
 
-    mb->e_mbd.mbmi.uv_mode = best_mode;
+    mb->e_mbd.mode_info_context->mbmi.uv_mode = best_mode;
     return best_error;
 
 }
@@ -422,6 +430,7 @@
     MACROBLOCKD *xd = &x->e_mbd;
     B_MODE_INFO best_bmodes[16];
     MB_MODE_INFO best_mbmode;
+    PARTITION_INFO best_partition;
     MV best_ref_mv1;
     MV mode_mv[MB_MODE_COUNT];
     MB_PREDICTION_MODE this_mode;
@@ -453,41 +462,48 @@
     vpx_memset(mode_mv, 0, sizeof(mode_mv));
     vpx_memset(nearest_mv, 0, sizeof(nearest_mv));
     vpx_memset(near_mv, 0, sizeof(near_mv));
+    vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
 
 
     // set up all the refframe dependent pointers.
     if (cpi->ref_frame_flags & VP8_LAST_FLAG)
     {
+        YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+
         vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[LAST_FRAME], &near_mv[LAST_FRAME],
                           &best_ref_mv[LAST_FRAME], MDCounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias);
 
-        y_buffer[LAST_FRAME] = cpi->common.last_frame.y_buffer + recon_yoffset;
-        u_buffer[LAST_FRAME] = cpi->common.last_frame.u_buffer + recon_uvoffset;
-        v_buffer[LAST_FRAME] = cpi->common.last_frame.v_buffer + recon_uvoffset;
+        y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset;
+        u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset;
+        v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset;
     }
     else
         skip_mode[LAST_FRAME] = 1;
 
     if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
     {
+        YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
+
         vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[GOLDEN_FRAME], &near_mv[GOLDEN_FRAME],
                           &best_ref_mv[GOLDEN_FRAME], MDCounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
 
-        y_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.y_buffer + recon_yoffset;
-        u_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.u_buffer + recon_uvoffset;
-        v_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.v_buffer + recon_uvoffset;
+        y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset;
+        u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset;
+        v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset;
     }
     else
         skip_mode[GOLDEN_FRAME] = 1;
 
     if (cpi->ref_frame_flags & VP8_ALT_FLAG && cpi->source_alt_ref_active)
     {
+        YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
+
         vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[ALTREF_FRAME], &near_mv[ALTREF_FRAME],
                           &best_ref_mv[ALTREF_FRAME], MDCounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
 
-        y_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.y_buffer + recon_yoffset;
-        u_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset;
-        v_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset;
+        y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset;
+        u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset;
+        v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset;
     }
     else
         skip_mode[ALTREF_FRAME] = 1;
@@ -527,7 +543,7 @@
 
     best_rd = INT_MAX;
 
-    x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
     // if we encode a new mv this is important
     // find the best new motion vector
@@ -539,9 +555,9 @@
         if (best_rd <= cpi->rd_threshes[mode_index])
             continue;
 
-        x->e_mbd.mbmi.ref_frame = vp8_ref_frame_order[mode_index];
+        x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
 
-        if (skip_mode[x->e_mbd.mbmi.ref_frame])
+        if (skip_mode[x->e_mbd.mode_info_context->mbmi.ref_frame])
             continue;
 
         // Check to see if the testing frequency for this mode is at its max
@@ -570,33 +586,33 @@
         distortion2 = 0;
 
         this_mode = vp8_mode_order[mode_index];
-        
+
         // Experimental debug code.
         //all_rds[mode_index] = -1;
 
-        x->e_mbd.mbmi.mode = this_mode;
-        x->e_mbd.mbmi.uv_mode = DC_PRED;
+        x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
 
         // Work out the cost assosciated with selecting the reference frame
-        frame_cost = ref_frame_cost[x->e_mbd.mbmi.ref_frame];
+        frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
         rate2 += frame_cost;
 
         // everything but intra
-        if (x->e_mbd.mbmi.ref_frame)
+        if (x->e_mbd.mode_info_context->mbmi.ref_frame)
         {
-            x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mbmi.ref_frame];
-            x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mbmi.ref_frame];
-            x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mbmi.ref_frame];
-            mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mbmi.ref_frame];
-            mode_mv[NEARMV] = near_mv[x->e_mbd.mbmi.ref_frame];
-            best_ref_mv1 = best_ref_mv[x->e_mbd.mbmi.ref_frame];
-            memcpy(mdcounts, MDCounts[x->e_mbd.mbmi.ref_frame], sizeof(mdcounts));
+            x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            mode_mv[NEARMV] = near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            best_ref_mv1 = best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
         }
 
         //Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
         if (cpi->is_src_frame_alt_ref)
         {
-            if (this_mode != ZEROMV || x->e_mbd.mbmi.ref_frame != ALTREF_FRAME)
+            if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
                 continue;
         }
 
@@ -636,7 +652,7 @@
         case TM_PRED:
             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
-            rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode];
+            rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
             this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
 
             if (this_rd < best_intra_rd)
@@ -774,10 +790,10 @@
                 continue;
 
             rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
-            x->e_mbd.mbmi.mode = this_mode;
-            x->e_mbd.mbmi.mv.as_mv = mode_mv[this_mode];
+            x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+            x->e_mbd.mode_info_context->mbmi.mv.as_mv = mode_mv[this_mode];
             x->e_mbd.block[0].bmi.mode = this_mode;
-            x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mbmi.mv.as_int;
+            x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;
 
             distortion2 = get_inter_mbpred_error(x, cpi->fn_ptr.svf, cpi->fn_ptr.vf, (unsigned int *)(&sse));
 
@@ -816,7 +832,8 @@
             *returnrate = rate2;
             *returndistortion = distortion2;
             best_rd = this_rd;
-            vpx_memcpy(&best_mbmode, &x->e_mbd.mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
 
             if (this_mode == B_PRED || this_mode == SPLITMV)
                 for (i = 0; i < 16; i++)
@@ -862,9 +879,9 @@
 
     if (best_mbmode.mode <= B_PRED)
     {
-        x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
         vp8_pick_intra_mbuv_mode(x);
-        best_mbmode.uv_mode = x->e_mbd.mbmi.uv_mode;
+        best_mbmode.uv_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
     }
 
 
@@ -890,23 +907,25 @@
         best_mbmode.partitioning = 0;
         best_mbmode.dc_diff = 0;
 
-        vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
         for (i = 0; i < 16; i++)
         {
             vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
         }
 
-        x->e_mbd.mbmi.mv.as_int = 0;
+        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
 
         return best_rd;
     }
 
 
     // macroblock modes
-    vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
-    if (x->e_mbd.mbmi.mode == B_PRED || x->e_mbd.mbmi.mode == SPLITMV)
+    if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED || x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
         for (i = 0; i < 16; i++)
         {
             vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO));
@@ -914,10 +933,10 @@
         }
     else
     {
-        vp8_set_mbmode_and_mvs(x, x->e_mbd.mbmi.mode, &best_bmodes[0].mv.as_mv);
+        vp8_set_mbmode_and_mvs(x, x->e_mbd.mode_info_context->mbmi.mode, &best_bmodes[0].mv.as_mv);
     }
 
-    x->e_mbd.mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
+    x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
 
     return best_rd;
 }
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index fb28837..b80e4c8 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index bbd7840..79e07db 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c
index f99277f..588656b 100644
--- a/vp8/encoder/ppc/csystemdependent.c
+++ b/vp8/encoder/ppc/csystemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/ppc/encodemb_altivec.asm b/vp8/encoder/ppc/encodemb_altivec.asm
index e0e976d..6e0099d 100644
--- a/vp8/encoder/ppc/encodemb_altivec.asm
+++ b/vp8/encoder/ppc/encodemb_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/fdct_altivec.asm b/vp8/encoder/ppc/fdct_altivec.asm
index eaab14c..935d0cb 100644
--- a/vp8/encoder/ppc/fdct_altivec.asm
+++ b/vp8/encoder/ppc/fdct_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/rdopt_altivec.asm b/vp8/encoder/ppc/rdopt_altivec.asm
index 917bfe0..ba48230 100644
--- a/vp8/encoder/ppc/rdopt_altivec.asm
+++ b/vp8/encoder/ppc/rdopt_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/sad_altivec.asm b/vp8/encoder/ppc/sad_altivec.asm
index 1102ccf..e5f2638 100644
--- a/vp8/encoder/ppc/sad_altivec.asm
+++ b/vp8/encoder/ppc/sad_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/variance_altivec.asm b/vp8/encoder/ppc/variance_altivec.asm
index 952bf72..a1ebf66 100644
--- a/vp8/encoder/ppc/variance_altivec.asm
+++ b/vp8/encoder/ppc/variance_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/variance_subpixel_altivec.asm b/vp8/encoder/ppc/variance_subpixel_altivec.asm
index 148a8d2..301360b 100644
--- a/vp8/encoder/ppc/variance_subpixel_altivec.asm
+++ b/vp8/encoder/ppc/variance_subpixel_altivec.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/preproc.c b/vp8/encoder/preproc.c
index d2a13dc..bd918fa 100644
--- a/vp8/encoder/preproc.c
+++ b/vp8/encoder/preproc.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c
index 0e34cec..dc2a03b 100644
--- a/vp8/encoder/psnr.c
+++ b/vp8/encoder/psnr.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/psnr.h b/vp8/encoder/psnr.h
index 9f6ca0b..8ae4448 100644
--- a/vp8/encoder/psnr.h
+++ b/vp8/encoder/psnr.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 6028ebf..20ec9d1 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -15,6 +16,109 @@
 #include "entropy.h"
 #include "predictdc.h"
 
+//#define EXACT_QUANT
+#ifdef EXACT_QUANT
+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *coeff_ptr  = &b->coeff[0];
+    short *zbin_ptr   = &b->zbin[0][0];
+    short *round_ptr  = &b->round[0][0];
+    short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = &d->dequant[0][0];
+
+    vpx_memset(qcoeff_ptr, 0, 32);
+    vpx_memset(dqcoeff_ptr, 0, 32);
+
+    eob = -1;
+
+    for (i = 0; i < 16; i++)
+    {
+        rc   = vp8_default_zig_zag1d[i];
+        z    = coeff_ptr[rc];
+        zbin = zbin_ptr[rc] ;
+
+        sz = (z >> 31);                                 // sign of z
+        x  = (z ^ sz) - sz;                             // x = abs(z)
+
+        if (x >= zbin)
+        {
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
+            x  = (y ^ sz) - sz;                         // get the sign back
+            qcoeff_ptr[rc] = x;                          // write to destination
+            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
+
+            if (y)
+            {
+                eob = i;                                // last nonzero coeffs
+            }
+        }
+    }
+    d->eob = eob + 1;
+}
+
+void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
+    short *coeff_ptr  = &b->coeff[0];
+    short *zbin_ptr   = &b->zbin[0][0];
+    short *round_ptr  = &b->round[0][0];
+    short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = &d->dequant[0][0];
+    short zbin_oq_value = b->zbin_extra;
+
+    vpx_memset(qcoeff_ptr, 0, 32);
+    vpx_memset(dqcoeff_ptr, 0, 32);
+
+    eob = -1;
+
+    for (i = 0; i < 16; i++)
+    {
+        rc   = vp8_default_zig_zag1d[i];
+        z    = coeff_ptr[rc];
+
+        //if ( i == 0 )
+        //    zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2;
+        //else
+        zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+
+        zbin_boost_ptr ++;
+        sz = (z >> 31);                                 // sign of z
+        x  = (z ^ sz) - sz;                             // x = abs(z)
+
+        if (x >= zbin)
+        {
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
+            x  = (y ^ sz) - sz;                         // get the sign back
+            qcoeff_ptr[rc]  = x;                         // write to destination
+            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
+
+            if (y)
+            {
+                eob = i;                                // last nonzero coeffs
+                zbin_boost_ptr = &b->zrun_zbin_boost[0];    // reset zero runlength
+            }
+        }
+    }
+
+    d->eob = eob + 1;
+}
+#else
 void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 {
     int i, rc, eob;
@@ -55,9 +159,7 @@
             }
         }
     }
-
     d->eob = eob + 1;
-
 }
 
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
@@ -111,61 +213,100 @@
 
     d->eob = eob + 1;
 }
+
+#endif
+
+/* Perform regular quantization, with unbiased rounding and no zero bin. */
+void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
+{
+    int i;
+    int rc;
+    int eob;
+    int x;
+    int y;
+    int z;
+    int sz;
+    short *coeff_ptr;
+    short *quant_ptr;
+    short *quant_shift_ptr;
+    short *qcoeff_ptr;
+    short *dqcoeff_ptr;
+    short *dequant_ptr;
+
+    coeff_ptr = &b->coeff[0];
+    quant_ptr = &b->quant[0][0];
+    quant_shift_ptr = &b->quant_shift[0][0];
+    qcoeff_ptr = d->qcoeff;
+    dqcoeff_ptr = d->dqcoeff;
+    dequant_ptr = &d->dequant[0][0];
+    eob = - 1;
+    vpx_memset(qcoeff_ptr, 0, 32);
+    vpx_memset(dqcoeff_ptr, 0, 32);
+    for (i = 0; i < 16; i++)
+    {
+        int dq;
+        int round;
+
+        /*TODO: These arrays should be stored in zig-zag order.*/
+        rc = vp8_default_zig_zag1d[i];
+        z = coeff_ptr[rc];
+        dq = dequant_ptr[rc];
+        round = dq >> 1;
+        /* Sign of z. */
+        sz = -(z < 0);
+        x = (z + sz) ^ sz;
+        x += round;
+        if (x >= dq)
+        {
+            /* Quantize x. */
+            y  = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc];
+            /* Put the sign back. */
+            x = (y + sz) ^ sz;
+            /* Save the coefficient and its dequantized value. */
+            qcoeff_ptr[rc] = x;
+            dqcoeff_ptr[rc] = x * dq;
+            /* Remember the last non-zero coefficient. */
+            if (y)
+                eob = i;
+        }
+    }
+
+    d->eob = eob + 1;
+}
+
 void vp8_quantize_mby(MACROBLOCK *x)
 {
     int i;
+    int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
 
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
+    for (i = 0; i < 16; i++)
     {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
-        }
-
-        x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
-        x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
-
+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &=
+            (x->e_mbd.block[i].eob <= has_2nd_order);
     }
-    else
+
+    if(has_2nd_order)
     {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
+        x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
     }
 }
 
 void vp8_quantize_mb(MACROBLOCK *x)
 {
     int i;
+    int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
 
-    x->e_mbd.mbmi.mb_skip_coeff = 1;
-
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = 1;
+    for (i = 0; i < 24+has_2nd_order; i++)
     {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
-        }
-
-        for (i = 16; i < 25; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &=
+            (x->e_mbd.block[i].eob <= (has_2nd_order && i<16));
     }
-    else
-    {
-        for (i = 0; i < 24; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
-    }
-
 }
 
 
@@ -176,74 +317,6 @@
     for (i = 16; i < 24; i++)
     {
         x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-        x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-    }
-}
-
-// This function is not currently called
-void vp8_quantize_mbrd(MACROBLOCK *x)
-{
-    int i;
-
-    x->e_mbd.mbmi.mb_skip_coeff = 1;
-
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
-    {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
-        }
-
-        for (i = 16; i < 25; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
-    }
-    else
-    {
-        for (i = 0; i < 24; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
-    }
-}
-
-void vp8_quantize_mbuvrd(MACROBLOCK *x)
-{
-    int i;
-
-    for (i = 16; i < 24; i++)
-    {
-        x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-        x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-    }
-}
-
-void vp8_quantize_mbyrd(MACROBLOCK *x)
-{
-    int i;
-
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
-    {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
-        }
-
-        x->quantize_brd(&x->block[24], &x->e_mbd.block[24]);
-        x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
-
-    }
-    else
-    {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
     }
 }
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index 868e8e3..b74718b 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -16,6 +17,10 @@
 #define prototype_quantize_block(sym) \
     void (sym)(BLOCK *b,BLOCKD *d)
 
+#if ARCH_X86 || ARCH_X86_64
+#include "x86/quantize_x86.h"
+#endif
+
 #if ARCH_ARM
 #include "arm/quantize_arm.h"
 #endif
@@ -42,11 +47,10 @@
 #define QUANTIZE_INVOKE(ctx,fn) vp8_quantize_##fn
 #endif
 
+extern void vp8_strict_quantize_b(BLOCK *b,BLOCKD *d);
+
 extern void vp8_quantize_mb(MACROBLOCK *x);
 extern void vp8_quantize_mbuv(MACROBLOCK *x);
 extern void vp8_quantize_mby(MACROBLOCK *x);
-extern void vp8_quantize_mbyrd(MACROBLOCK *x);
-extern void vp8_quantize_mbuvrd(MACROBLOCK *x);
-extern void vp8_quantize_mbrd(MACROBLOCK *x);
 
 #endif
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 05040d3..50f4db0 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -407,7 +408,7 @@
                   cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
                   cpi->recent_ref_frame_usage[ALTREF_FRAME];
 
-    int pct_gf_active = (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
+    int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
 
     // Reset the last boost indicator
     //cpi->last_boost = 100;
@@ -1021,7 +1022,7 @@
                       cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
                       cpi->recent_ref_frame_usage[ALTREF_FRAME];
 
-        int pct_gf_active = (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
+        int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
 
         // Reset the last boost indicator
         //cpi->last_boost = 100;
@@ -1063,7 +1064,6 @@
 
         if (cpi->common.refresh_golden_frame == TRUE)
         {
-            int isize_adjustment = 0;
 #if 0
 
             if (0)   // p_gw
@@ -1119,10 +1119,13 @@
                     cpi->this_frame_target = (baseline_bits_at_q(1, Q, cpi->common.MBs) * cpi->last_boost) / 100;
 
             }
-            // If there is an active ARF at this location use the minimum bits on this frame
+            // If there is an active ARF at this location use the minimum
+            // bits on this frame even if it is a contructed arf.
+            // The active maximum quantizer insures that an appropriate
+            // number of bits will be spent if needed for contstructed ARFs.
             else
             {
-                cpi->this_frame_target = 0;           // Minimial spend on gf that is replacing an arf
+                cpi->this_frame_target = 0;
             }
 
             cpi->current_gf_interval = cpi->frames_till_gf_update_due;
@@ -1171,7 +1174,8 @@
         while (Z > 0)
         {
             Z --;
-            projected_size_based_on_q *= (int)Factor;
+            projected_size_based_on_q =
+                (int)(Factor * projected_size_based_on_q);
             Factor += factor_adjustment;
 
             if (Factor  >= 0.999)
@@ -1362,7 +1366,8 @@
                 if (cpi->zbin_over_quant > zbin_oqmax)
                     cpi->zbin_over_quant = zbin_oqmax;
 
-                bits_per_mb_at_this_q *= (int)Factor;                   // Each over-ruin step is assumed to equate to approximately 3% reduction in bitrate
+                // Adjust bits_per_mb_at_this_q estimate
+                bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q);
                 Factor += factor_adjustment;
 
                 if (Factor  >= 0.999)
@@ -1439,6 +1444,9 @@
     }
     else
     {
+        int last_kf_interval =
+                (cpi->frames_since_key > 0) ? cpi->frames_since_key : 1;
+
         // reset keyframe context and calculate weighted average of last KEY_FRAME_CONTEXT keyframes
         for (i = 0; i < KEY_FRAME_CONTEXT; i++)
         {
@@ -1449,8 +1457,8 @@
             }
             else
             {
-                cpi->prior_key_frame_size[KEY_FRAME_CONTEXT - 1]     = cpi->projected_frame_size;
-                cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1] = cpi->frames_since_key;
+                cpi->prior_key_frame_size[i]     = cpi->projected_frame_size;
+                cpi->prior_key_frame_distance[i] = last_kf_interval;
             }
 
             av_key_frame_bits      += prior_key_frame_weight[i] * cpi->prior_key_frame_size[i];
diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h
index 588c7a8..766dfdf 100644
--- a/vp8/encoder/ratectrl.h
+++ b/vp8/encoder/ratectrl.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 0846996..dbef85b 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -63,11 +64,6 @@
 #define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
 
 
-extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
-extern int vp8_dct_value_cost[DCT_MAX_VALUE*2];
-extern int *vp8_dct_value_cost_ptr;
-
 
 const int vp8_auto_speed_thresh[17] =
 {
@@ -170,15 +166,13 @@
 
 }
 
-static int rd_iifactor [ 32 ] =  {    16,  16,  16,  12,   8,   4,   2,   0,
+static int rd_iifactor [ 32 ] =  {    4,   4,   3,   2,   1,   0,   0,   0,
                                       0,   0,   0,   0,   0,   0,   0,   0,
                                       0,   0,   0,   0,   0,   0,   0,   0,
                                       0,   0,   0,   0,   0,   0,   0,   0,
                                  };
 
 
-
-
 // The values in this table should be reviewed
 static int sad_per_bit16lut[128] =
 {
@@ -232,43 +226,41 @@
     int i;
     int *thresh;
     int threshmult;
-
-    int capped_q = (Qvalue < 160) ? Qvalue : 160;
+    double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
+    double rdconst = 3.00;
 
     vp8_clear_system_state();  //__asm emms;
 
-    cpi->RDMULT = (int)((0.00007 * (capped_q * capped_q * capped_q * capped_q)) - (0.0125 * (capped_q * capped_q * capped_q)) +
-                        (2.25 * (capped_q * capped_q)) - (12.5 * capped_q) + 25.0);
-
-    if (cpi->RDMULT < 25)
-        cpi->RDMULT = 25;
-
-    if (cpi->pass == 2)
-    {
-        if (cpi->common.frame_type == KEY_FRAME)
-            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[0]) / 16;
-        else if (cpi->next_iiratio > 31)
-            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) / 16;
-        else
-            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) / 16;
-    }
-
+    // Further tests required to see if optimum is different
+    // for key frames, golden frames and arf frames.
+    // if (cpi->common.refresh_golden_frame ||
+    //     cpi->common.refresh_alt_ref_frame)
+    cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
 
     // Extend rate multiplier along side quantizer zbin increases
     if (cpi->zbin_over_quant  > 0)
     {
-        // Extend rate multiplier along side quantizer zbin increases
-        if (cpi->zbin_over_quant  > 0)
-        {
-            double oq_factor = pow(1.006,  cpi->zbin_over_quant);
+        double oq_factor;
+        double modq;
 
-            if (oq_factor > (1.0 + ((double)cpi->zbin_over_quant / 64.0)))
-                oq_factor = (1.0 + (double)cpi->zbin_over_quant / 64.0);
-
-            cpi->RDMULT *= (int)oq_factor;
-        }
+        // Experimental code using the same basic equation as used for Q above
+        // The units of cpi->zbin_over_quant are 1/128 of Q bin size
+        oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
+        modq = (int)((double)capped_q * oq_factor);
+        cpi->RDMULT = (int)(rdconst * (modq * modq));
     }
 
+    if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
+    {
+        if (cpi->next_iiratio > 31)
+            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
+        else
+            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4;
+    }
+
+    if (cpi->RDMULT < 125)
+        cpi->RDMULT = 125;
+
     cpi->mb.errorperbit = (cpi->RDMULT / 100);
 
     if (cpi->mb.errorperbit < 1)
@@ -494,7 +486,7 @@
     int i, j;
     int berror;
 
-    dc = !(mb->e_mbd.mbmi.mode == B_PRED || mb->e_mbd.mbmi.mode == SPLITMV);
+    dc = !(mb->e_mbd.mode_info_context->mbmi.mode == B_PRED || mb->e_mbd.mode_info_context->mbmi.mode == SPLITMV);
 
     for (i = 0; i < 16; i++)
     {
@@ -622,24 +614,28 @@
 {
     int cost = 0;
     int b;
-    TEMP_CONTEXT t, t2;
     int type = 0;
-
     MACROBLOCKD *x = &mb->e_mbd;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
 
-    vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4);
-    vp8_setup_temp_context(&t2, x->above_context[Y2CONTEXT], x->left_context[Y2CONTEXT], 1);
+    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
-    if (x->mbmi.mode == SPLITMV)
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
+
+    if (x->mode_info_context->mbmi.mode == SPLITMV)
         type = 3;
 
     for (b = 0; b < 16; b++)
         cost += cost_coeffs(mb, x->block + b, type,
-                            t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
 
-    if (x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
         cost += cost_coeffs(mb, x->block + 24, 1,
-                            t2.a + vp8_block2above[24], t2.l + vp8_block2left[24]);
+                    ta + vp8_block2above[24], tl + vp8_block2left[24]);
 
     return cost;
 }
@@ -718,13 +714,20 @@
 {
     MACROBLOCKD *const xd = &mb->e_mbd;
     int i;
-    TEMP_CONTEXT t;
     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
     int distortion = 0;
     int tot_rate_y = 0;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
+
+    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
     vp8_intra_prediction_down_copy(xd);
-    vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4);
 
     for (i = 0; i < 16; i++)
     {
@@ -737,8 +740,8 @@
 
         rd_pick_intra4x4block(
             cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L,
-            t.a + vp8_block2above[i],
-            t.l + vp8_block2left[i], &r, &ry, &d);
+            ta + vp8_block2above[i],
+            tl + vp8_block2left[i], &r, &ry, &d);
 
         cost += r;
         distortion += d;
@@ -769,9 +772,9 @@
         int dummy;
         rate = 0;
 
-        x->e_mbd.mbmi.mode = mode;
+        x->e_mbd.mode_info_context->mbmi.mode = mode;
 
-        rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode];
+        rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
 
         vp8_encode_intra16x16mbyrd(IF_RTCD(&cpi->rtcd), x);
 
@@ -793,28 +796,33 @@
         }
     }
 
-    x->e_mbd.mbmi.mode = mode_selected;
+    x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
     return best_rd;
 }
 
 
 static int rd_cost_mbuv(MACROBLOCK *mb)
 {
-    TEMP_CONTEXT t, t2;
     int b;
     int cost = 0;
     MACROBLOCKD *x = &mb->e_mbd;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
 
-    vp8_setup_temp_context(&t, x->above_context[UCONTEXT], x->left_context[UCONTEXT], 2);
-    vp8_setup_temp_context(&t2, x->above_context[VCONTEXT], x->left_context[VCONTEXT], 2);
+    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
     for (b = 16; b < 20; b++)
         cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
-                            t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
 
     for (b = 20; b < 24; b++)
         cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
-                            t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]);
+                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
 
     return cost;
 }
@@ -855,11 +863,11 @@
         int distortion;
         int this_rd;
 
-        x->e_mbd.mbmi.uv_mode = mode;
+        x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
         vp8_encode_intra16x16mbuvrd(IF_RTCD(&cpi->rtcd), x);
 
         rate_to = rd_cost_mbuv(x);
-        rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.uv_mode];
+        rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
 
         distortion = vp8_get_mbuvrecon_error(IF_RTCD(&cpi->rtcd.variance), x);
 
@@ -878,7 +886,7 @@
     *rate = r;
     *distortion = d;
 
-    x->e_mbd.mbmi.uv_mode = mode_selected;
+    x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
     return best_rd;
 }
 #endif
@@ -888,16 +896,17 @@
     vp8_prob p [VP8_MVREFS-1];
     assert(NEARESTMV <= m  &&  m <= SPLITMV);
     vp8_mv_ref_probs(p, near_mv_ref_ct);
-    return vp8_cost_token(vp8_mv_ref_tree, p, VP8_MVREFENCODINGS + m);
+    return vp8_cost_token(vp8_mv_ref_tree, p,
+                          vp8_mv_ref_encoding_array - NEARESTMV + m);
 }
 
 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)
 {
     int i;
 
-    x->e_mbd.mbmi.mode = mb;
-    x->e_mbd.mbmi.mv.as_mv.row = mv->row;
-    x->e_mbd.mbmi.mv.as_mv.col = mv->col;
+    x->e_mbd.mode_info_context->mbmi.mode = mb;
+    x->e_mbd.mode_info_context->mbmi.mv.as_mv.row = mv->row;
+    x->e_mbd.mode_info_context->mbmi.mv.as_mv.col = mv->col;
 
     for (i = 0; i < 16; i++)
     {
@@ -1002,18 +1011,19 @@
     return cost;
 }
 
-static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels, int which_label, TEMP_CONTEXT *t)
+static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
+                              int which_label, ENTROPY_CONTEXT *ta,
+                              ENTROPY_CONTEXT *tl)
 {
     int cost = 0;
     int b;
     MACROBLOCKD *x = &mb->e_mbd;
 
-
     for (b = 0; b < 16; b++)
         if (labels[ b] == which_label)
             cost += cost_coeffs(mb, x->block + b, 3,
-                                t->a + vp8_block2above[b],
-                                t->l + vp8_block2left[b]);
+                                ta + vp8_block2above[b],
+                                tl + vp8_block2left[b]);
 
     return cost;
 
@@ -1033,11 +1043,11 @@
 
             vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict);
             ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16);
-            x->short_fdct4x4rd(be->src_diff, be->coeff, 32);
+            x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
 
             // set to 0 no way to account for 2nd order DC so discount
             //be->coeff[0] = 0;
-            x->quantize_brd(be, bd);
+            x->quantize_b(be, bd);
 
             distortion += ENCODEMB_INVOKE(rtcd, berr)(be->coeff, bd->dqcoeff);
         }
@@ -1061,13 +1071,13 @@
     // Fdct and building the 2nd order block
     for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
     {
-        mb->short_fdct8x4rd(beptr->src_diff, beptr->coeff, 32);
+        mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
         *Y2DCPtr++ = beptr->coeff[0];
         *Y2DCPtr++ = beptr->coeff[16];
     }
 
     // 2nd order fdct
-    if (x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
     {
         mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
     }
@@ -1075,20 +1085,20 @@
     // Quantization
     for (b = 0; b < 16; b++)
     {
-        mb->quantize_brd(&mb->block[b], &mb->e_mbd.block[b]);
+        mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
     }
 
     // DC predication and Quantization of 2nd Order block
-    if (x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
     {
 
         {
-            mb->quantize_brd(mb_y2, x_y2);
+            mb->quantize_b(mb_y2, x_y2);
         }
     }
 
     // Distortion
-    if (x->mbmi.mode == SPLITMV)
+    if (x->mode_info_context->mbmi.mode == SPLITMV)
         d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 0) << 2;
     else
     {
@@ -1130,6 +1140,9 @@
     MV bmvs[16];
     int beobs[16];
 
+    vpx_memset(beobs, 0, sizeof(beobs));
+
+
     for (segmentation = 0; segmentation < VP8_NUMMBSPLITS; segmentation++)
     {
         int label_count;
@@ -1143,9 +1156,20 @@
 
         vp8_variance_fn_ptr_t v_fn_ptr;
 
-        TEMP_CONTEXT t;
-        TEMP_CONTEXT tb;
-        vp8_setup_temp_context(&t, xc->above_context[Y1CONTEXT], xc->left_context[Y1CONTEXT], 4);
+        ENTROPY_CONTEXT_PLANES t_above, t_left;
+        ENTROPY_CONTEXT *ta;
+        ENTROPY_CONTEXT *tl;
+        ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
+        ENTROPY_CONTEXT *ta_b;
+        ENTROPY_CONTEXT *tl_b;
+
+        vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+        vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+        ta = (ENTROPY_CONTEXT *)&t_above;
+        tl = (ENTROPY_CONTEXT *)&t_left;
+        ta_b = (ENTROPY_CONTEXT *)&t_above_b;
+        tl_b = (ENTROPY_CONTEXT *)&t_left_b;
 
         br = 0;
         bd = 0;
@@ -1230,9 +1254,15 @@
                 int this_rd;
                 int num00;
                 int labelyrate;
+                ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
+                ENTROPY_CONTEXT *ta_s;
+                ENTROPY_CONTEXT *tl_s;
 
-                TEMP_CONTEXT ts;
-                vp8_setup_temp_context(&ts, &t.a[0], &t.l[0], 4);
+                vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
+                vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
+
+                ta_s = (ENTROPY_CONTEXT *)&t_above_s;
+                tl_s = (ENTROPY_CONTEXT *)&t_left_s;
 
                 if (this_mode == NEW4X4)
                 {
@@ -1317,7 +1347,7 @@
 
                 distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4;
 
-                labelyrate = rdcost_mbsegment_y(x, labels, i, &ts);
+                labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
                 rate += labelyrate;
 
                 this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb);
@@ -1329,12 +1359,15 @@
                     bestlabelyrate = labelyrate;
                     mode_selected = this_mode;
                     best_label_rd = this_rd;
-                    vp8_setup_temp_context(&tb, &ts.a[0], &ts.l[0], 4);
+
+                    vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
+                    vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
 
                 }
             }
 
-            vp8_setup_temp_context(&t, &tb.a[0], &tb.l[0], 4);
+            vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
+            vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
 
             labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], best_ref_mv, mvcost);
 
@@ -1405,10 +1438,10 @@
 
     // save partitions
     labels = vp8_mbsplits[best_seg];
-    x->e_mbd.mbmi.partitioning = best_seg;
-    x->e_mbd.mbmi.partition_count = vp8_count_labels(labels);
+    x->e_mbd.mode_info_context->mbmi.partitioning = best_seg;
+    x->partition_info->count = vp8_count_labels(labels);
 
-    for (i = 0; i < x->e_mbd.mbmi.partition_count; i++)
+    for (i = 0; i < x->partition_info->count; i++)
     {
         int j;
 
@@ -1418,8 +1451,8 @@
                 break;
         }
 
-        x->e_mbd.mbmi.partition_bmi[i].mode = x->e_mbd.block[j].bmi.mode;
-        x->e_mbd.mbmi.partition_bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
+        x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
+        x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
     }
 
     return best_segment_rd;
@@ -1433,6 +1466,7 @@
     MACROBLOCKD *xd = &x->e_mbd;
     B_MODE_INFO best_bmodes[16];
     MB_MODE_INFO best_mbmode;
+    PARTITION_INFO best_partition;
     MV best_ref_mv;
     MV mode_mv[MB_MODE_COUNT];
     MB_PREDICTION_MODE this_mode;
@@ -1464,6 +1498,8 @@
 
     *returnintra = INT_MAX;
 
+    vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // clean
+
     cpi->mbs_tested_so_far++;          // Count of the number of MBs tested so far this frame
 
     x->skip = 0;
@@ -1517,9 +1553,9 @@
 
     vpx_memset(mode_mv, 0, sizeof(mode_mv));
 
-    x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
     vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
-    uv_intra_mode = x->e_mbd.mbmi.uv_mode;
+    uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
     {
         uvintra_eob = 0;
 
@@ -1541,7 +1577,7 @@
         //all_rds[mode_index] = -1;
         //all_rates[mode_index] = -1;
         //all_dist[mode_index] = -1;
-        //intermodecost[mode_index] = -1;  
+        //intermodecost[mode_index] = -1;
 
         // Test best rd so far against threshold for trying this mode.
         if (best_rd <= cpi->rd_threshes[mode_index])
@@ -1563,31 +1599,34 @@
 
         this_mode = vp8_mode_order[mode_index];
 
-        x->e_mbd.mbmi.mode = this_mode;
-        x->e_mbd.mbmi.uv_mode = DC_PRED;
-        x->e_mbd.mbmi.ref_frame = vp8_ref_frame_order[mode_index];
+        x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+        x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
 
         //Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
         if (cpi->is_src_frame_alt_ref)
         {
-            if (this_mode != ZEROMV || x->e_mbd.mbmi.ref_frame != ALTREF_FRAME)
+            if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
                 continue;
         }
 
-        if (x->e_mbd.mbmi.ref_frame == LAST_FRAME)
+        if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
         {
+            YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+
             if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
                 continue;
 
             lf_or_gf = 0;  // Local last frame vs Golden frame flag
 
             // Set up pointers for this macro block into the previous frame recon buffer
-            x->e_mbd.pre.y_buffer = cpi->common.last_frame.y_buffer + recon_yoffset;
-            x->e_mbd.pre.u_buffer = cpi->common.last_frame.u_buffer + recon_uvoffset;
-            x->e_mbd.pre.v_buffer = cpi->common.last_frame.v_buffer + recon_uvoffset;
+            x->e_mbd.pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
+            x->e_mbd.pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
+            x->e_mbd.pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
         }
-        else if (x->e_mbd.mbmi.ref_frame == GOLDEN_FRAME)
+        else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
         {
+            YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
 
             // not supposed to reference gold frame
             if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
@@ -1596,12 +1635,14 @@
             lf_or_gf = 1;  // Local last frame vs Golden frame flag
 
             // Set up pointers for this macro block into the previous frame recon buffer
-            x->e_mbd.pre.y_buffer = cpi->common.golden_frame.y_buffer + recon_yoffset;
-            x->e_mbd.pre.u_buffer = cpi->common.golden_frame.u_buffer + recon_uvoffset;
-            x->e_mbd.pre.v_buffer = cpi->common.golden_frame.v_buffer + recon_uvoffset;
+            x->e_mbd.pre.y_buffer = gld_yv12->y_buffer + recon_yoffset;
+            x->e_mbd.pre.u_buffer = gld_yv12->u_buffer + recon_uvoffset;
+            x->e_mbd.pre.v_buffer = gld_yv12->v_buffer + recon_uvoffset;
         }
-        else if (x->e_mbd.mbmi.ref_frame == ALTREF_FRAME)
+        else if (x->e_mbd.mode_info_context->mbmi.ref_frame == ALTREF_FRAME)
         {
+            YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
+
             // not supposed to reference alt ref frame
             if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
                 continue;
@@ -1612,19 +1653,19 @@
             lf_or_gf = 1;  // Local last frame vs Golden frame flag
 
             // Set up pointers for this macro block into the previous frame recon buffer
-            x->e_mbd.pre.y_buffer = cpi->common.alt_ref_frame.y_buffer + recon_yoffset;
-            x->e_mbd.pre.u_buffer = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset;
-            x->e_mbd.pre.v_buffer = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset;
+            x->e_mbd.pre.y_buffer = alt_yv12->y_buffer + recon_yoffset;
+            x->e_mbd.pre.u_buffer = alt_yv12->u_buffer + recon_uvoffset;
+            x->e_mbd.pre.v_buffer = alt_yv12->v_buffer + recon_uvoffset;
         }
 
         vp8_find_near_mvs(&x->e_mbd,
                           x->e_mbd.mode_info_context,
                           &mode_mv[NEARESTMV], &mode_mv[NEARMV], &best_ref_mv,
-                          mdcounts, x->e_mbd.mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
+                          mdcounts, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
 
 
         // Estimate the reference frame signaling cost and add it to the rolling cost variable.
-        frame_cost = ref_frame_cost[x->e_mbd.mbmi.ref_frame];
+        frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
         rate2 += frame_cost;
 
         if (this_mode <= B_PRED)
@@ -1691,9 +1732,9 @@
             int breakout_rd = best_rd - frame_cost_rd;
             int tmp_rd;
 
-            if (x->e_mbd.mbmi.ref_frame == LAST_FRAME)
+            if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
                 tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWMV], cpi->common.full_pixel) ;
-            else if (x->e_mbd.mbmi.ref_frame == GOLDEN_FRAME)
+            else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                 tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWG], cpi->common.full_pixel) ;
             else
                 tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWA], cpi->common.full_pixel) ;
@@ -1747,19 +1788,19 @@
             }
 
             // trap cases where the 8x8s can be promoted to 8x16s or 16x8s
-            if (0)//x->e_mbd.mbmi.partition_count == 4)
+            if (0)//x->partition_info->count == 4)
             {
 
-                if (x->e_mbd.mbmi.partition_bmi[0].mv.as_int == x->e_mbd.mbmi.partition_bmi[1].mv.as_int
-                    && x->e_mbd.mbmi.partition_bmi[2].mv.as_int == x->e_mbd.mbmi.partition_bmi[3].mv.as_int)
+                if (x->partition_info->bmi[0].mv.as_int == x->partition_info->bmi[1].mv.as_int
+                    && x->partition_info->bmi[2].mv.as_int == x->partition_info->bmi[3].mv.as_int)
                 {
                     const int *labels = vp8_mbsplits[2];
-                    x->e_mbd.mbmi.partitioning = 0;
+                    x->e_mbd.mode_info_context->mbmi.partitioning = 0;
                     rate -= vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + 2);
                     rate += vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings);
-                    //rate -=  x->inter_bmode_costs[  x->e_mbd.mbmi.partition_bmi[1]];
-                    //rate -=  x->inter_bmode_costs[  x->e_mbd.mbmi.partition_bmi[3]];
-                    x->e_mbd.mbmi.partition_bmi[1] = x->e_mbd.mbmi.partition_bmi[2];
+                    //rate -=  x->inter_bmode_costs[  x->partition_info->bmi[1]];
+                    //rate -=  x->inter_bmode_costs[  x->partition_info->bmi[3]];
+                    x->partition_info->bmi[1] = x->partition_info->bmi[2];
                 }
             }
 
@@ -1769,14 +1810,14 @@
         case V_PRED:
         case H_PRED:
         case TM_PRED:
-            x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+            x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
             {
                 macro_block_yrd(x, &rate, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
                 rate2 += rate;
                 rate_y = rate;
                 distortion2 += distortion;
-                rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode];
+                rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
                 rate2 += uv_intra_rate;
                 rate_uv = uv_intra_rate_tokenonly;
                 distortion2 += uv_intra_distortion;
@@ -2082,7 +2123,7 @@
         //all_rates[mode_index] = rate2;
         //all_dist[mode_index] = distortion2;
 
-        if ((x->e_mbd.mbmi.ref_frame == INTRA_FRAME)  && (this_rd < *returnintra))
+        if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)  && (this_rd < *returnintra))
         {
             *returnintra = this_rd ;
         }
@@ -2092,17 +2133,18 @@
         {
             // Note index of best mode so far
             best_mode_index = mode_index;
-            x->e_mbd.mbmi.force_no_skip = force_no_skip;
+            x->e_mbd.mode_info_context->mbmi.force_no_skip = force_no_skip;
 
             if (this_mode <= B_PRED)
             {
-                x->e_mbd.mbmi.uv_mode = uv_intra_mode;
+                x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
             }
 
             *returnrate = rate2;
             *returndistortion = distortion2;
             best_rd = this_rd;
-            vpx_memcpy(&best_mbmode, &x->e_mbd.mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
 
             for (i = 0; i < 16; i++)
             {
@@ -2183,28 +2225,30 @@
         best_mbmode.partitioning = 0;
         best_mbmode.dc_diff = 0;
 
-        vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
         for (i = 0; i < 16; i++)
         {
             vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
         }
 
-        x->e_mbd.mbmi.mv.as_int = 0;
+        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
 
         return best_rd;
     }
 
 
     // macroblock modes
-    vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
     for (i = 0; i < 16; i++)
     {
         vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO));
     }
 
-    x->e_mbd.mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
+    x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
 
     return best_rd;
 }
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index c6eae4b..fb74dd4 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index 74c6bd7..e63be2b 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c
new file mode 100644
index 0000000..fc0967d
--- /dev/null
+++ b/vp8/encoder/segmentation.c
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "segmentation.h"
+#include "vpx_mem/vpx_mem.h"
+
+void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
+{
+    int mb_row, mb_col;
+
+    MODE_INFO *this_mb_mode_info = cm->mi;
+
+    x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
+
+    if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
+    {
+        // Reset Gf useage monitors
+        vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+        cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
+    }
+    else
+    {
+        // for each macroblock row in image
+        for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+        {
+            // for each macroblock col in image
+            for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+            {
+
+                // If using golden then set GF active flag if not already set.
+                // If using last frame 0,0 mode then leave flag as it is
+                // else if using non 0,0 motion or intra modes then clear flag if it is currently set
+                if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME))
+                {
+                    if (*(x->gf_active_ptr) == 0)
+                    {
+                        *(x->gf_active_ptr) = 1;
+                        cpi->gf_active_count ++;
+                    }
+                }
+                else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(x->gf_active_ptr))
+                {
+                    *(x->gf_active_ptr) = 0;
+                    cpi->gf_active_count--;
+                }
+
+                x->gf_active_ptr++;          // Step onto next entry
+                this_mb_mode_info++;           // skip to next mb
+
+            }
+
+            // this is to account for the border
+            this_mb_mode_info++;
+        }
+    }
+}
diff --git a/vp8/encoder/segmentation.h b/vp8/encoder/segmentation.h
new file mode 100644
index 0000000..216e194
--- /dev/null
+++ b/vp8/encoder/segmentation.h
@@ -0,0 +1,16 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "string.h"
+#include "blockd.h"
+#include "onyx_int.h"
+
+extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x);
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
index df214a8..4ebcba1 100644
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 33ddd64..d9b8d36 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -23,12 +24,12 @@
 _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
 #endif
 void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
-void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x);
+void vp8_fix_contexts(MACROBLOCKD *x);
 
 TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-TOKENEXTRA *vp8_dct_value_tokens_ptr;
+const TOKENEXTRA *vp8_dct_value_tokens_ptr;
 int vp8_dct_value_cost[DCT_MAX_VALUE*2];
-int *vp8_dct_value_cost_ptr;
+const int *vp8_dct_value_cost_ptr;
 #if 0
 int skip_true_count = 0;
 int skip_false_count = 0;
@@ -196,86 +197,18 @@
     *a = *l = pt;
 
 }
-#if 0
+
 void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
 {
-    //int i;
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+    ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
+    ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
     int plane_type;
     int b;
 
     TOKENEXTRA *start = *t;
     TOKENEXTRA *tp = *t;
 
-    x->mbmi.dc_diff = 1;
-
-    vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
-
-    if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
-    {
-        plane_type = 3;
-    }
-    else
-    {
-        tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
-                            A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
-        plane_type = 0;
-
-    }
-
-    for (b = 0; b < 16; b++)
-        tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
-                            A[vp8_block2context[b]] + vp8_block2above[b],
-                            L[vp8_block2context[b]] + vp8_block2left[b], cpi);
-
-    for (b = 16; b < 24; b++)
-        tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
-                            A[vp8_block2context[b]] + vp8_block2above[b],
-                            L[vp8_block2context[b]] + vp8_block2left[b], cpi);
-
-    if (cpi->common.mb_no_coeff_skip)
-    {
-        x->mbmi.mb_skip_coeff = 1;
-
-        while ((tp != *t) && x->mbmi.mb_skip_coeff)
-        {
-            x->mbmi.mb_skip_coeff = (x->mbmi.mb_skip_coeff && (tp->Token == DCT_EOB_TOKEN));
-            tp ++;
-        }
-
-        if (x->mbmi.mb_skip_coeff == 1)
-        {
-            x->mbmi.dc_diff = 0;
-            //redo the coutnts
-            vpx_memcpy(cpi->coef_counts, cpi->coef_counts_backup, sizeof(cpi->coef_counts));
-
-            *t = start;
-            cpi->skip_true_count++;
-
-            //skip_true_count++;
-        }
-        else
-        {
-
-            cpi->skip_false_count++;
-            //skip_false_count++;
-        }
-    }
-}
-#else
-void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
-{
-    //int i;
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-    int plane_type;
-    int b;
-
-    TOKENEXTRA *start = *t;
-    TOKENEXTRA *tp = *t;
-
-    x->mbmi.dc_diff = 1;
+    x->mode_info_context->mbmi.dc_diff = 1;
 
 #if 0
 
@@ -290,7 +223,7 @@
 
 #if 1
 
-    if (x->mbmi.mb_skip_coeff)
+    if (x->mode_info_context->mbmi.mb_skip_coeff)
     {
 
         cpi->skip_true_count++;
@@ -299,13 +232,13 @@
             vp8_stuff_mb(cpi, x, t) ;
         else
         {
-            vp8_fix_contexts(cpi, x);
+            vp8_fix_contexts(x);
         }
 
-        if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-            x->mbmi.dc_diff = 0;
+        if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+            x->mode_info_context->mbmi.dc_diff = 0;
         else
-            x->mbmi.dc_diff = 1;
+            x->mode_info_context->mbmi.dc_diff = 1;
 
 
         return;
@@ -346,27 +279,27 @@
     vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
 #endif
 
-    if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
+    if (x->mode_info_context->mbmi.mode == B_PRED || x->mode_info_context->mbmi.mode == SPLITMV)
     {
         plane_type = 3;
     }
     else
     {
         tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
-                            A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
+                   A + vp8_block2above[24], L + vp8_block2left[24], cpi);
         plane_type = 0;
 
     }
 
     for (b = 0; b < 16; b++)
         tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
-                            A[vp8_block2context[b]] + vp8_block2above[b],
-                            L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+                            A + vp8_block2above[b],
+                            L + vp8_block2left[b], cpi);
 
     for (b = 16; b < 24; b++)
         tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
-                            A[vp8_block2context[b]] + vp8_block2above[b],
-                            L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+                            A + vp8_block2above[b],
+                            L + vp8_block2left[b], cpi);
 
 #if 0
 
@@ -405,7 +338,7 @@
 
 #endif
 }
-#endif
+
 
 #ifdef ENTROPY_STATS
 
@@ -580,57 +513,45 @@
 
 void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
 {
-    //int i;
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+    ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
+    ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
     int plane_type;
     int b;
 
     stuff2nd_order_b(x->block + 24, t, 1, x->frame_type,
-                     A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
+                     A + vp8_block2above[24], L + vp8_block2left[24], cpi);
     plane_type = 0;
 
 
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-        x->mbmi.dc_diff = 0;
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+        x->mode_info_context->mbmi.dc_diff = 0;
     else
-        x->mbmi.dc_diff = 1;
+        x->mode_info_context->mbmi.dc_diff = 1;
 
 
     for (b = 0; b < 16; b++)
         stuff1st_order_b(x->block + b, t, plane_type, x->frame_type,
-                         A[vp8_block2context[b]] + vp8_block2above[b],
-                         L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+                         A + vp8_block2above[b],
+                         L + vp8_block2left[b], cpi);
 
     for (b = 16; b < 24; b++)
         stuff1st_order_buv(x->block + b, t, 2, x->frame_type,
-                           A[vp8_block2context[b]] + vp8_block2above[b],
-                           L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+                           A + vp8_block2above[b],
+                           L + vp8_block2left[b], cpi);
 
 }
-void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x)
+void vp8_fix_contexts(MACROBLOCKD *x)
 {
-    x->left_context[Y1CONTEXT][0] = 0;
-    x->left_context[Y1CONTEXT][1] = 0;
-    x->left_context[Y1CONTEXT][2] = 0;
-    x->left_context[Y1CONTEXT][3] = 0;
-    x->left_context[UCONTEXT][0]  = 0;
-    x->left_context[VCONTEXT][0]  = 0;
-    x->left_context[UCONTEXT][1]  = 0;
-    x->left_context[VCONTEXT][1]  = 0;
-
-    x->above_context[Y1CONTEXT][0] = 0;
-    x->above_context[Y1CONTEXT][1] = 0;
-    x->above_context[Y1CONTEXT][2] = 0;
-    x->above_context[Y1CONTEXT][3] = 0;
-    x->above_context[UCONTEXT][0]  = 0;
-    x->above_context[VCONTEXT][0]  = 0;
-    x->above_context[UCONTEXT][1]  = 0;
-    x->above_context[VCONTEXT][1]  = 0;
-
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    /* Clear entropy contexts for Y2 blocks */
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
-        x->left_context[Y2CONTEXT][0] = 0;
-        x->above_context[Y2CONTEXT][0] = 0;
+        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
     }
+    else
+    {
+        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+    }
+
 }
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index 02aacc2..7b9fc9e 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -34,5 +35,11 @@
 extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
 #endif
 
+extern const int *vp8_dct_value_cost_ptr;
+/* TODO: The Token field should be broken out into a separate char array to
+ *  improve cache locality, since it's needed for costing when the rest of the
+ *  fields are not.
+ */
+extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
 
 #endif  /* tokenize_h */
diff --git a/vp8/encoder/treewriter.c b/vp8/encoder/treewriter.c
index e398044..03967c8 100644
--- a/vp8/encoder/treewriter.c
+++ b/vp8/encoder/treewriter.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h
index 05ac74c..88096d8 100644
--- a/vp8/encoder/treewriter.h
+++ b/vp8/encoder/treewriter.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index b1aede2..0341fbd 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -1,13 +1,13 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "config.h"
 
 #ifndef VARIANCE_H
 #define VARIANCE_H
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 85269b9..179cd0d 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
index 186ee68..9fb6761 100644
--- a/vp8/encoder/x86/csystemdependent.c
+++ b/vp8/encoder/x86/csystemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -179,11 +180,21 @@
     {
         // Willamette instruction set available:
         vp8_mbuverror                = vp8_mbuverror_xmm;
-        vp8_fast_quantize_b            = vp8_fast_quantize_b_sse;
+        /* The sse quantizer has not been updated to match the new exact
+         * quantizer introduced in commit e04e2935
+         */
+        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
+#if 0 //new fdct
         vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
         vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
-        vp8_fast_fdct4x4              = vp8_fast_fdct4x4_mmx;
-        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_wmt;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_mmx;
+        vp8_fast_fdct8x4              = vp8_short_fdct8x4_wmt;
+#else
+        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
+        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
+        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
+#endif
         vp8_subtract_b                = vp8_subtract_b_mmx;
         vp8_subtract_mbuv             = vp8_subtract_mbuv_mmx;
         vp8_variance4x4              = vp8_variance4x4_mmx;
@@ -216,11 +227,21 @@
     {
         // MMX instruction set available:
         vp8_mbuverror                = vp8_mbuverror_mmx;
-        vp8_fast_quantize_b            = vp8_fast_quantize_b_mmx;
+        /* The mmx quantizer has not been updated to match the new exact
+         * quantizer introduced in commit e04e2935
+         */
+        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
+#if 0 // new fdct
         vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
         vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
-        vp8_fast_fdct4x4              = vp8_fast_fdct4x4_mmx;
-        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_mmx;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_mmx;
+        vp8_fast_fdct8x4              = vp8_short_fdct8x4_mmx;
+#else
+        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
+        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
+        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
+#endif
         vp8_subtract_b                = vp8_subtract_b_mmx;
         vp8_subtract_mbuv             = vp8_subtract_mbuv_mmx;
         vp8_variance4x4              = vp8_variance4x4_mmx;
@@ -253,10 +274,10 @@
     {
         // Pure C:
         vp8_mbuverror                = vp8_mbuverror_c;
-        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
+        vp8_fast_quantize_b          = vp8_fast_quantize_b_c;
         vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
         vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
-        vp8_fast_fdct4x4              = vp8_fast_fdct4x4_c;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
         vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
         vp8_subtract_b                = vp8_subtract_b_c;
         vp8_subtract_mbuv             = vp8_subtract_mbuv_c;
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index e134237..b6cfc5c 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -12,8 +13,7 @@
 
 section .text
     global sym(vp8_short_fdct4x4_mmx)
-    global sym(vp8_fast_fdct4x4_mmx)
-    global sym(vp8_fast_fdct8x4_wmt)
+    global sym(vp8_short_fdct8x4_wmt)
 
 
 %define         DCTCONSTANTSBITS         (16)
@@ -23,10 +23,6 @@
 %define         x_c3                      (25080)          ; cos(pi*3/8) * (1<<15)
 
 
-%define _1STSTAGESHIFT           14
-%define _2NDSTAGESHIFT           16
-
-; using matrix multiply with source and destbuffer has a pitch
 ;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
 sym(vp8_short_fdct4x4_mmx):
     push        rbp
@@ -36,333 +32,6 @@
     push rsi
     push rdi
     ; end prolog
-
-        mov         rsi,    arg(0) ;input
-        mov         rdi,    arg(1) ;output
-
-        movsxd      rax,    dword ptr arg(2) ;pitch
-        lea         rdx,    [dct_matrix GLOBAL]
-
-        movq        mm0,    [rsi   ]
-        movq        mm1,    [rsi + rax]
-
-        movq        mm2,    [rsi + rax*2]
-        lea         rsi,    [rsi + rax*2]
-
-        movq        mm3,    [rsi + rax]
-
-        ; first column
-        movq        mm4,    mm0
-        movq        mm7,    [rdx]
-
-        pmaddwd     mm4,    mm7
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    mm7
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-
-        pmaddwd     mm5,    mm7
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    mm7
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct1st_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _1STSTAGESHIFT
-        psrad       mm5,    _1STSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi],  mm4
-
-        ;second column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+8]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+8]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+8]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+8]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct1st_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _1STSTAGESHIFT
-        psrad       mm5,    _1STSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+8],  mm4
-
-
-        ;third column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+16]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+16]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+16]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+16]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct1st_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _1STSTAGESHIFT
-        psrad       mm5,    _1STSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+16],  mm4
-
-        ;fourth column (this is the last column, so we do not have save the source any more)
-
-        pmaddwd     mm0,    [rdx+24]
-
-        pmaddwd     mm1,    [rdx+24]
-        movq        mm6,    mm0
-
-        punpckldq   mm0,    mm1
-        punpckhdq   mm6,    mm1
-
-        paddd       mm0,    mm6
-
-        pmaddwd     mm2,    [rdx+24]
-
-        pmaddwd     mm3,    [rdx+24]
-        movq        mm7,    mm2
-
-        punpckldq   mm2,    mm3
-        punpckhdq   mm7,    mm3
-
-        paddd       mm2,    mm7
-        movq        mm6,    [dct1st_stage_rounding_mmx GLOBAL]
-
-        paddd       mm0,    mm6
-        paddd       mm2,    mm6
-
-        psrad       mm0,    _1STSTAGESHIFT
-        psrad       mm2,    _1STSTAGESHIFT
-
-        packssdw    mm0,    mm2
-
-        movq        mm3,    mm0
-
-        ; done with one pass
-        ; now start second pass
-        movq        mm0,    [rdi   ]
-        movq        mm1,    [rdi+ 8]
-        movq        mm2,    [rdi+ 16]
-
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct2nd_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _2NDSTAGESHIFT
-        psrad       mm5,    _2NDSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi],  mm4
-
-        ;second column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+8]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+8]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+8]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+8]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct2nd_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _2NDSTAGESHIFT
-        psrad       mm5,    _2NDSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+8],  mm4
-
-
-        ;third column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+16]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+16]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+16]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+16]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct2nd_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _2NDSTAGESHIFT
-        psrad       mm5,    _2NDSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+16],  mm4
-
-        ;fourth column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+24]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+24]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+24]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+24]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct2nd_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _2NDSTAGESHIFT
-        psrad       mm5,    _2NDSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+24],  mm4
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
-;void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch)
-sym(vp8_fast_fdct4x4_mmx):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 3
-    GET_GOT     rbx
-    push rsi
-    push rdi
-    ; end prolog
         mov     rsi,    arg(0) ;input
         mov     rdi,    arg(1) ;output
 
@@ -378,11 +47,11 @@
         movq    mm3,    [rcx + rax]
         ; get the constants
         ;shift to left by 1 for prescision
-        paddw   mm0,    mm0
-        paddw   mm1,    mm1
+        psllw   mm0,    3
+        psllw   mm1,    3
 
-        psllw   mm2,    1
-        psllw   mm3,    1
+        psllw   mm2,    3
+        psllw   mm3,    3
 
         ; transpose for the second stage
         movq    mm4,    mm0         ; 00 01 02 03
@@ -530,20 +199,23 @@
         movq    mm3,    mm5
         ; done with vertical
 
-		pcmpeqw	mm4,	mm4
-		pcmpeqw	mm5,	mm5
-		psrlw	mm4,	15
-		psrlw	mm5,	15
+        pcmpeqw mm4,    mm4
+        pcmpeqw mm5,    mm5
+        psrlw   mm4,    15
+        psrlw   mm5,    15
+
+        psllw   mm4,    2
+        psllw   mm5,    2
 
         paddw   mm0,    mm4
         paddw   mm1,    mm5
         paddw   mm2,    mm4
         paddw   mm3,    mm5
 
-        psraw   mm0, 1
-        psraw   mm1, 1
-        psraw   mm2, 1
-        psraw   mm3, 1
+        psraw   mm0, 3
+        psraw   mm1, 3
+        psraw   mm2, 3
+        psraw   mm3, 3
 
         movq        [rdi   ],   mm0
         movq        [rdi+ 8],   mm1
@@ -559,8 +231,8 @@
     ret
 
 
-;void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch)
-sym(vp8_fast_fdct8x4_wmt):
+;void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
+sym(vp8_short_fdct8x4_wmt):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 3
@@ -583,11 +255,11 @@
         movdqa      xmm3,       [rcx + rax]
         ; get the constants
         ;shift to left by 1 for prescision
-        psllw       xmm0,        1
-        psllw       xmm2,        1
+        psllw       xmm0,        3
+        psllw       xmm2,        3
 
-        psllw       xmm4,        1
-        psllw       xmm3,        1
+        psllw       xmm4,        3
+        psllw       xmm3,        3
 
         ; transpose for the second stage
         movdqa      xmm1,       xmm0         ; 00 01 02 03 04 05 06 07
@@ -757,20 +429,23 @@
         ; done with vertical
 
 
-        pcmpeqw		xmm4,		xmm4
-        pcmpeqw		xmm5,		xmm5;
-        psrlw		xmm4,		15
-        psrlw		xmm5,		15
+        pcmpeqw     xmm4,       xmm4
+        pcmpeqw     xmm5,       xmm5;
+        psrlw       xmm4,       15
+        psrlw       xmm5,       15
+
+        psllw       xmm4,       2
+        psllw       xmm5,       2
 
         paddw       xmm0,       xmm4
         paddw       xmm1,       xmm5
         paddw       xmm2,       xmm4
         paddw       xmm3,       xmm5
 
-        psraw       xmm0,       1
-        psraw       xmm1,       1
-        psraw       xmm2,       1
-        psraw       xmm3,       1
+        psraw       xmm0,       3
+        psraw       xmm1,       3
+        psraw       xmm2,       3
+        psraw       xmm3,       3
 
         movq        QWORD PTR[rdi   ],   xmm0
         movq        QWORD PTR[rdi+ 8],   xmm1
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 3e5e9a7..f7a1843 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -1,260 +1,189 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
 %include "vpx_ports/x86_abi_support.asm"
 
-global sym(vp8_short_fdct4x4_wmt)
-
-%define         DCTCONSTANTSBITS         (16)
-%define         DCTROUNDINGVALUE         (1<< (DCTCONSTANTSBITS-1))
-%define         x_c1                      (60547)          ; cos(pi  /8) * (1<<15)
-%define         x_c2                      (46341)          ; cos(pi*2/8) * (1<<15)
-%define         x_c3                      (25080)          ; cos(pi*3/8) * (1<<15)
-
-%define _1STSTAGESHIFT           14
-%define _2NDSTAGESHIFT           16
-
-
-;; using matrix multiply
-;void vp8_short_fdct4x4_wmt(short *input, short *output)
-sym(vp8_short_fdct4x4_wmt):
+;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
+global sym(vp8_short_fdct4x4_sse2)
+sym(vp8_short_fdct4x4_sse2):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 2
+    SHADOW_ARGS_TO_STACK 3
+;;    SAVE_XMM
     GET_GOT     rbx
+    push        rsi
+    push        rdi
     ; end prolog
 
-        mov         rax,        arg(0) ;input
-        mov         rcx,        arg(1) ;output
+    mov         rsi, arg(0)
+    movsxd      rax, DWORD PTR arg(2)
+    lea         rdi, [rsi + rax*2]
 
-        lea         rdx,        [dct_matrix_sse2 GLOBAL]
+    movq        xmm0, MMWORD PTR[rsi   ]        ;03 02 01 00
+    movq        xmm2, MMWORD PTR[rsi + rax]     ;13 12 11 10
+    movq        xmm1, MMWORD PTR[rsi + rax*2]   ;23 22 21 20
+    movq        xmm3, MMWORD PTR[rdi + rax]     ;33 32 31 30
 
-        movdqu      xmm0,       [rax   ]
-        movdqu      xmm1,       [rax+16]
+    punpcklqdq  xmm0, xmm2                      ;13 12 11 10 03 02 01 00
+    punpcklqdq  xmm1, xmm3                      ;33 32 31 30 23 22 21 20
 
-        ; first column
-        movdqa      xmm2,       xmm0
-        movdqa      xmm7,       [rdx]
+    mov         rdi, arg(1)
 
-        pmaddwd     xmm2,       xmm7
-        movdqa      xmm3,       xmm1
+    movdqa      xmm2, xmm0
+    punpckldq   xmm0, xmm1                      ;23 22 03 02 21 20 01 00
+    punpckhdq   xmm2, xmm1                      ;33 32 13 12 31 30 11 10
+    movdqa      xmm1, xmm0
+    punpckldq   xmm0, xmm2                      ;31 21 30 20 11 10 01 00
+    pshufhw     xmm1, xmm1, 0b1h                ;22 23 02 03 xx xx xx xx
+    pshufhw     xmm2, xmm2, 0b1h                ;32 33 12 13 xx xx xx xx
 
-        pmaddwd     xmm3,       xmm7
-        movdqa      xmm4,       xmm2
+    punpckhdq   xmm1, xmm2                      ;32 33 22 23 12 13 02 03
+    movdqa      xmm3, xmm0
+    paddw       xmm0, xmm1                      ;b1 a1 b1 a1 b1 a1 b1 a1
+    psubw       xmm3, xmm1                      ;c1 d1 c1 d1 c1 d1 c1 d1
+    psllw       xmm0, 3                         ;b1 <<= 3 a1 <<= 3
+    psllw       xmm3, 3                         ;c1 <<= 3 d1 <<= 3
+    movdqa      xmm1, xmm0
+    pmaddwd     xmm0, XMMWORD PTR[_mult_add GLOBAL]     ;a1 + b1
+    pmaddwd     xmm1, XMMWORD PTR[_mult_sub GLOBAL]     ;a1 - b1
+    movdqa      xmm4, xmm3
+    pmaddwd     xmm3, XMMWORD PTR[_5352_2217 GLOBAL]    ;c1*2217 + d1*5352
+    pmaddwd     xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352
 
-        punpckldq   xmm2,       xmm3
-        punpckhdq   xmm4,       xmm3
+    paddd       xmm3, XMMWORD PTR[_14500 GLOBAL]
+    paddd       xmm4, XMMWORD PTR[_7500 GLOBAL]
+    psrad       xmm3, 12            ;(c1 * 2217 + d1 * 5352 +  14500)>>12
+    psrad       xmm4, 12            ;(d1 * 2217 - c1 * 5352 +   7500)>>12
 
-        movdqa      xmm3,       xmm2
-        punpckldq   xmm2,       xmm4
+    packssdw    xmm0, xmm1                      ;op[2] op[0]
+    packssdw    xmm3, xmm4                      ;op[3] op[1]
+    ; 23 22 21 20 03 02 01 00
+    ;
+    ; 33 32 31 30 13 12 11 10
+    ;
+    movdqa      xmm2, xmm0
+    punpcklqdq  xmm0, xmm3                      ;13 12 11 10 03 02 01 00
+    punpckhqdq  xmm2, xmm3                      ;23 22 21 20 33 32 31 30
 
-        punpckhdq   xmm3,       xmm4
-        paddd       xmm2,       xmm3
+    movdqa      xmm3, xmm0
+    punpcklwd   xmm0, xmm2                      ;32 30 22 20 12 10 02 00
+    punpckhwd   xmm3, xmm2                      ;33 31 23 21 13 11 03 01
+    movdqa      xmm2, xmm0
+    punpcklwd   xmm0, xmm3                      ;13 12 11 10 03 02 01 00
+    punpckhwd   xmm2, xmm3                      ;33 32 31 30 23 22 21 20
 
+    movdqa      xmm5, XMMWORD PTR[_7 GLOBAL]
+    pshufd      xmm2, xmm2, 04eh
+    movdqa      xmm3, xmm0
+    paddw       xmm0, xmm2                      ;b1 b1 b1 b1 a1 a1 a1 a1
+    psubw       xmm3, xmm2                      ;c1 c1 c1 c1 d1 d1 d1 d1
 
-        paddd       xmm2,       XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-        psrad       xmm2,       _1STSTAGESHIFT
-        ;second column
-        movdqa      xmm3,       xmm0
-        pmaddwd     xmm3,       [rdx+16]
+    pshufd      xmm0, xmm0, 0d8h                ;b1 b1 a1 a1 b1 b1 a1 a1
+    movdqa      xmm2, xmm3                      ;save d1 for compare
+    pshufd      xmm3, xmm3, 0d8h                ;c1 c1 d1 d1 c1 c1 d1 d1
+    pshuflw     xmm0, xmm0, 0d8h                ;b1 b1 a1 a1 b1 a1 b1 a1
+    pshuflw     xmm3, xmm3, 0d8h                ;c1 c1 d1 d1 c1 d1 c1 d1
+    pshufhw     xmm0, xmm0, 0d8h                ;b1 a1 b1 a1 b1 a1 b1 a1
+    pshufhw     xmm3, xmm3, 0d8h                ;c1 d1 c1 d1 c1 d1 c1 d1
+    movdqa      xmm1, xmm0
+    pmaddwd     xmm0, XMMWORD PTR[_mult_add GLOBAL] ;a1 + b1
+    pmaddwd     xmm1, XMMWORD PTR[_mult_sub GLOBAL] ;a1 - b1
 
-        movdqa      xmm4,       xmm1
-        pmaddwd     xmm4,       [rdx+16]
+    pxor        xmm4, xmm4                      ;zero out for compare
+    paddd       xmm0, xmm5
+    paddd       xmm1, xmm5
+    pcmpeqw     xmm2, xmm4
+    psrad       xmm0, 4                         ;(a1 + b1 + 7)>>4
+    psrad       xmm1, 4                         ;(a1 - b1 + 7)>>4
+    pandn       xmm2, XMMWORD PTR[_cmp_mask GLOBAL] ;clear upper,
+                                                    ;and keep bit 0 of lower
 
-        movdqa      xmm5,       xmm3
-        punpckldq   xmm3,       xmm4
+    movdqa      xmm4, xmm3
+    pmaddwd     xmm3, XMMWORD PTR[_5352_2217 GLOBAL]    ;c1*2217 + d1*5352
+    pmaddwd     xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352
+    paddd       xmm3, XMMWORD PTR[_12000 GLOBAL]
+    paddd       xmm4, XMMWORD PTR[_51000 GLOBAL]
+    packssdw    xmm0, xmm1                      ;op[8] op[0]
+    psrad       xmm3, 16                ;(c1 * 2217 + d1 * 5352 +  12000)>>16
+    psrad       xmm4, 16                ;(d1 * 2217 - c1 * 5352 +  51000)>>16
 
-        punpckhdq   xmm5,       xmm4
-        movdqa      xmm4,       xmm3
+    packssdw    xmm3, xmm4                      ;op[12] op[4]
+    movdqa      xmm1, xmm0
+    paddw       xmm3, xmm2                      ;op[4] += (d1!=0)
+    punpcklqdq  xmm0, xmm3                      ;op[4] op[0]
+    punpckhqdq  xmm1, xmm3                      ;op[12] op[8]
 
-        punpckldq   xmm3,       xmm5
-        punpckhdq   xmm4,       xmm5
+    movdqa      XMMWORD PTR[rdi + 0], xmm0
+    movdqa      XMMWORD PTR[rdi + 16], xmm1
 
-        paddd       xmm3,       xmm4
-        paddd       xmm3,       XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-
-
-        psrad       xmm3,       _1STSTAGESHIFT
-        packssdw    xmm2,       xmm3
-
-        ;third column
-        movdqa      xmm3,       xmm0
-        pmaddwd     xmm3,       [rdx+32]
-
-        movdqa      xmm4,       xmm1
-        pmaddwd     xmm4,       [rdx+32]
-
-        movdqa      xmm5,       xmm3
-        punpckldq   xmm3,       xmm4
-
-        punpckhdq   xmm5,       xmm4
-        movdqa      xmm4,       xmm3
-
-        punpckldq   xmm3,       xmm5
-        punpckhdq   xmm4,       xmm5
-
-        paddd       xmm3,       xmm4
-        paddd       xmm3,       XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm3,       _1STSTAGESHIFT
-
-        ;fourth column (this is the last column, so we do not have save the source any more)
-        pmaddwd     xmm0,       [rdx+48]
-        pmaddwd     xmm1,       [rdx+48]
-
-        movdqa      xmm4,       xmm0
-        punpckldq   xmm0,       xmm1
-
-        punpckhdq   xmm4,       xmm1
-        movdqa      xmm1,       xmm0
-
-        punpckldq   xmm0,       xmm4
-        punpckhdq   xmm1,       xmm4
-
-        paddd       xmm0,       xmm1
-        paddd       xmm0,       XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-
-
-        psrad       xmm0,       _1STSTAGESHIFT
-        packssdw    xmm3,       xmm0
-        ; done with one pass
-        ; now start second pass
-        movdqa      xmm0,       xmm2
-        movdqa      xmm1,       xmm3
-
-        pmaddwd     xmm2,       xmm7
-        pmaddwd     xmm3,       xmm7
-
-        movdqa      xmm4,       xmm2
-        punpckldq   xmm2,       xmm3
-
-        punpckhdq   xmm4,       xmm3
-        movdqa      xmm3,       xmm2
-
-        punpckldq   xmm2,       xmm4
-        punpckhdq   xmm3,       xmm4
-
-        paddd       xmm2,       xmm3
-        paddd       xmm2,       XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm2,       _2NDSTAGESHIFT
-
-        ;second column
-        movdqa      xmm3,       xmm0
-        pmaddwd     xmm3,       [rdx+16]
-
-        movdqa      xmm4,       xmm1
-        pmaddwd     xmm4,       [rdx+16]
-
-        movdqa      xmm5,       xmm3
-        punpckldq   xmm3,       xmm4
-
-        punpckhdq   xmm5,       xmm4
-        movdqa      xmm4,       xmm3
-
-        punpckldq   xmm3,       xmm5
-        punpckhdq   xmm4,       xmm5
-
-        paddd       xmm3,       xmm4
-        paddd       xmm3,       XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm3,       _2NDSTAGESHIFT
-        packssdw    xmm2,       xmm3
-
-        movdqu      [rcx],      xmm2
-        ;third column
-        movdqa      xmm3,       xmm0
-        pmaddwd     xmm3,       [rdx+32]
-
-        movdqa      xmm4,       xmm1
-        pmaddwd     xmm4,       [rdx+32]
-
-        movdqa      xmm5,       xmm3
-        punpckldq   xmm3,       xmm4
-
-        punpckhdq   xmm5,       xmm4
-        movdqa      xmm4,       xmm3
-
-        punpckldq   xmm3,       xmm5
-        punpckhdq   xmm4,       xmm5
-
-        paddd       xmm3,       xmm4
-        paddd       xmm3,       XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm3,       _2NDSTAGESHIFT
-        ;fourth column
-        pmaddwd     xmm0,       [rdx+48]
-        pmaddwd     xmm1,       [rdx+48]
-
-        movdqa      xmm4,       xmm0
-        punpckldq   xmm0,       xmm1
-
-        punpckhdq   xmm4,       xmm1
-        movdqa      xmm1,       xmm0
-
-        punpckldq   xmm0,       xmm4
-        punpckhdq   xmm1,       xmm4
-
-        paddd       xmm0,       xmm1
-        paddd       xmm0,       XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm0,       _2NDSTAGESHIFT
-        packssdw    xmm3,       xmm0
-
-        movdqu     [rcx+16],   xmm3
-
-    mov rsp, rbp
     ; begin epilog
+    pop rdi
+    pop rsi
     RESTORE_GOT
+;;    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
 
-
 SECTION_RODATA
-;static unsigned int dct1st_stage_rounding_sse2[4] =
 align 16
-dct1st_stage_rounding_sse2:
-    times 4 dd 8192
-
-
-;static unsigned int dct2nd_stage_rounding_sse2[4] =
+_5352_2217:
+    dw 5352
+    dw 2217
+    dw 5352
+    dw 2217
+    dw 5352
+    dw 2217
+    dw 5352
+    dw 2217
 align 16
-dct2nd_stage_rounding_sse2:
-    times 4 dd 32768
-
-;static short dct_matrix_sse2[4][8]=
+_2217_neg5352:
+    dw 2217
+    dw -5352
+    dw 2217
+    dw -5352
+    dw 2217
+    dw -5352
+    dw 2217
+    dw -5352
 align 16
-dct_matrix_sse2:
-    times 8 dw 23170
+_mult_add:
+    times 8 dw 1
+align 16
+_cmp_mask:
+    times 4 dw 1
+    times 4 dw 0
 
-    dw  30274
-    dw  12540
-    dw -12540
-    dw -30274
-    dw  30274
-    dw  12540
-    dw -12540
-    dw -30274
-
-    dw  23170
-    times 2 dw -23170
-    times 2 dw  23170
-    times 2 dw -23170
-    dw  23170
-
-    dw  12540
-    dw -30274
-    dw  30274
-    dw -12540
-    dw  12540
-    dw -30274
-    dw  30274
-    dw -12540
+align 16
+_mult_sub:
+    dw 1
+    dw -1
+    dw 1
+    dw -1
+    dw 1
+    dw -1
+    dw 1
+    dw -1
+align 16
+_7:
+    times 4 dd 7
+align 16
+_14500:
+    times 4 dd 14500
+align 16
+_7500:
+    times 4 dd 7500
+align 16
+_12000:
+    times 4 dd 12000
+align 16
+_51000:
+    times 4 dd 51000
diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h
index bc80e64..05824c6 100644
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -21,46 +22,41 @@
 #if HAVE_MMX
 extern prototype_fdct(vp8_short_fdct4x4_mmx);
 extern prototype_fdct(vp8_short_fdct8x4_mmx);
-extern prototype_fdct(vp8_fast_fdct4x4_mmx);
-extern prototype_fdct(vp8_fast_fdct8x4_mmx);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
+#if 0
 #undef  vp8_fdct_short4x4
 #define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
 
 #undef  vp8_fdct_short8x4
 #define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
-
-#undef  vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4 vp8_fast_fdct4x4_mmx
-
-#undef  vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_mmx
+#endif
 
 #endif
 #endif
 
 
 #if HAVE_SSE2
-extern prototype_fdct(vp8_short_fdct4x4_wmt);
 extern prototype_fdct(vp8_short_fdct8x4_wmt);
-extern prototype_fdct(vp8_fast_fdct8x4_wmt);
-
 extern prototype_fdct(vp8_short_walsh4x4_sse2);
 
-#if !CONFIG_RUNTIME_CPU_DETECT
+extern prototype_fdct(vp8_short_fdct4x4_sse2);
 
-#if 0
+#if !CONFIG_RUNTIME_CPU_DETECT
+#if 1
 /* short SSE2 DCT currently disabled, does not match the MMX version */
 #undef  vp8_fdct_short4x4
-#define vp8_fdct_short4x4 vp8_short_fdct4x4_wmt
+#define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2
 
 #undef  vp8_fdct_short8x4
-#define vp8_fdct_short8x4 vp8_short_fdct8x4_wmt
+#define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2
 #endif
 
+#undef  vp8_fdct_fast4x4
+#define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2
+
 #undef  vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_wmt
+#define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2
 
 #undef vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4  vp8_short_walsh4x4_sse2
diff --git a/vp8/encoder/x86/encodemb_x86.h b/vp8/encoder/x86/encodemb_x86.h
index 9397a6c..d090b2d 100644
--- a/vp8/encoder/x86/encodemb_x86.h
+++ b/vp8/encoder/x86/encodemb_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index 1940471..413d74d 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -1,16 +1,16 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
 %include "vpx_ports/x86_abi_support.asm"
 
-
 ;int vp8_block_error_xmm(short *coeff_ptr,  short *dcoef_ptr)
 global sym(vp8_block_error_xmm)
 sym(vp8_block_error_xmm):
@@ -19,11 +19,9 @@
     SHADOW_ARGS_TO_STACK 2
     push rsi
     push rdi
-    ; end prolog
-
+    ; end prologue
 
         mov         rsi,        arg(0) ;coeff_ptr
-        pxor        xmm7,       xmm7
 
         mov         rdi,        arg(1) ;dcoef_ptr
         movdqa      xmm3,       [rsi]
@@ -32,31 +30,25 @@
         movdqa      xmm5,       [rsi+16]
 
         movdqa      xmm6,       [rdi+16]
-        pxor        xmm1,       xmm1    ; from movd xmm1, dc; dc=0
-
-        movdqa      xmm2,       xmm7
-        psubw       xmm5,       xmm6
-
-        por         xmm1,       xmm2
-        pmaddwd     xmm5,       xmm5
-
-        pcmpeqw     xmm1,       xmm7
         psubw       xmm3,       xmm4
 
-        pand        xmm1,       xmm3
-        pmaddwd     xmm1,       xmm1
+        psubw       xmm5,       xmm6
+        pmaddwd     xmm3,       xmm3
+        pmaddwd     xmm5,       xmm5
 
-        paddd       xmm1,       xmm5
-        movdqa      xmm0,       xmm1
+        paddd       xmm3,       xmm5
+
+        pxor        xmm7,       xmm7
+        movdqa      xmm0,       xmm3
 
         punpckldq   xmm0,       xmm7
-        punpckhdq   xmm1,       xmm7
+        punpckhdq   xmm3,       xmm7
 
-        paddd       xmm0,       xmm1
-        movdqa      xmm1,       xmm0
+        paddd       xmm0,       xmm3
+        movdqa      xmm3,       xmm0
 
         psrldq      xmm0,       8
-        paddd       xmm0,       xmm1
+        paddd       xmm0,       xmm3
 
         movd        rax,        xmm0
 
@@ -67,7 +59,6 @@
     pop         rbp
     ret
 
-
 ;int vp8_block_error_mmx(short *coeff_ptr,  short *dcoef_ptr)
 global sym(vp8_block_error_mmx)
 sym(vp8_block_error_mmx):
diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 7d86201..38812c8 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/mcomp_x86.h b/vp8/encoder/x86/mcomp_x86.h
index 5661491..e8d658b 100644
--- a/vp8/encoder/x86/mcomp_x86.h
+++ b/vp8/encoder/x86/mcomp_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/preproc_mmx.c b/vp8/encoder/x86/preproc_mmx.c
index 69617ca..a182c88 100644
--- a/vp8/encoder/x86/preproc_mmx.c
+++ b/vp8/encoder/x86/preproc_mmx.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index 847fc6e..a867409 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
new file mode 100644
index 0000000..a1b1c40
--- /dev/null
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -0,0 +1,254 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+
+;int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
+;               short *qcoeff_ptr,short *dequant_ptr,
+;               const int *default_zig_zag, short *round_ptr,
+;               short *quant_ptr, short *dqcoeff_ptr,
+;               unsigned short zbin_oq_value,
+;               short *zbin_boost_ptr);
+;
+global sym(vp8_regular_quantize_b_impl_sse2)
+sym(vp8_regular_quantize_b_impl_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 10
+    push        rsi
+    push        rdi
+    push        rbx
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+
+    %define abs_minus_zbin_lo 0
+    %define abs_minus_zbin_hi 16
+    %define temp_qcoeff_lo 32
+    %define temp_qcoeff_hi 48
+    %define save_xmm6 64
+    %define save_xmm7 80
+    %define eob 96
+
+    %define vp8_regularquantizeb_stack_size eob + 16
+
+    sub         rsp, vp8_regularquantizeb_stack_size
+
+    movdqa      OWORD PTR[rsp + save_xmm6], xmm6
+    movdqa      OWORD PTR[rsp + save_xmm7], xmm7
+
+    mov         rdx, arg(0)                 ;coeff_ptr
+    mov         eax, arg(8)                 ;zbin_oq_value
+
+    mov         rcx, arg(1)                 ;zbin_ptr
+    movd        xmm7, eax
+
+    movdqa      xmm0, OWORD PTR[rdx]
+    movdqa      xmm4, OWORD PTR[rdx + 16]
+
+    movdqa      xmm1, xmm0
+    movdqa      xmm5, xmm4
+
+    psraw       xmm0, 15                    ;sign of z (aka sz)
+    psraw       xmm4, 15                    ;sign of z (aka sz)
+
+    pxor        xmm1, xmm0
+    pxor        xmm5, xmm4
+
+    movdqa      xmm2, OWORD PTR[rcx]        ;load zbin_ptr
+    movdqa      xmm3, OWORD PTR[rcx + 16]   ;load zbin_ptr
+
+    pshuflw     xmm7, xmm7, 0
+    psubw       xmm1, xmm0                  ;x = abs(z)
+
+    punpcklwd   xmm7, xmm7                  ;duplicated zbin_oq_value
+    psubw       xmm5, xmm4                  ;x = abs(z)
+
+    paddw       xmm2, xmm7
+    paddw       xmm3, xmm7
+
+    psubw       xmm1, xmm2                  ;sub (zbin_ptr + zbin_oq_value)
+    psubw       xmm5, xmm3                  ;sub (zbin_ptr + zbin_oq_value)
+
+    mov         rdi, arg(5)                 ;round_ptr
+    mov         rsi, arg(6)                 ;quant_ptr
+
+    movdqa      OWORD PTR[rsp + abs_minus_zbin_lo], xmm1
+    movdqa      OWORD PTR[rsp + abs_minus_zbin_hi], xmm5
+
+    paddw       xmm1, xmm2                  ;add (zbin_ptr + zbin_oq_value) back
+    paddw       xmm5, xmm3                  ;add (zbin_ptr + zbin_oq_value) back
+
+    movdqa      xmm2, OWORD PTR[rdi]
+    movdqa      xmm3, OWORD PTR[rsi]
+
+    movdqa      xmm6, OWORD PTR[rdi + 16]
+    movdqa      xmm7, OWORD PTR[rsi + 16]
+
+    paddw       xmm1, xmm2
+    paddw       xmm5, xmm6
+
+    pmulhw      xmm1, xmm3
+    pmulhw      xmm5, xmm7
+
+    mov         rsi, arg(2)                 ;qcoeff_ptr
+    pxor        xmm6, xmm6
+
+    pxor        xmm1, xmm0
+    pxor        xmm5, xmm4
+
+    psubw       xmm1, xmm0
+    psubw       xmm5, xmm4
+
+    movdqa      OWORD PTR[rsp + temp_qcoeff_lo], xmm1
+    movdqa      OWORD PTR[rsp + temp_qcoeff_hi], xmm5
+
+    movdqa      OWORD PTR[rsi], xmm6        ;zero qcoeff
+    movdqa      OWORD PTR[rsi + 16], xmm6   ;zero qcoeff
+
+    xor         rax, rax
+    mov         rcx, -1
+
+    mov         [rsp + eob], rcx
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+
+    mov         rbx, arg(4)                 ;default_zig_zag
+
+rq_zigzag_loop:
+    movsxd      rcx, DWORD PTR[rbx + rax*4] ;now we have rc
+    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
+    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++
+
+    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+
+    sub         edx, edi                    ;x - zbin
+    jl          rq_zigzag_1
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+
+    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+    cmp         edx, 0
+    je          rq_zigzag_1
+
+    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+    mov         [rsp + eob], rax            ;eob = i
+
+rq_zigzag_1:
+    movsxd      rcx, DWORD PTR[rbx + rax*4 + 4]
+    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
+    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++
+
+    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+    lea         rax, [rax + 1]
+
+    sub         edx, edi                    ;x - zbin
+    jl          rq_zigzag_1a
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+
+    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+    cmp         edx, 0
+    je          rq_zigzag_1a
+
+    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+    mov         [rsp + eob], rax            ;eob = i
+
+rq_zigzag_1a:
+    movsxd      rcx, DWORD PTR[rbx + rax*4 + 4]
+    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
+    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++
+
+    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+    lea         rax, [rax + 1]
+
+    sub         edx, edi                    ;x - zbin
+    jl          rq_zigzag_1b
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+
+    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+    cmp         edx, 0
+    je          rq_zigzag_1b
+
+    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+    mov         [rsp + eob], rax            ;eob = i
+
+rq_zigzag_1b:
+    movsxd      rcx, DWORD PTR[rbx + rax*4 + 4]
+    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
+    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++
+
+    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+    lea         rax, [rax + 1]
+
+    sub         edx, edi                    ;x - zbin
+    jl          rq_zigzag_1c
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+
+    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+    cmp         edx, 0
+    je          rq_zigzag_1c
+
+    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+    mov         [rsp + eob], rax            ;eob = i
+
+rq_zigzag_1c:
+    lea         rax, [rax + 1]
+
+    cmp         rax, 16
+    jl          rq_zigzag_loop
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+    mov         rcx, arg(3)                 ;dequant_ptr
+    mov         rsi, arg(7)                 ;dqcoeff_ptr
+
+    movdqa      xmm2, OWORD PTR[rdi]
+    movdqa      xmm3, OWORD PTR[rdi + 16]
+
+    movdqa      xmm0, OWORD PTR[rcx]
+    movdqa      xmm1, OWORD PTR[rcx + 16]
+
+    pmullw      xmm0, xmm2
+    pmullw      xmm1, xmm3
+
+    movdqa      OWORD PTR[rsi], xmm0        ;store dqcoeff
+    movdqa      OWORD PTR[rsi + 16], xmm1   ;store dqcoeff
+
+    mov         rax, [rsp + eob]
+
+    movdqa      xmm6, OWORD PTR[rsp + save_xmm6]
+    movdqa      xmm7, OWORD PTR[rsp + save_xmm7]
+
+    add         rax, 1
+
+    add         rsp, vp8_regularquantizeb_stack_size
+    pop         rsp
+
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h
new file mode 100644
index 0000000..b5b22c0
--- /dev/null
+++ b/vp8/encoder/x86/quantize_x86.h
@@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license and patent
+ *  grant that can be found in the LICENSE file in the root of the source
+ *  tree. All contributing project authors may be found in the AUTHORS
+ *  file in the root of the source tree.
+ */
+
+#ifndef QUANTIZE_X86_H
+#define QUANTIZE_X86_H
+
+
+/* Note:
+ *
+ * This platform is commonly built for runtime CPU detection. If you modify
+ * any of the function mappings present in this file, be sure to also update
+ * them in the function pointer initialization code
+ */
+#if HAVE_MMX
+
+#endif
+
+
+#if HAVE_SSE2
+extern prototype_quantize_block(vp8_regular_quantize_b_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+
+/* The sse2 quantizer has not been updated to match the new exact
+ * quantizer introduced in commit e04e2935
+ *#undef vp8_quantize_quantb
+ *#define vp8_quantize_quantb vp8_regular_quantize_b_sse2
+ */
+
+#endif
+
+#endif
+
+
+#endif
diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm
index a825698..ad9658b 100644
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index 53240bb..9f34a7a 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index 38cc029..c2a1ae7 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm
index 1bb9561..94bbfff 100644
--- a/vp8/encoder/x86/sad_ssse3.asm
+++ b/vp8/encoder/x86/sad_ssse3.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index ce3e610..8fe3ee1 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -14,7 +15,7 @@
 ;                            unsigned short *diff, unsigned char *Predictor,
 ;                            int pitch);
 global sym(vp8_subtract_b_mmx_impl)
-sym(vp8_subtract_b_mmx_impl)
+sym(vp8_subtract_b_mmx_impl):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
@@ -150,7 +151,7 @@
 
 ;void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
 global sym(vp8_subtract_mbuv_mmx)
-sym(vp8_subtract_mbuv_mmx)
+sym(vp8_subtract_mbuv_mmx):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index d0da82a..173238e 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 7e5ee28..f47d9cc 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 4a5b25b..2600ce9 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index ea80753..5e750ba 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index 35fc90c..3c9f9c7 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -240,7 +241,7 @@
 #define vp8_variance_sad4x4x3 vp8_sad4x4x3_sse3
 
 #undef  vp8_variance_sad16x16x4d
-#define vp8_variance_sad16x16x4 vp8_sad16x16x4d_sse3
+#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_sse3
 
 #undef  vp8_variance_sad16x8x4d
 #define vp8_variance_sad16x8x4d vp8_sad16x8x4d_sse3
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index f1391ba..18dc49c 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -17,15 +18,10 @@
 #if HAVE_MMX
 void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
 {
-    vp8_short_fdct4x4_mmx(input,   output,    pitch);
-    vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
+    vp8_short_fdct4x4_c(input,   output,    pitch);
+    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
 }
 
-void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch)
-{
-    vp8_fast_fdct4x4_mmx(input,   output   , pitch);
-    vp8_fast_fdct4x4_mmx(input + 4, output + 16, pitch);
-}
 
 int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
                                  short *qcoeff_ptr, short *dequant_ptr,
@@ -86,10 +82,10 @@
 #endif
 
 #if HAVE_SSE2
-void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
+void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
 {
-    vp8_short_fdct4x4_wmt(input,   output,    pitch);
-    vp8_short_fdct4x4_wmt(input + 4, output + 16, pitch);
+    vp8_short_fdct4x4_sse2(input,   output,    pitch);
+    vp8_short_fdct4x4_sse2(input + 4, output + 16, pitch);
 }
 
 int vp8_fast_quantize_b_impl_sse(short *coeff_ptr, short *zbin_ptr,
@@ -120,6 +116,40 @@
              );
 }
 
+int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
+                               short *qcoeff_ptr,short *dequant_ptr,
+                               const int *default_zig_zag, short *round_ptr,
+                               short *quant_ptr, short *dqcoeff_ptr,
+                               unsigned short zbin_oq_value,
+                               short *zbin_boost_ptr);
+
+void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
+{
+    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
+    short *coeff_ptr  = &b->coeff[0];
+    short *zbin_ptr   = &b->zbin[0][0];
+    short *round_ptr  = &b->round[0][0];
+    short *quant_ptr  = &b->quant[0][0];
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = &d->dequant[0][0];
+    short zbin_oq_value = b->zbin_extra;
+
+    d->eob = vp8_regular_quantize_b_impl_sse2(
+        coeff_ptr,
+        zbin_ptr,
+        qcoeff_ptr,
+        dequant_ptr,
+        vp8_default_zig_zag1d,
+
+        round_ptr,
+        quant_ptr,
+        dqcoeff_ptr,
+        zbin_oq_value,
+        zbin_boost_ptr
+        );
+}
+
 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
 int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
 {
@@ -186,11 +216,19 @@
         cpi->rtcd.variance.get8x8var             = vp8_get8x8var_mmx;
         cpi->rtcd.variance.get16x16var           = vp8_get16x16var_mmx;
         cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx;
-
+#if 0 // new fdct
         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx;
         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_mmx;
-        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_mmx;
-        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_mmx;
+        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx;
+        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_mmx;
+#else
+        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
+        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_c;
+        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_c;
+
+#endif
+
         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
 
         cpi->rtcd.encodemb.berr                  = vp8_block_error_mmx;
@@ -200,7 +238,7 @@
         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx;
         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx;
 
-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
     }
 
 #endif
@@ -235,21 +273,20 @@
         cpi->rtcd.variance.get16x16var           = vp8_get16x16var_sse2;
         /* cpi->rtcd.variance.get4x4sse_cs  not implemented for wmt */;
 
-#if 0
-        /* short SSE2 DCT currently disabled, does not match the MMX version */
-        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_wmt;
-        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_wmt;
-#endif
-        /* cpi->rtcd.fdct.fast4x4  not implemented for wmt */;
-        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_wmt;
-        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2;
+        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_sse2;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_sse2;
+        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2;
+        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2;
+
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c ;
 
         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
         cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm;
         cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
         /* cpi->rtcd.encodemb.sub* not implemented for wmt */
 
-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
+        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
     }
 
 #endif
diff --git a/vp8/exports_dec b/vp8/exports_dec
new file mode 100644
index 0000000..f9b985c
--- /dev/null
+++ b/vp8/exports_dec
@@ -0,0 +1 @@
+data vpx_codec_vp8_dx_algo
diff --git a/vp8/exports_enc b/vp8/exports_enc
new file mode 100644
index 0000000..9967011
--- /dev/null
+++ b/vp8/exports_enc
@@ -0,0 +1 @@
+data vpx_codec_vp8_cx_algo
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index ec467c5..ecca18a 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -1,19 +1,17 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
 #add this file to the installed sources list
 VP8_COMMON_SRCS-yes += vp8_common.mk
 
-#common interface
-VP8_COMMON_SRCS-yes += vp8.h
-
 CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
 VP8_COMMON_SRCS-yes += common/type_aliases.h
 VP8_COMMON_SRCS-yes += common/pragmas.h
@@ -29,7 +27,6 @@
 
 CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
 
-VP8_COMMON_SRCS-yes += common/segmentation_common.c
 VP8_COMMON_SRCS-yes += common/alloccommon.c
 VP8_COMMON_SRCS-yes += common/blockd.c
 VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
@@ -66,7 +63,6 @@
 VP8_COMMON_SRCS-yes += common/reconinter.h
 VP8_COMMON_SRCS-yes += common/reconintra.h
 VP8_COMMON_SRCS-yes += common/reconintra4x4.h
-VP8_COMMON_SRCS-yes += common/segmentation_common.h
 VP8_COMMON_SRCS-yes += common/setupintrarecon.h
 VP8_COMMON_SRCS-yes += common/subpixel.h
 VP8_COMMON_SRCS-yes += common/swapyv12buffer.h
@@ -100,17 +96,17 @@
 VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
 VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
 VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
-VP8_COMMON_SRCS-$(CONFIG_VP8_ENCODER) += common/postproc.h
-VP8_COMMON_SRCS-$(CONFIG_VP8_ENCODER) += common/postproc.c
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
+VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm
 ifeq ($(CONFIG_POSTPROC),yes)
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
@@ -127,6 +123,7 @@
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
 
 VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/filter_c.c
+VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/idctllm.c
 VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/recon.c
 VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/reconintra4x4.c
 VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/generic/systemdependent.c
@@ -136,6 +133,7 @@
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x4_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x8_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem16x16_v6$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/dc_only_idct_add_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/iwalsh_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/filter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/idct_v6$(ASM)
@@ -152,6 +150,7 @@
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem8x4_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem8x8_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem16x16_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/dc_only_idct_add_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/iwalsh_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index e129ec9..8845368 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1,18 +1,19 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
-#include "vpx_codec/vpx_codec.h"
-#include "vpx_codec/internal/vpx_codec_internal.h"
+#include "vpx/vpx_codec.h"
+#include "vpx/internal/vpx_codec_internal.h"
 #include "vpx_version.h"
 #include "onyx_int.h"
-#include "vp8e.h"
+#include "vpx/vp8e.h"
 #include "onyx.h"
 #include <stdlib.h>
 #include <string.h>
@@ -52,15 +53,15 @@
             NULL,
 #if !(CONFIG_REALTIME_ONLY)
             VP8_BEST_QUALITY_ENCODING,  /* Encoding Mode */
-            -4,                         /* cpu_used      */
+            0,                          /* cpu_used      */
 #else
             VP8_REAL_TIME_ENCODING,     /* Encoding Mode */
-            -8,                         /* cpu_used      */
+            4,                          /* cpu_used      */
 #endif
             0,                          /* enable_auto_alt_ref */
             0,                          /* noise_sensitivity */
             0,                          /* Sharpness */
-            800,                        /* static_thresh */
+            0,                          /* static_thresh */
             VP8_ONE_TOKENPARTITION,     /* token_partitions */
             0, /* arnr_max_frames */
             0, /* arnr_strength */
@@ -81,7 +82,7 @@
     vpx_image_t             preview_img;
     unsigned int            next_frame_flag;
     vp8_postproc_cfg_t      preview_ppcfg;
-    vpx_codec_pkt_list_decl(26) pkt_list;              // changed to accomendate the maximum number of lagged frames allowed
+    vpx_codec_pkt_list_decl(64) pkt_list;              // changed to accomendate the maximum number of lagged frames allowed
     int                         deprecated_mode;
     unsigned int                fixed_kf_cntr;
 };
@@ -108,7 +109,7 @@
     } while(0)
 
 #define RANGE_CHECK(p,memb,lo,hi) do {\
-        if(!((p)->memb >= (lo) && (p)->memb <= hi)) \
+        if(!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \
             ERROR(#memb " out of range ["#lo".."#hi"]");\
     } while(0)
 
@@ -177,7 +178,7 @@
 
     RANGE_CHECK(vp8_cfg, token_partitions,   VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
     RANGE_CHECK(vp8_cfg, Sharpness,         0, 7);
-    RANGE_CHECK(vp8_cfg, arnr_max_frames,    0, 25);
+    RANGE_CHECK(vp8_cfg, arnr_max_frames,    0, 15);
     RANGE_CHECK(vp8_cfg, arnr_strength,     0, 6);
     RANGE_CHECK(vp8_cfg, arnr_type,         0, 0xffffffff);
 
@@ -211,10 +212,10 @@
 {
     switch (img->fmt)
     {
-    case IMG_FMT_YV12:
-    case IMG_FMT_I420:
-    case IMG_FMT_VPXI420:
-    case IMG_FMT_VPXYV12:
+    case VPX_IMG_FMT_YV12:
+    case VPX_IMG_FMT_I420:
+    case VPX_IMG_FMT_VPXI420:
+    case VPX_IMG_FMT_VPXYV12:
         break;
     default:
         ERROR("Invalid image format. Only YV12 and I420 images are supported");
@@ -295,9 +296,9 @@
     oxcf->under_shoot_pct         = cfg.rc_undershoot_pct;
     //oxcf->over_shoot_pct        = cfg.rc_overshoot_pct;
 
-    oxcf->maximum_buffer_size     = cfg.rc_buf_sz / 1000;
-    oxcf->starting_buffer_level   = cfg.rc_buf_initial_sz / 1000;
-    oxcf->optimal_buffer_level    = cfg.rc_buf_optimal_sz / 1000;
+    oxcf->maximum_buffer_size     = cfg.rc_buf_sz;
+    oxcf->starting_buffer_level   = cfg.rc_buf_initial_sz;
+    oxcf->optimal_buffer_level    = cfg.rc_buf_optimal_sz;
 
     oxcf->two_pass_vbrbias        = cfg.rc_2pass_vbr_bias_pct;
     oxcf->two_pass_vbrmin_section  = cfg.rc_2pass_vbr_minsection_pct;
@@ -536,20 +537,20 @@
                                        YV12_BUFFER_CONFIG  *yv12)
 {
     vpx_codec_err_t        res = VPX_CODEC_OK;
-    yv12->y_buffer = img->planes[PLANE_Y];
-    yv12->u_buffer = img->planes[PLANE_U];
-    yv12->v_buffer = img->planes[PLANE_V];
+    yv12->y_buffer = img->planes[VPX_PLANE_Y];
+    yv12->u_buffer = img->planes[VPX_PLANE_U];
+    yv12->v_buffer = img->planes[VPX_PLANE_V];
 
     yv12->y_width  = img->d_w;
     yv12->y_height = img->d_h;
     yv12->uv_width = (1 + yv12->y_width) / 2;
     yv12->uv_height = (1 + yv12->y_height) / 2;
 
-    yv12->y_stride = img->stride[PLANE_Y];
-    yv12->uv_stride = img->stride[PLANE_U];
+    yv12->y_stride = img->stride[VPX_PLANE_Y];
+    yv12->uv_stride = img->stride[VPX_PLANE_U];
 
-    yv12->border  = (img->stride[PLANE_Y] - img->w) / 2;
-    yv12->clrtype = (img->fmt == IMG_FMT_VPXI420 || img->fmt == IMG_FMT_VPXYV12); //REG_YUV = 0
+    yv12->border  = (img->stride[VPX_PLANE_Y] - img->w) / 2;
+    yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12); //REG_YUV = 0
     return res;
 }
 
@@ -825,7 +826,9 @@
         int ctr_id,
         va_list args)
 {
+#if CONFIG_POSTPROC
     vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
+    (void)ctr_id;
 
     if (data)
     {
@@ -834,6 +837,12 @@
     }
     else
         return VPX_CODEC_INVALID_PARAM;
+#else
+    (void)ctx;
+    (void)ctr_id;
+    (void)args;
+    return VPX_CODEC_INCAPABLE;
+#endif
 }
 
 
@@ -846,7 +855,7 @@
     {
 
         /*
-        vpx_img_wrap(&ctx->preview_img, IMG_FMT_YV12,
+        vpx_img_wrap(&ctx->preview_img, VPX_IMG_FMT_YV12,
             sd.y_width + 2*VP8BORDERINPIXELS,
             sd.y_height + 2*VP8BORDERINPIXELS,
             1,
@@ -857,23 +866,23 @@
             */
 
         ctx->preview_img.bps = 12;
-        ctx->preview_img.planes[PLANE_Y] = sd.y_buffer;
-        ctx->preview_img.planes[PLANE_U] = sd.u_buffer;
-        ctx->preview_img.planes[PLANE_V] = sd.v_buffer;
+        ctx->preview_img.planes[VPX_PLANE_Y] = sd.y_buffer;
+        ctx->preview_img.planes[VPX_PLANE_U] = sd.u_buffer;
+        ctx->preview_img.planes[VPX_PLANE_V] = sd.v_buffer;
 
         if (sd.clrtype == REG_YUV)
-            ctx->preview_img.fmt = IMG_FMT_I420;
+            ctx->preview_img.fmt = VPX_IMG_FMT_I420;
         else
-            ctx->preview_img.fmt = IMG_FMT_VPXI420;
+            ctx->preview_img.fmt = VPX_IMG_FMT_VPXI420;
 
         ctx->preview_img.x_chroma_shift = 1;
         ctx->preview_img.y_chroma_shift = 1;
 
         ctx->preview_img.d_w = ctx->cfg.g_w;
         ctx->preview_img.d_h = ctx->cfg.g_h;
-        ctx->preview_img.stride[PLANE_Y] = sd.y_stride;
-        ctx->preview_img.stride[PLANE_U] = sd.uv_stride;
-        ctx->preview_img.stride[PLANE_V] = sd.uv_stride;
+        ctx->preview_img.stride[VPX_PLANE_Y] = sd.y_stride;
+        ctx->preview_img.stride[VPX_PLANE_U] = sd.uv_stride;
+        ctx->preview_img.stride[VPX_PLANE_V] = sd.uv_stride;
         ctx->preview_img.w   = sd.y_width;
         ctx->preview_img.h   = sd.y_height;
 
@@ -1023,7 +1032,7 @@
 
         0,                  /* g_lag_in_frames */
 
-        70,                 /* rc_dropframe_thresh */
+        0,                  /* rc_dropframe_thresh */
         0,                  /* rc_resize_allowed */
         60,                 /* rc_resize_down_thresold */
         30,                 /* rc_resize_up_thresold */
@@ -1067,7 +1076,7 @@
 #endif
 vpx_codec_iface_t vpx_codec_vp8_cx_algo =
 {
-    "vpx Technologies VP8 Encoder" VERSION_STRING,
+    "WebM Project VP8 Encoder" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
     VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR,
     /* vpx_codec_caps_t          caps; */
@@ -1156,7 +1165,7 @@
 
 vpx_codec_iface_t vpx_enc_vp8_algo =
 {
-    "vpx Technologies VP8 Encoder (Deprecated API)" VERSION_STRING,
+    "WebM Project VP8 Encoder (Deprecated API)" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
     VPX_CODEC_CAP_ENCODER,
     /* vpx_codec_caps_t          caps; */
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 3e6cdf4..e7e5356 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -1,18 +1,19 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #include <stdlib.h>
 #include <string.h>
-#include "vpx_codec/vpx_decoder.h"
-#include "vp8dx.h"
-#include "vpx_codec/internal/vpx_codec_internal.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vp8dx.h"
+#include "vpx/internal/vpx_codec_internal.h"
 #include "vpx_version.h"
 #include "onyxd.h"
 #include "onyxd_int.h"
@@ -195,9 +196,6 @@
     ctx->pbi->fb_storage_ptr[0] = mmap_lkup(ctx, VP6_SEG_IMG0_STRG);
     ctx->pbi->fb_storage_ptr[1] = mmap_lkup(ctx, VP6_SEG_IMG1_STRG);
     ctx->pbi->fb_storage_ptr[2] = mmap_lkup(ctx, VP6_SEG_IMG2_STRG);
-    #if CONFIG_NEW_TOKENS
-    ctx->pbi->token_graph = mmap_lkup(ctx, VP6_SEG_TOKEN_GRAPH);
-    #endif
     #if CONFIG_POSTPROC
     ctx->pbi->postproc.deblock.fragment_variances = mmap_lkup(ctx, VP6_SEG_DEBLOCKER);
     ctx->pbi->fb_storage_ptr[3] = mmap_lkup(ctx, VP6_SEG_PP_IMG_STRG);
@@ -225,11 +223,12 @@
         res = vp8_mmap_alloc(&mmap);
 
         if (!res)
+        {
             vp8_init_ctx(ctx, &mmap);
 
-        ctx->priv->alg_priv->defer_alloc = 1;
-        /*post processing level initialized to do nothing */
-
+            ctx->priv->alg_priv->defer_alloc = 1;
+            /*post processing level initialized to do nothing */
+        }
     }
 
     return res;
@@ -257,12 +256,12 @@
 
     vpx_codec_err_t res = VPX_CODEC_OK;
     {
-        /*Parse from VP8 compressed data, the implies knowledge of the
-         *VP8 bitsteam.
-         * First 3 byte header including version, frame type and an offset
-         * Next 3 bytes are image sizewith 12 bit each for width and height
+        /* Parse uncompresssed part of key frame header.
+         * 3 bytes:- including version, frame type and an offset
+         * 3 bytes:- sync code (0x9d, 0x01, 0x2a)
+         * 4 bytes:- including image width and height in the lowest 14 bits
+         *           of each 2-byte value.
          */
-
         si->is_kf = 0;
 
         if (data_sz >= 10 && !(data[0] & 0x01))  /* I-Frame */
@@ -434,7 +433,7 @@
             unsigned int a_w = (sd.y_width + 15) & ~15;
             unsigned int a_h = (sd.y_height + 15) & ~15;
 
-            vpx_img_wrap(&ctx->img, IMG_FMT_I420,
+            vpx_img_wrap(&ctx->img, VPX_IMG_FMT_I420,
                          a_w + 2 * VP8BORDERINPIXELS,
                          a_h + 2 * VP8BORDERINPIXELS,
                          1,
@@ -529,7 +528,7 @@
 
     done = 1;
 
-    if (ctx->priv->alg_priv)
+    if (!res && ctx->priv->alg_priv)
     {
         for (i = 0; i < NELEMENTS(vp8_mem_req_segs); i++)
         {
@@ -557,20 +556,20 @@
                                        YV12_BUFFER_CONFIG  *yv12)
 {
     vpx_codec_err_t        res = VPX_CODEC_OK;
-    yv12->y_buffer = img->planes[PLANE_Y];
-    yv12->u_buffer = img->planes[PLANE_U];
-    yv12->v_buffer = img->planes[PLANE_V];
+    yv12->y_buffer = img->planes[VPX_PLANE_Y];
+    yv12->u_buffer = img->planes[VPX_PLANE_U];
+    yv12->v_buffer = img->planes[VPX_PLANE_V];
 
     yv12->y_width  = img->d_w;
     yv12->y_height = img->d_h;
     yv12->uv_width = yv12->y_width / 2;
     yv12->uv_height = yv12->y_height / 2;
 
-    yv12->y_stride = img->stride[PLANE_Y];
-    yv12->uv_stride = img->stride[PLANE_U];
+    yv12->y_stride = img->stride[VPX_PLANE_Y];
+    yv12->uv_stride = img->stride[VPX_PLANE_U];
 
-    yv12->border  = (img->stride[PLANE_Y] - img->d_w) / 2;
-    yv12->clrtype = (img->fmt == IMG_FMT_VPXI420 || img->fmt == IMG_FMT_VPXYV12);
+    yv12->border  = (img->stride[VPX_PLANE_Y] - img->d_w) / 2;
+    yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
 
     return res;
 }
@@ -656,7 +655,7 @@
 #endif
 vpx_codec_iface_t vpx_codec_vp8_dx_algo =
 {
-    "vpx Technologies VP8 Decoder" VERSION_STRING,
+    "WebM Project VP8 Decoder" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
     VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC,
     /* vpx_codec_caps_t          caps; */
@@ -679,7 +678,7 @@
  */
 vpx_codec_iface_t vpx_codec_vp8_algo =
 {
-    "vpx Technologies VP8 Decoder (Deprecated API)" VERSION_STRING,
+    "WebM Project VP8 Decoder (Deprecated API)" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
     VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC,
     /* vpx_codec_caps_t          caps; */
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index e7e7663..4ce18b6 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -1,14 +1,18 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
 include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
+
+VP8_CX_EXPORTS += exports_enc
+
 VP8_CX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
 VP8_CX_SRCS-no  += $(VP8_COMMON_SRCS-no)
 VP8_CX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)
@@ -18,7 +22,7 @@
   include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx_arm.mk
 endif
 
-VP8_CX_SRCS-yes += vp8cx.h vp8e.h vp8_cx_iface.c
+VP8_CX_SRCS-yes += vp8_cx_iface.c
 
 # encoder
 #INCLUDES += algo/vpx_common/vpx_mem/include
@@ -70,10 +74,14 @@
 VP8_CX_SRCS-yes += encoder/ratectrl.c
 VP8_CX_SRCS-yes += encoder/rdopt.c
 VP8_CX_SRCS-yes += encoder/sad_c.c
-VP8_CX_SRCS-yes += encoder/ssim.c
+VP8_CX_SRCS-yes += encoder/segmentation.c
+VP8_CX_SRCS-yes += encoder/segmentation.h
+VP8_CX_SRCS-$(CONFIG_PSNR) += encoder/ssim.c
 VP8_CX_SRCS-yes += encoder/tokenize.c
 VP8_CX_SRCS-yes += encoder/treewriter.c
 VP8_CX_SRCS-yes += encoder/variance_c.c
+VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.h
+VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.c
 
 ifeq ($(CONFIG_REALTIME_ONLY),yes)
 VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c
@@ -83,22 +91,22 @@
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h
+VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/sad_mmx.asm
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_sse2.c
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm
-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
 VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
 
 VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes))
-
-INSTALL-LIBS-yes += include/vp8.h include/vp8e.h include/vp8cx.h
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index f0753d9..1424bd1 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -1,10 +1,11 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/vp8/vp8dx.h b/vp8/vp8dx.h
deleted file mode 100644
index 7310b3b..0000000
--- a/vp8/vp8dx.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-#include "vp8.h"
-
-/*!\defgroup vp8_decoder WebM VP8 Decoder
- * \ingroup vp8
- *
- * @{
- */
-/*!\file vp8dx.h
- * \brief Provides definitions for using the VP8 algorithm within the vpx Decoder
- *        interface.
- */
-#ifndef VP8DX_H
-#define VP8DX_H
-#include "vpx_codec_impl_top.h"
-
-/*!\brief Algorithm interface for VP8
- *
- * This interface provides the capability to decode raw VP8 streams, as would
- * be found in AVI files and other non-Flash uses.
- */
-extern vpx_codec_iface_t vpx_codec_vp8_dx_algo;
-
-/* Include controls common to both the encoder and decoder */
-#include "vp8.h"
-
-
-/*! @} - end defgroup vp8_decoder */
-
-
-#include "vpx_codec_impl_bottom.h"
-#endif
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index e6af543..9419617 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -1,14 +1,18 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
 include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
+
+VP8_DX_EXPORTS += exports_dec
+
 VP8_DX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
 VP8_DX_SRCS-no  += $(VP8_COMMON_SRCS-no)
 VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)
@@ -18,7 +22,7 @@
   include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx_arm.mk
 endif
 
-VP8_DX_SRCS-yes += vp8dx.h vp8_dx_iface.c
+VP8_DX_SRCS-yes += vp8_dx_iface.c
 
 CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder
 
@@ -26,7 +30,6 @@
 # common
 #define ARM
 #define DISABLE_THREAD
-#define INLINE=__forceinline
 
 #INCLUDES += algo/vpx_common/vpx_mem/include
 #INCLUDES += common
@@ -40,7 +43,6 @@
 # decoder
 #define ARM
 #define DISABLE_THREAD
-#define INLINE=__forceinline
 
 #INCLUDES += algo/vpx_common/vpx_mem/include
 #INCLUDES += common
@@ -52,25 +54,24 @@
 VP8_DX_SRCS-yes += decoder/dboolhuff.c
 VP8_DX_SRCS-yes += decoder/decodemv.c
 VP8_DX_SRCS-yes += decoder/decodframe.c
-VP8_DX_SRCS-yes += decoder/demode.c
 VP8_DX_SRCS-yes += decoder/dequantize.c
 VP8_DX_SRCS-yes += decoder/detokenize.c
 VP8_DX_SRCS-yes += decoder/generic/dsystemdependent.c
 VP8_DX_SRCS-yes += decoder/dboolhuff.h
 VP8_DX_SRCS-yes += decoder/decodemv.h
 VP8_DX_SRCS-yes += decoder/decoderthreading.h
-VP8_DX_SRCS-yes += decoder/demode.h
 VP8_DX_SRCS-yes += decoder/dequantize.h
 VP8_DX_SRCS-yes += decoder/detokenize.h
 VP8_DX_SRCS-yes += decoder/onyxd_int.h
 VP8_DX_SRCS-yes += decoder/treereader.h
 VP8_DX_SRCS-yes += decoder/onyxd_if.c
 VP8_DX_SRCS-yes += decoder/threading.c
+VP8_DX_SRCS-yes += decoder/idct_blk.c
 
 VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
 
-INSTALL-LIBS-yes += include/vp8.h include/vp8dx.h
-
 VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/dequantize_x86.h
 VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c
 VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm
+VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c
+VP8_DX_SRCS-$(HAVE_SSE2) += decoder/x86/idct_blk_sse2.c
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index 1b4a7ec..989232c 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -1,44 +1,31 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
 #VP8_DX_SRCS list is modified according to different platforms.
 
-#File list for arm
-# decoder
-#VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/decodframe_arm.c
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dequantize_arm.c
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dsystemdependent.c
-
-#VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/decodframe.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
 VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
+VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
+VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/idct_blk.c
+VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK)  += decoder/arm/detokenize$(ASM)
 
 #File list for armv6
-# decoder
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantdcidct_v6$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantidct_v6$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_idct_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantize_v6$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/idct_blk_v6.c
 
 #File list for neon
-# decoder
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantdcidct_neon$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantidct_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_dc_idct_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_idct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantizeb_neon$(ASM)
-
-
-#for new token test
-ifeq ($(ARCH_ARM),yes)
-VP8_DX_SRCS-$(CONFIG_NEW_TOKENS)  += decoder/arm/detokenize_arm_sjl.c
-VP8_DX_SRCS-$(CONFIG_NEW_TOKENS)  += decoder/arm/detokenize_arm_v6$(ASM)
-VP8_DX_SRCS-$(CONFIG_NEW_TOKENS)  += decoder/onyxd_if_sjl.c
-
-VP8_DX_SRCS_REMOVE-$(CONFIG_NEW_TOKENS)  += decoder/arm/detokenize_arm.c
-VP8_DX_SRCS_REMOVE-$(CONFIG_NEW_TOKENS)  += decoder/onyxd_if.c
-endif
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_blk_neon.c
diff --git a/vpx/exports_com b/vpx/exports_com
new file mode 100644
index 0000000..2ab0509
--- /dev/null
+++ b/vpx/exports_com
@@ -0,0 +1,16 @@
+text vpx_codec_build_config
+text vpx_codec_control_
+text vpx_codec_destroy
+text vpx_codec_err_to_string
+text vpx_codec_error
+text vpx_codec_error_detail
+text vpx_codec_get_caps
+text vpx_codec_iface_name
+text vpx_codec_version
+text vpx_codec_version_extra_str
+text vpx_codec_version_str
+text vpx_img_alloc
+text vpx_img_flip
+text vpx_img_free
+text vpx_img_set_rect
+text vpx_img_wrap
diff --git a/vpx/exports_dec b/vpx/exports_dec
new file mode 100644
index 0000000..ed121f7
--- /dev/null
+++ b/vpx/exports_dec
@@ -0,0 +1,9 @@
+text vpx_codec_dec_init_ver
+text vpx_codec_decode
+text vpx_codec_get_frame
+text vpx_codec_get_mem_map
+text vpx_codec_get_stream_info
+text vpx_codec_peek_stream_info
+text vpx_codec_register_put_frame_cb
+text vpx_codec_register_put_slice_cb
+text vpx_codec_set_mem_map
diff --git a/vpx/exports_enc b/vpx/exports_enc
new file mode 100644
index 0000000..3d56749
--- /dev/null
+++ b/vpx/exports_enc
@@ -0,0 +1,8 @@
+text vpx_codec_enc_config_default
+text vpx_codec_enc_config_set
+text vpx_codec_enc_init_ver
+text vpx_codec_encode
+text vpx_codec_get_cx_data
+text vpx_codec_get_global_headers
+text vpx_codec_get_preview_frame
+text vpx_codec_set_cx_data_buf
diff --git a/vpx_codec/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
similarity index 96%
rename from vpx_codec/internal/vpx_codec_internal.h
rename to vpx/internal/vpx_codec_internal.h
index 0867552..ab4cad1 100644
--- a/vpx_codec/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -55,7 +56,7 @@
  * types, removing or reassigning enums, adding/removing/rearranging
  * fields to structures
  */
-#define VPX_CODEC_INTERNAL_ABI_VERSION (2) /**<\hideinitializer*/
+#define VPX_CODEC_INTERNAL_ABI_VERSION (3) /**<\hideinitializer*/
 
 typedef struct vpx_codec_alg_priv  vpx_codec_alg_priv_t;
 
@@ -137,7 +138,7 @@
  * provide type safety for the exchanged data or assign meanings to the
  * control codes. Those details should be specified in the algorithm's
  * header file. In particular, the ctrl_id parameter is guaranteed to exist
- * in the algorithm's control mapping table, and the data paramter may be NULL.
+ * in the algorithm's control mapping table, and the data parameter may be NULL.
  *
  *
  * \param[in]     ctx              Pointer to this instance's context
@@ -314,7 +315,7 @@
 };
 
 /*!\brief Callback function pointer / user data pair storage */
-typedef struct
+typedef struct vpx_codec_priv_cb_pair
 {
     union
     {
@@ -339,7 +340,6 @@
     vpx_codec_iface_t              *iface;
     struct vpx_codec_alg_priv      *alg_priv;
     const char                     *err_detail;
-    unsigned int                    eval_counter;
     vpx_codec_flags_t               init_flags;
     struct
     {
diff --git a/vpx_codec/src/vpx_codec.c b/vpx/src/vpx_codec.c
similarity index 62%
rename from vpx_codec/src/vpx_codec.c
rename to vpx/src/vpx_codec.c
index 6366416..9c1558c 100644
--- a/vpx_codec/src/vpx_codec.c
+++ b/vpx/src/vpx_codec.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -12,9 +13,9 @@
  * \brief Provides the high level interface to wrap decoder algorithms.
  *
  */
-#include <stdlib.h>
-#include <string.h>
-#include "vpx_codec/internal/vpx_codec_internal.h"
+#include <stdarg.h>
+#include "vpx/vpx_integer.h"
+#include "vpx/internal/vpx_codec_internal.h"
 #include "vpx_version.h"
 
 #define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var)
@@ -86,53 +87,6 @@
 }
 
 
-vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t      *ctx,
-                                       vpx_codec_iface_t    *iface,
-                                       vpx_codec_dec_cfg_t  *cfg,
-                                       vpx_codec_flags_t     flags,
-                                       int                   ver)
-{
-    vpx_codec_err_t res;
-
-    if (ver != VPX_DECODER_ABI_VERSION)
-        res = VPX_CODEC_ABI_MISMATCH;
-    else if (!ctx || !iface)
-        res = VPX_CODEC_INVALID_PARAM;
-    else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION)
-        res = VPX_CODEC_ABI_MISMATCH;
-    else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
-        res = VPX_CODEC_INCAPABLE;
-    else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC))
-        res = VPX_CODEC_INCAPABLE;
-    else
-    {
-        memset(ctx, 0, sizeof(*ctx));
-        ctx->iface = iface;
-        ctx->name = iface->name;
-        ctx->priv = NULL;
-        ctx->init_flags = flags;
-        ctx->config.dec = cfg;
-        res = VPX_CODEC_OK;
-
-        if (!(flags & VPX_CODEC_USE_XMA))
-        {
-            res = ctx->iface->init(ctx);
-
-            if (res)
-            {
-                ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
-                vpx_codec_destroy(ctx);
-            }
-
-            if (ctx->priv)
-                ctx->priv->iface = ctx->iface;
-        }
-    }
-
-    return SAVE_STATUS(ctx, res);
-}
-
-
 vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx)
 {
     vpx_codec_err_t res;
diff --git a/vpx_codec/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
similarity index 70%
rename from vpx_codec/src/vpx_decoder.c
rename to vpx/src/vpx_decoder.c
index 7e8575f..b52470b 100644
--- a/vpx_codec/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -12,11 +13,58 @@
  * \brief Provides the high level interface to wrap decoder algorithms.
  *
  */
-#include <stdlib.h>
-#include "vpx_codec/internal/vpx_codec_internal.h"
+#include <string.h>
+#include "vpx/internal/vpx_codec_internal.h"
 
 #define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var)
 
+vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t      *ctx,
+                                       vpx_codec_iface_t    *iface,
+                                       vpx_codec_dec_cfg_t  *cfg,
+                                       vpx_codec_flags_t     flags,
+                                       int                   ver)
+{
+    vpx_codec_err_t res;
+
+    if (ver != VPX_DECODER_ABI_VERSION)
+        res = VPX_CODEC_ABI_MISMATCH;
+    else if (!ctx || !iface)
+        res = VPX_CODEC_INVALID_PARAM;
+    else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION)
+        res = VPX_CODEC_ABI_MISMATCH;
+    else if ((flags & VPX_CODEC_USE_XMA) && !(iface->caps & VPX_CODEC_CAP_XMA))
+        res = VPX_CODEC_INCAPABLE;
+    else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC))
+        res = VPX_CODEC_INCAPABLE;
+    else
+    {
+        memset(ctx, 0, sizeof(*ctx));
+        ctx->iface = iface;
+        ctx->name = iface->name;
+        ctx->priv = NULL;
+        ctx->init_flags = flags;
+        ctx->config.dec = cfg;
+        res = VPX_CODEC_OK;
+
+        if (!(flags & VPX_CODEC_USE_XMA))
+        {
+            res = ctx->iface->init(ctx);
+
+            if (res)
+            {
+                ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL;
+                vpx_codec_destroy(ctx);
+            }
+
+            if (ctx->priv)
+                ctx->priv->iface = ctx->iface;
+        }
+    }
+
+    return SAVE_STATUS(ctx, res);
+}
+
+
 vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t       *iface,
         const uint8_t         *data,
         unsigned int           data_sz,
@@ -74,22 +122,10 @@
         res = VPX_CODEC_INVALID_PARAM;
     else if (!ctx->iface || !ctx->priv)
         res = VPX_CODEC_ERROR;
-
-#if CONFIG_EVAL_LIMIT
-    else if (ctx->priv->eval_counter >= 500)
-    {
-        ctx->priv->err_detail = "Evaluation limit exceeded.";
-        res = VPX_CODEC_ERROR;
-    }
-
-#endif
     else
     {
         res = ctx->iface->dec.decode(ctx->priv->alg_priv, data, data_sz,
                                      user_priv, deadline);
-#if CONFIG_EVAL_LIMIT
-        ctx->priv->eval_counter++;
-#endif
     }
 
     return SAVE_STATUS(ctx, res);
diff --git a/vpx_codec/src/vpx_decoder_compat.c b/vpx/src/vpx_decoder_compat.c
similarity index 91%
rename from vpx_codec/src/vpx_decoder_compat.c
rename to vpx/src/vpx_decoder_compat.c
index d5b04ae..e264734 100644
--- a/vpx_codec/src/vpx_decoder_compat.c
+++ b/vpx/src/vpx_decoder_compat.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -14,8 +15,8 @@
  */
 #include <stdlib.h>
 #include <string.h>
-#include "vpx_codec/vpx_decoder.h"
-#include "vpx_codec/internal/vpx_codec_internal.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/internal/vpx_codec_internal.h"
 
 #define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var)
 
diff --git a/vpx_codec/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
similarity index 92%
rename from vpx_codec/src/vpx_encoder.c
rename to vpx/src/vpx_encoder.c
index 98ad8ba..ddbd654 100644
--- a/vpx_codec/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -14,7 +15,7 @@
  */
 #include <limits.h>
 #include <string.h>
-#include "vpx_codec/internal/vpx_codec_internal.h"
+#include "vpx/internal/vpx_codec_internal.h"
 
 #define SAVE_STATUS(ctx,var) (ctx?(ctx->err = var):var)
 
@@ -126,15 +127,6 @@
         res = VPX_CODEC_ERROR;
     else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
         res = VPX_CODEC_INCAPABLE;
-
-#if CONFIG_EVAL_LIMIT
-    else if (ctx->priv->eval_counter >= 500)
-    {
-        ctx->priv->err_detail = "Evaluation limit exceeded.";
-        res = VPX_CODEC_ERROR;
-    }
-
-#endif
     else
     {
         /* Execute in a normalized floating point environment, if the platform
@@ -144,10 +136,6 @@
         res = ctx->iface->enc.encode(ctx->priv->alg_priv, img, pts,
                                      duration, flags, deadline);
         FLOATING_POINT_RESTORE();
-
-#if CONFIG_EVAL_LIMIT
-        ctx->priv->eval_counter++;
-#endif
     }
 
     return SAVE_STATUS(ctx, res);
diff --git a/vpx/src/vpx_image.c b/vpx/src/vpx_image.c
new file mode 100644
index 0000000..7a4e270
--- /dev/null
+++ b/vpx/src/vpx_image.c
@@ -0,0 +1,262 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include "vpx/vpx_image.h"
+
+static vpx_image_t *img_alloc_helper(vpx_image_t  *img,
+                                     vpx_img_fmt_t fmt,
+                                     unsigned int  d_w,
+                                     unsigned int  d_h,
+                                     unsigned int  stride_align,
+                                     unsigned char      *img_data)
+{
+
+    unsigned int  h, w, s, xcs, ycs, bps;
+    int           align;
+
+    /* Treat align==0 like align==1 */
+    if (!stride_align)
+        stride_align = 1;
+
+    /* Validate alignment (must be power of 2) */
+    if (stride_align & (stride_align - 1))
+        goto fail;
+
+    /* Get sample size for this format */
+    switch (fmt)
+    {
+    case VPX_IMG_FMT_RGB32:
+    case VPX_IMG_FMT_RGB32_LE:
+    case VPX_IMG_FMT_ARGB:
+    case VPX_IMG_FMT_ARGB_LE:
+        bps = 32;
+        break;
+    case VPX_IMG_FMT_RGB24:
+    case VPX_IMG_FMT_BGR24:
+        bps = 24;
+        break;
+    case VPX_IMG_FMT_RGB565:
+    case VPX_IMG_FMT_RGB565_LE:
+    case VPX_IMG_FMT_RGB555:
+    case VPX_IMG_FMT_RGB555_LE:
+    case VPX_IMG_FMT_UYVY:
+    case VPX_IMG_FMT_YUY2:
+    case VPX_IMG_FMT_YVYU:
+        bps = 16;
+        break;
+    case VPX_IMG_FMT_I420:
+    case VPX_IMG_FMT_YV12:
+    case VPX_IMG_FMT_VPXI420:
+    case VPX_IMG_FMT_VPXYV12:
+        bps = 12;
+        break;
+    default:
+        bps = 16;
+        break;
+    }
+
+    /* Get chroma shift values for this format */
+    switch (fmt)
+    {
+    case VPX_IMG_FMT_I420:
+    case VPX_IMG_FMT_YV12:
+    case VPX_IMG_FMT_VPXI420:
+    case VPX_IMG_FMT_VPXYV12:
+        xcs = 1;
+        break;
+    default:
+        xcs = 0;
+        break;
+    }
+
+    switch (fmt)
+    {
+    case VPX_IMG_FMT_I420:
+    case VPX_IMG_FMT_YV12:
+    case VPX_IMG_FMT_VPXI420:
+    case VPX_IMG_FMT_VPXYV12:
+        ycs = 1;
+        break;
+    default:
+        ycs = 0;
+        break;
+    }
+
+    /* Calculate storage sizes given the chroma subsampling */
+    align = (1 << xcs) - 1;
+    w = (d_w + align) & ~align;
+    align = (1 << ycs) - 1;
+    h = (d_h + align) & ~align;
+    s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;
+    s = (s + stride_align - 1) & ~(stride_align - 1);
+
+    /* Allocate the new image */
+    if (!img)
+    {
+        img = (vpx_image_t *)calloc(1, sizeof(vpx_image_t));
+
+        if (!img)
+            goto fail;
+
+        img->self_allocd = 1;
+    }
+    else
+    {
+        memset(img, 0, sizeof(vpx_image_t));
+    }
+
+    img->img_data = img_data;
+
+    if (!img_data)
+    {
+        img->img_data = malloc((fmt & VPX_IMG_FMT_PLANAR) ? h * w * bps / 8 : h * s);
+        img->img_data_owner = 1;
+    }
+
+    if (!img->img_data)
+        goto fail;
+
+    img->fmt = fmt;
+    img->w = w;
+    img->h = h;
+    img->x_chroma_shift = xcs;
+    img->y_chroma_shift = ycs;
+    img->bps = bps;
+
+    /* Calculate strides */
+    img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = s;
+    img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = s >> xcs;
+
+    /* Default viewport to entire image */
+    if (!vpx_img_set_rect(img, 0, 0, d_w, d_h))
+        return img;
+
+fail:
+    vpx_img_free(img);
+    return NULL;
+}
+
+vpx_image_t *vpx_img_alloc(vpx_image_t  *img,
+                           vpx_img_fmt_t fmt,
+                           unsigned int  d_w,
+                           unsigned int  d_h,
+                           unsigned int  stride_align)
+{
+    return img_alloc_helper(img, fmt, d_w, d_h, stride_align, NULL);
+}
+
+vpx_image_t *vpx_img_wrap(vpx_image_t  *img,
+                          vpx_img_fmt_t fmt,
+                          unsigned int  d_w,
+                          unsigned int  d_h,
+                          unsigned int  stride_align,
+                          unsigned char       *img_data)
+{
+    return img_alloc_helper(img, fmt, d_w, d_h, stride_align, img_data);
+}
+
+int vpx_img_set_rect(vpx_image_t  *img,
+                     unsigned int  x,
+                     unsigned int  y,
+                     unsigned int  w,
+                     unsigned int  h)
+{
+    unsigned char      *data;
+
+    if (x + w <= img->w && y + h <= img->h)
+    {
+        img->d_w = w;
+        img->d_h = h;
+
+        /* Calculate plane pointers */
+        if (!(img->fmt & VPX_IMG_FMT_PLANAR))
+        {
+            img->planes[VPX_PLANE_PACKED] =
+                img->img_data + x * img->bps / 8 + y * img->stride[VPX_PLANE_PACKED];
+        }
+        else
+        {
+            data = img->img_data;
+
+            if (img->fmt & VPX_IMG_FMT_HAS_ALPHA)
+            {
+                img->planes[VPX_PLANE_ALPHA] =
+                    data + x + y * img->stride[VPX_PLANE_ALPHA];
+                data += img->h * img->stride[VPX_PLANE_ALPHA];
+            }
+
+            img->planes[VPX_PLANE_Y] = data + x + y * img->stride[VPX_PLANE_Y];
+            data += img->h * img->stride[VPX_PLANE_Y];
+
+            if (!(img->fmt & VPX_IMG_FMT_UV_FLIP))
+            {
+                img->planes[VPX_PLANE_U] = data
+                                       + (x >> img->x_chroma_shift)
+                                       + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
+                data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
+                img->planes[VPX_PLANE_V] = data
+                                       + (x >> img->x_chroma_shift)
+                                       + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
+            }
+            else
+            {
+                img->planes[VPX_PLANE_V] = data
+                                       + (x >> img->x_chroma_shift)
+                                       + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
+                data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_V];
+                img->planes[VPX_PLANE_U] = data
+                                       + (x >> img->x_chroma_shift)
+                                       + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U];
+            }
+        }
+
+        return 0;
+    }
+
+    return -1;
+}
+
+void vpx_img_flip(vpx_image_t *img)
+{
+    /* Note: In the calculation pointer adjustment calculation, we want the
+     * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99
+     * standard indicates that if the adjustment parameter is unsigned, the
+     * stride parameter will be promoted to unsigned, causing errors when
+     * the lhs is a larger type than the rhs.
+     */
+    img->planes[VPX_PLANE_Y] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_Y];
+    img->stride[VPX_PLANE_Y] = -img->stride[VPX_PLANE_Y];
+
+    img->planes[VPX_PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1)
+                            * img->stride[VPX_PLANE_U];
+    img->stride[VPX_PLANE_U] = -img->stride[VPX_PLANE_U];
+
+    img->planes[VPX_PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1)
+                            * img->stride[VPX_PLANE_V];
+    img->stride[VPX_PLANE_V] = -img->stride[VPX_PLANE_V];
+
+    img->planes[VPX_PLANE_ALPHA] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_ALPHA];
+    img->stride[VPX_PLANE_ALPHA] = -img->stride[VPX_PLANE_ALPHA];
+}
+
+void vpx_img_free(vpx_image_t *img)
+{
+    if (img)
+    {
+        if (img->img_data && img->img_data_owner)
+            free(img->img_data);
+
+        if (img->self_allocd)
+            free(img);
+    }
+}
diff --git a/vp8/vp8.h b/vpx/vp8.h
similarity index 86%
rename from vp8/vp8.h
rename to vpx/vp8.h
index 87ca217..c7553ec 100644
--- a/vp8/vp8.h
+++ b/vpx/vp8.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -29,7 +30,7 @@
  */
 #ifndef VP8_H
 #define VP8_H
-#include "vpx_codec_impl_top.h"
+#include "vpx/vpx_codec_impl_top.h"
 
 /*!\brief Control functions
  *
@@ -112,5 +113,5 @@
 DECLSPEC_DEPRECATED extern vpx_codec_iface_t vpx_codec_vp8_algo DEPRECATED;
 #endif
 
-#include "vpx_codec_impl_bottom.h"
+#include "vpx/vpx_codec_impl_bottom.h"
 #endif
diff --git a/vp8/vp8cx.h b/vpx/vp8cx.h
similarity index 94%
rename from vp8/vp8cx.h
rename to vpx/vp8cx.h
index dd48c07..e1c8211 100644
--- a/vp8/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -21,7 +22,7 @@
  */
 #ifndef VP8CX_H
 #define VP8CX_H
-#include "vpx_codec_impl_top.h"
+#include "vpx/vpx_codec_impl_top.h"
 
 /*!\brief Algorithm interface for VP8
  *
@@ -257,5 +258,5 @@
 VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64,  int *)
 
 /*! @} - end defgroup vp8_encoder */
-#include "vpx_codec_impl_bottom.h"
+#include "vpx/vpx_codec_impl_bottom.h"
 #endif
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
new file mode 100644
index 0000000..4cad838
--- /dev/null
+++ b/vpx/vp8dx.h
@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vp8.h"
+
+/*!\defgroup vp8_decoder WebM VP8 Decoder
+ * \ingroup vp8
+ *
+ * @{
+ */
+/*!\file vp8dx.h
+ * \brief Provides definitions for using the VP8 algorithm within the vpx Decoder
+ *        interface.
+ */
+#ifndef VP8DX_H
+#define VP8DX_H
+#include "vpx/vpx_codec_impl_top.h"
+
+/*!\brief Algorithm interface for VP8
+ *
+ * This interface provides the capability to decode raw VP8 streams, as would
+ * be found in AVI files and other non-Flash uses.
+ */
+extern vpx_codec_iface_t vpx_codec_vp8_dx_algo;
+
+/* Include controls common to both the encoder and decoder */
+#include "vp8.h"
+
+
+/*! @} - end defgroup vp8_decoder */
+
+
+#include "vpx/vpx_codec_impl_bottom.h"
+#endif
diff --git a/vp8/vp8e.h b/vpx/vp8e.h
similarity index 76%
rename from vp8/vp8e.h
rename to vpx/vp8e.h
index a90aa2a..abfce33 100644
--- a/vp8/vp8e.h
+++ b/vpx/vp8e.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -13,7 +14,7 @@
  */
 #ifndef VP8E_H
 #define VP8E_H
-#include "vpx_codec_impl_top.h"
+#include "vpx/vpx_codec_impl_top.h"
 
 #if defined(VPX_CODEC_DISABLE_COMPAT) && VPX_CODEC_DISABLE_COMPAT
 #error "Backwards compatibility disabled: don't include vp8e.h"
@@ -58,5 +59,5 @@
  * #VPX_DL_BEST_QUALITY constants to that parameter instead.
  */
 VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_SET_ENCODING_MODE, vp8e_encoding_mode)
-#include "vpx_codec_impl_bottom.h"
+#include "vpx/vpx_codec_impl_bottom.h"
 #endif
diff --git a/vpx_codec/vpx_codec.h b/vpx/vpx_codec.h
similarity index 97%
rename from vpx_codec/vpx_codec.h
rename to vpx/vpx_codec.h
index e2a79f9..371df00 100644
--- a/vpx_codec/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -41,14 +42,7 @@
 
 #ifndef VPX_CODEC_H
 #define VPX_CODEC_H
-#ifdef HAVE_CONFIG_H
-#  include "vpx_config.h"
-#endif
-#if defined(HAVE_VPX_PORTS) && HAVE_VPX_PORTS
-#  include "vpx_ports/vpx_integer.h"
-#else
-#  include "vpx_integer.h"
-#endif
+#include "vpx_integer.h"
 #include "vpx_image.h"
 
     /*!\brief Decorator indicating a function is deprecated */
@@ -196,7 +190,7 @@
      * may reference the 'name' member to get a printable description of the
      * algorithm.
      */
-    typedef struct
+    typedef struct vpx_codec_ctx
     {
         const char              *name;        /**< Printable interface name */
         vpx_codec_iface_t       *iface;       /**< Interface pointers */
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
new file mode 100644
index 0000000..4f1d74b
--- /dev/null
+++ b/vpx/vpx_codec.mk
@@ -0,0 +1,27 @@
+##
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+
+
+API_EXPORTS += exports
+
+API_SRCS-$(CONFIG_DECODERS) += src/vpx_decoder.c
+API_SRCS-$(CONFIG_DECODERS) += src/vpx_decoder_compat.c
+API_SRCS-$(CONFIG_DECODERS) += vpx_decoder.h
+API_SRCS-$(CONFIG_DECODERS) += vpx_decoder_compat.h
+API_SRCS-$(CONFIG_ENCODERS) += src/vpx_encoder.c
+API_SRCS-$(CONFIG_ENCODERS) += vpx_encoder.h
+API_SRCS-yes                += internal/vpx_codec_internal.h
+API_SRCS-yes                += src/vpx_codec.c
+API_SRCS-yes                += src/vpx_image.c
+API_SRCS-yes                += vpx_codec.h
+API_SRCS-yes                += vpx_codec.mk
+API_SRCS-yes                += vpx_codec_impl_bottom.h
+API_SRCS-yes                += vpx_codec_impl_top.h
+API_SRCS-yes                += vpx_image.h
diff --git a/vpx/vpx_codec_impl_bottom.h b/vpx/vpx_codec_impl_bottom.h
new file mode 100644
index 0000000..6eb79a8
--- /dev/null
+++ b/vpx/vpx_codec_impl_bottom.h
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file is to be included at the bottom of the header files defining the
+ * interface to individual codecs and contains matching blocks to those defined
+ * in vpx_codec_impl_top.h
+ */
+#ifdef __cplusplus
+}
+#endif
diff --git a/vpx/vpx_codec_impl_top.h b/vpx/vpx_codec_impl_top.h
new file mode 100644
index 0000000..c9b8cfa
--- /dev/null
+++ b/vpx/vpx_codec_impl_top.h
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file is to be included at the top of the header files defining the
+ * interface to individual codecs and contains various workarounds common
+ * to all codec implementations.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
diff --git a/vpx_codec/vpx_decoder.h b/vpx/vpx_decoder.h
similarity index 96%
rename from vpx_codec/vpx_decoder.h
rename to vpx/vpx_decoder.h
index 5e4968d..6ffc2d4 100644
--- a/vpx_codec/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -68,7 +69,7 @@
      * stream. Algorithms may extend this structure with data specific
      * to their bitstream by setting the sz member appropriately.
      */
-    typedef struct
+    typedef struct vpx_codec_stream_info
     {
         unsigned int sz;     /**< Size of this structure */
         unsigned int w;      /**< Width (or 0 for unknown/default) */
diff --git a/vpx_codec/vpx_decoder_compat.h b/vpx/vpx_decoder_compat.h
similarity index 97%
rename from vpx_codec/vpx_decoder_compat.h
rename to vpx/vpx_decoder_compat.h
index 25bb5eb..9e1e492 100644
--- a/vpx_codec/vpx_decoder_compat.h
+++ b/vpx/vpx_decoder_compat.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_codec/vpx_encoder.h b/vpx/vpx_encoder.h
similarity index 98%
rename from vpx_codec/vpx_encoder.h
rename to vpx/vpx_encoder.h
index 67393be..f894cd8 100644
--- a/vpx_codec/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
new file mode 100644
index 0000000..4506dd3
--- /dev/null
+++ b/vpx/vpx_image.h
@@ -0,0 +1,242 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*!\file vpx_image.h
+ * \brief Describes the vpx image descriptor and associated operations
+ *
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef VPX_IMAGE_H
+#define VPX_IMAGE_H
+
+    /*!\brief Current ABI version number
+     *
+     * \internal
+     * If this file is altered in any way that changes the ABI, this value
+     * must be bumped.  Examples include, but are not limited to, changing
+     * types, removing or reassigning enums, adding/removing/rearranging
+     * fields to structures
+     */
+#define VPX_IMAGE_ABI_VERSION (1) /**<\hideinitializer*/
+
+
+#define VPX_IMG_FMT_PLANAR     0x100  /**< Image is a planar format */
+#define VPX_IMG_FMT_UV_FLIP    0x200  /**< V plane precedes U plane in memory */
+#define VPX_IMG_FMT_HAS_ALPHA  0x400  /**< Image has an alpha channel componnent */
+
+
+    /*!\brief List of supported image formats */
+    typedef enum vpx_img_fmt {
+        VPX_IMG_FMT_NONE,
+        VPX_IMG_FMT_RGB24,   /**< 24 bit per pixel packed RGB */
+        VPX_IMG_FMT_RGB32,   /**< 32 bit per pixel packed 0RGB */
+        VPX_IMG_FMT_RGB565,  /**< 16 bit per pixel, 565 */
+        VPX_IMG_FMT_RGB555,  /**< 16 bit per pixel, 555 */
+        VPX_IMG_FMT_UYVY,    /**< UYVY packed YUV */
+        VPX_IMG_FMT_YUY2,    /**< YUYV packed YUV */
+        VPX_IMG_FMT_YVYU,    /**< YVYU packed YUV */
+        VPX_IMG_FMT_BGR24,   /**< 24 bit per pixel packed BGR */
+        VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */
+        VPX_IMG_FMT_ARGB,     /**< 32 bit packed ARGB, alpha=255 */
+        VPX_IMG_FMT_ARGB_LE,  /**< 32 bit packed BGRA, alpha=255 */
+        VPX_IMG_FMT_RGB565_LE,  /**< 16 bit per pixel, gggbbbbb rrrrrggg */
+        VPX_IMG_FMT_RGB555_LE,  /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
+        VPX_IMG_FMT_YV12    = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
+        VPX_IMG_FMT_I420    = VPX_IMG_FMT_PLANAR | 2,
+        VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */
+        VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4,  /** < planar 4:2:0 format with vpx color space */
+    }
+    vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
+
+#if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT
+#define IMG_FMT_PLANAR         VPX_IMG_FMT_PLANAR     /**< \deprecated Use #VPX_IMG_FMT_PLANAR */
+#define IMG_FMT_UV_FLIP        VPX_IMG_FMT_UV_FLIP    /**< \deprecated Use #VPX_IMG_FMT_UV_FLIP */
+#define IMG_FMT_HAS_ALPHA      VPX_IMG_FMT_HAS_ALPHA  /**< \deprecated Use #VPX_IMG_FMT_HAS_ALPHA */
+
+    /*!\brief Deprecated list of supported image formats
+     * \deprecated New code should use #vpx_img_fmt
+     */
+#define img_fmt   vpx_img_fmt
+    /*!\brief alias for enum img_fmt.
+     * \deprecated New code should use #vpx_img_fmt_t
+     */
+#define img_fmt_t vpx_img_fmt_t
+
+#define IMG_FMT_NONE       VPX_IMG_FMT_NONE       /**< \deprecated Use #VPX_IMG_FMT_NONE */
+#define IMG_FMT_RGB24      VPX_IMG_FMT_RGB24      /**< \deprecated Use #VPX_IMG_FMT_RGB24 */
+#define IMG_FMT_RGB32      VPX_IMG_FMT_RGB32      /**< \deprecated Use #VPX_IMG_FMT_RGB32 */
+#define IMG_FMT_RGB565     VPX_IMG_FMT_RGB565     /**< \deprecated Use #VPX_IMG_FMT_RGB565 */
+#define IMG_FMT_RGB555     VPX_IMG_FMT_RGB555     /**< \deprecated Use #VPX_IMG_FMT_RGB555 */
+#define IMG_FMT_UYVY       VPX_IMG_FMT_UYVY       /**< \deprecated Use #VPX_IMG_FMT_UYVY */
+#define IMG_FMT_YUY2       VPX_IMG_FMT_YUY2       /**< \deprecated Use #VPX_IMG_FMT_YUY2 */
+#define IMG_FMT_YVYU       VPX_IMG_FMT_YVYU       /**< \deprecated Use #VPX_IMG_FMT_YVYU */
+#define IMG_FMT_BGR24      VPX_IMG_FMT_BGR24      /**< \deprecated Use #VPX_IMG_FMT_BGR24 */
+#define IMG_FMT_RGB32_LE   VPX_IMG_FMT_RGB32_LE   /**< \deprecated Use #VPX_IMG_FMT_RGB32_LE */
+#define IMG_FMT_ARGB       VPX_IMG_FMT_ARGB       /**< \deprecated Use #VPX_IMG_FMT_ARGB */
+#define IMG_FMT_ARGB_LE    VPX_IMG_FMT_ARGB_LE    /**< \deprecated Use #VPX_IMG_FMT_ARGB_LE */
+#define IMG_FMT_RGB565_LE  VPX_IMG_FMT_RGB565_LE  /**< \deprecated Use #VPX_IMG_FMT_RGB565_LE */
+#define IMG_FMT_RGB555_LE  VPX_IMG_FMT_RGB555_LE  /**< \deprecated Use #VPX_IMG_FMT_RGB555_LE */
+#define IMG_FMT_YV12       VPX_IMG_FMT_YV12       /**< \deprecated Use #VPX_IMG_FMT_YV12 */
+#define IMG_FMT_I420       VPX_IMG_FMT_I420       /**< \deprecated Use #VPX_IMG_FMT_I420 */
+#define IMG_FMT_VPXYV12    VPX_IMG_FMT_VPXYV12    /**< \deprecated Use #VPX_IMG_FMT_VPXYV12 */
+#define IMG_FMT_VPXI420    VPX_IMG_FMT_VPXI420    /**< \deprecated Use #VPX_IMG_FMT_VPXI420 */
+#endif /* VPX_CODEC_DISABLE_COMPAT */
+
+    /**\brief Image Descriptor */
+    typedef struct vpx_image
+    {
+        vpx_img_fmt_t fmt; /**< Image Format */
+
+        /* Image storage dimensions */
+        unsigned int  w;   /**< Stored image width */
+        unsigned int  h;   /**< Stored image height */
+
+        /* Image display dimensions */
+        unsigned int  d_w;   /**< Displayed image width */
+        unsigned int  d_h;   /**< Displayed image height */
+
+        /* Chroma subsampling info */
+        unsigned int  x_chroma_shift;   /**< subsampling order, X */
+        unsigned int  y_chroma_shift;   /**< subsampling order, Y */
+
+        /* Image data pointers. */
+#define VPX_PLANE_PACKED 0   /**< To be used for all packed formats */
+#define VPX_PLANE_Y      0   /**< Y (Luminance) plane */
+#define VPX_PLANE_U      1   /**< U (Chroma) plane */
+#define VPX_PLANE_V      2   /**< V (Chroma) plane */
+#define VPX_PLANE_ALPHA  3   /**< A (Transparancy) plane */
+#if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT
+#define PLANE_PACKED     VPX_PLANE_PACKED
+#define PLANE_Y          VPX_PLANE_Y
+#define PLANE_U          VPX_PLANE_U
+#define PLANE_V          VPX_PLANE_V
+#define PLANE_ALPHA      VPX_PLANE_ALPHA
+#endif
+        unsigned char *planes[4];  /**< pointer to the top left pixel for each plane */
+        int      stride[4];  /**< stride between rows for each plane */
+
+        int     bps; /**< bits per sample (for packed formats) */
+
+        /* The following member may be set by the application to associate data
+         * with this image.
+         */
+        void    *user_priv; /**< may be set by the application to associate data
+                         *   with this image. */
+
+        /* The following members should be treated as private. */
+        unsigned char *img_data;       /**< private */
+        int      img_data_owner; /**< private */
+        int      self_allocd;    /**< private */
+    } vpx_image_t; /**< alias for struct vpx_image */
+
+    /**\brief Representation of a rectangle on a surface */
+    typedef struct vpx_image_rect
+    {
+        unsigned int x; /**< leftmost column */
+        unsigned int y; /**< topmost row */
+        unsigned int w; /**< width */
+        unsigned int h; /**< height */
+    } vpx_image_rect_t; /**< alias for struct vpx_image_rect */
+
+    /*!\brief Open a descriptor, allocating storage for the underlying image
+     *
+     * Returns a descriptor for storing an image of the given format. The
+     * storage for the descriptor is allocated on the heap.
+     *
+     * \param[in]    img       Pointer to storage for descriptor. If this parameter
+     *                         is NULL, the storage for the descriptor will be
+     *                         allocated on the heap.
+     * \param[in]    fmt       Format for the image
+     * \param[in]    d_w       Width of the image
+     * \param[in]    d_h       Height of the image
+     * \param[in]    align     Alignment, in bytes, of each row in the image.
+     *
+     * \return Returns a pointer to the initialized image descriptor. If the img
+     *         parameter is non-null, the value of the img parameter will be
+     *         returned.
+     */
+    vpx_image_t *vpx_img_alloc(vpx_image_t  *img,
+                               vpx_img_fmt_t fmt,
+                               unsigned int d_w,
+                               unsigned int d_h,
+                               unsigned int align);
+
+    /*!\brief Open a descriptor, using existing storage for the underlying image
+     *
+     * Returns a descriptor for storing an image of the given format. The
+     * storage for descriptor has been allocated elsewhere, and a descriptor is
+     * desired to "wrap" that storage.
+     *
+     * \param[in]    img       Pointer to storage for descriptor. If this parameter
+     *                         is NULL, the storage for the descriptor will be
+     *                         allocated on the heap.
+     * \param[in]    fmt       Format for the image
+     * \param[in]    d_w       Width of the image
+     * \param[in]    d_h       Height of the image
+     * \param[in]    align     Alignment, in bytes, of each row in the image.
+     * \param[in]    img_data  Storage to use for the image
+     *
+     * \return Returns a pointer to the initialized image descriptor. If the img
+     *         parameter is non-null, the value of the img parameter will be
+     *         returned.
+     */
+    vpx_image_t *vpx_img_wrap(vpx_image_t  *img,
+                              vpx_img_fmt_t fmt,
+                              unsigned int d_w,
+                              unsigned int d_h,
+                              unsigned int align,
+                              unsigned char      *img_data);
+
+
+    /*!\brief Set the rectangle identifying the displayed portion of the image
+     *
+     * Updates the displayed rectangle (aka viewport) on the image surface to
+     * match the specified coordinates and size.
+     *
+     * \param[in]    img       Image descriptor
+     * \param[in]    x         leftmost column
+     * \param[in]    y         topmost row
+     * \param[in]    w         width
+     * \param[in]    h         height
+     *
+     * \return 0 if the requested rectangle is valid, nonzero otherwise.
+     */
+    int vpx_img_set_rect(vpx_image_t  *img,
+                         unsigned int  x,
+                         unsigned int  y,
+                         unsigned int  w,
+                         unsigned int  h);
+
+
+    /*!\brief Flip the image vertically (top for bottom)
+     *
+     * Adjusts the image descriptor's pointers and strides to make the image
+     * be referenced upside-down.
+     *
+     * \param[in]    img       Image descriptor
+     */
+    void vpx_img_flip(vpx_image_t *img);
+
+    /*!\brief Close an image descriptor
+     *
+     * Frees all allocated storage associated with an image descriptor.
+     *
+     * \param[in]    img       Image descriptor
+     */
+    void vpx_img_free(vpx_image_t *img);
+
+#endif
+#ifdef __cplusplus
+}
+#endif
diff --git a/vpx_ports/vpx_integer.h b/vpx/vpx_integer.h
similarity index 61%
rename from vpx_ports/vpx_integer.h
rename to vpx/vpx_integer.h
index d3f7ddd..9a06c1a 100644
--- a/vpx_ports/vpx_integer.h
+++ b/vpx/vpx_integer.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -14,13 +15,7 @@
 /* get ptrdiff_t, size_t, wchar_t, NULL */
 #include <stddef.h>
 
-#if defined(HAVE_STDINT_H) && HAVE_STDINT_H
-#if defined(__cplusplus) && !defined(__STDC_FORMAT_MACROS)
-#define __STDC_FORMAT_MACROS
-#endif
-#include <stdint.h>
-#include <inttypes.h>
-#else
+#if defined(_MSC_VER) || defined(VPX_EMULATE_INTTYPES)
 typedef signed char  int8_t;
 typedef signed short int16_t;
 typedef signed int   int32_t;
@@ -47,6 +42,16 @@
 typedef unsigned int   uintptr_t;
 #endif
 
+#else
+
+/* Most platforms have the C99 standard integer types. */
+
+#if defined(__cplusplus) && !defined(__STDC_FORMAT_MACROS)
+#define __STDC_FORMAT_MACROS
+#endif
+#include <stdint.h>
+#include <inttypes.h>
+
 #endif
 
 #endif
diff --git a/vpx_codec/exports b/vpx_codec/exports
deleted file mode 100644
index f5e7473..0000000
--- a/vpx_codec/exports
+++ /dev/null
@@ -1,17 +0,0 @@
-text vpx_dec_control
-text vpx_dec_decode
-text vpx_dec_destroy
-text vpx_dec_err_to_string
-text vpx_dec_error
-text vpx_dec_error_detail
-text vpx_dec_get_caps
-text vpx_dec_get_frame
-text vpx_dec_get_mem_map
-text vpx_dec_get_stream_info
-text vpx_dec_iface_name
-text vpx_dec_init_ver
-text vpx_dec_peek_stream_info
-text vpx_dec_register_put_frame_cb
-text vpx_dec_register_put_slice_cb
-text vpx_dec_set_mem_map
-text vpx_dec_xma_init_ver
diff --git a/vpx_codec/src/vpx_image.c b/vpx_codec/src/vpx_image.c
deleted file mode 100644
index 8a16e58..0000000
--- a/vpx_codec/src/vpx_image.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-#include <stdlib.h>
-#include <string.h>
-#include "vpx_codec/vpx_image.h"
-
-static vpx_image_t *img_alloc_helper(vpx_image_t  *img,
-                                     img_fmt_t     fmt,
-                                     unsigned int  d_w,
-                                     unsigned int  d_h,
-                                     unsigned int  stride_align,
-                                     unsigned char      *img_data)
-{
-
-    unsigned int  h, w, s, xcs, ycs, bps;
-    int           align;
-
-    /* Treat align==0 like align==1 */
-    if (!stride_align)
-        stride_align = 1;
-
-    /* Validate alignment (must be power of 2) */
-    if (stride_align & (stride_align - 1))
-        goto fail;
-
-    /* Get sample size for this format */
-    switch (fmt)
-    {
-    case IMG_FMT_RGB32:
-    case IMG_FMT_RGB32_LE:
-    case IMG_FMT_ARGB:
-    case IMG_FMT_ARGB_LE:
-        bps = 32;
-        break;
-    case IMG_FMT_RGB24:
-    case IMG_FMT_BGR24:
-        bps = 24;
-        break;
-    case IMG_FMT_RGB565:
-    case IMG_FMT_RGB565_LE:
-    case IMG_FMT_RGB555:
-    case IMG_FMT_RGB555_LE:
-    case IMG_FMT_UYVY:
-    case IMG_FMT_YUY2:
-    case IMG_FMT_YVYU:
-        bps = 16;
-        break;
-    case IMG_FMT_I420:
-    case IMG_FMT_YV12:
-    case IMG_FMT_VPXI420:
-    case IMG_FMT_VPXYV12:
-        bps = 12;
-        break;
-    default:
-        bps = 16;
-        break;
-    }
-
-    /* Get chroma shift values for this format */
-    switch (fmt)
-    {
-    case IMG_FMT_I420:
-    case IMG_FMT_YV12:
-    case IMG_FMT_VPXI420:
-    case IMG_FMT_VPXYV12:
-        xcs = 1;
-        break;
-    default:
-        xcs = 0;
-        break;
-    }
-
-    switch (fmt)
-    {
-    case IMG_FMT_I420:
-    case IMG_FMT_YV12:
-    case IMG_FMT_VPXI420:
-    case IMG_FMT_VPXYV12:
-        ycs = 1;
-        break;
-    default:
-        ycs = 0;
-        break;
-    }
-
-    /* Calculate storage sizes given the chroma subsampling */
-    align = (1 << xcs) - 1;
-    w = (d_w + align) & ~align;
-    align = (1 << ycs) - 1;
-    h = (d_h + align) & ~align;
-    s = (fmt & IMG_FMT_PLANAR) ? w : bps * w / 8;
-    s = (s + stride_align - 1) & ~(stride_align - 1);
-
-    /* Allocate the new image */
-    if (!img)
-    {
-        img = (vpx_image_t *)calloc(1, sizeof(vpx_image_t));
-
-        if (!img)
-            goto fail;
-
-        img->self_allocd = 1;
-    }
-    else
-    {
-        memset(img, 0, sizeof(vpx_image_t));
-    }
-
-    img->img_data = img_data;
-
-    if (!img_data)
-    {
-        img->img_data = malloc((fmt & IMG_FMT_PLANAR) ? h * w * bps / 8 : h * s);
-        img->img_data_owner = 1;
-    }
-
-    if (!img->img_data)
-        goto fail;
-
-    img->fmt = fmt;
-    img->w = w;
-    img->h = h;
-    img->x_chroma_shift = xcs;
-    img->y_chroma_shift = ycs;
-    img->bps = bps;
-
-    /* Calculate strides */
-    img->stride[PLANE_Y] = img->stride[PLANE_ALPHA] = s;
-    img->stride[PLANE_U] = img->stride[PLANE_V] = s >> xcs;
-
-    /* Default viewport to entire image */
-    if (!vpx_img_set_rect(img, 0, 0, d_w, d_h))
-        return img;
-
-fail:
-    vpx_img_free(img);
-    return NULL;
-}
-
-vpx_image_t *vpx_img_alloc(vpx_image_t  *img,
-                           img_fmt_t     fmt,
-                           unsigned int  d_w,
-                           unsigned int  d_h,
-                           unsigned int  stride_align)
-{
-    return img_alloc_helper(img, fmt, d_w, d_h, stride_align, NULL);
-}
-
-vpx_image_t *vpx_img_wrap(vpx_image_t  *img,
-                          img_fmt_t     fmt,
-                          unsigned int  d_w,
-                          unsigned int  d_h,
-                          unsigned int  stride_align,
-                          unsigned char       *img_data)
-{
-    return img_alloc_helper(img, fmt, d_w, d_h, stride_align, img_data);
-}
-
-int vpx_img_set_rect(vpx_image_t  *img,
-                     unsigned int  x,
-                     unsigned int  y,
-                     unsigned int  w,
-                     unsigned int  h)
-{
-    unsigned char      *data;
-
-    if (x + w <= img->w && y + h <= img->h)
-    {
-        img->d_w = w;
-        img->d_h = h;
-
-        /* Calculate plane pointers */
-        if (!(img->fmt & IMG_FMT_PLANAR))
-        {
-            img->planes[PLANE_PACKED] =
-                img->img_data + x * img->bps / 8 + y * img->stride[PLANE_PACKED];
-        }
-        else
-        {
-            data = img->img_data;
-
-            if (img->fmt & IMG_FMT_HAS_ALPHA)
-            {
-                img->planes[PLANE_ALPHA] =
-                    data + x + y * img->stride[PLANE_ALPHA];
-                data += img->h * img->stride[PLANE_ALPHA];
-            }
-
-            img->planes[PLANE_Y] = data + x + y * img->stride[PLANE_Y];
-            data += img->h * img->stride[PLANE_Y];
-
-            if (!(img->fmt & IMG_FMT_UV_FLIP))
-            {
-                img->planes[PLANE_U] = data
-                                       + (x >> img->x_chroma_shift)
-                                       + (y >> img->y_chroma_shift) * img->stride[PLANE_U];
-                data += (img->h >> img->y_chroma_shift) * img->stride[PLANE_U];
-                img->planes[PLANE_V] = data
-                                       + (x >> img->x_chroma_shift)
-                                       + (y >> img->y_chroma_shift) * img->stride[PLANE_V];
-            }
-            else
-            {
-                img->planes[PLANE_V] = data
-                                       + (x >> img->x_chroma_shift)
-                                       + (y >> img->y_chroma_shift) * img->stride[PLANE_V];
-                data += (img->h >> img->y_chroma_shift) * img->stride[PLANE_V];
-                img->planes[PLANE_U] = data
-                                       + (x >> img->x_chroma_shift)
-                                       + (y >> img->y_chroma_shift) * img->stride[PLANE_U];
-            }
-        }
-
-        return 0;
-    }
-
-    return -1;
-}
-
-void vpx_img_flip(vpx_image_t *img)
-{
-    /* Note: In the calculation pointer adjustment calculation, we want the
-     * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99
-     * standard indicates that if the adjustment parameter is unsigned, the
-     * stride parameter will be promoted to unsigned, causing errors when
-     * the lhs is a larger type than the rhs.
-     */
-    img->planes[PLANE_Y] += (signed)(img->d_h - 1) * img->stride[PLANE_Y];
-    img->stride[PLANE_Y] = -img->stride[PLANE_Y];
-
-    img->planes[PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1)
-                            * img->stride[PLANE_U];
-    img->stride[PLANE_U] = -img->stride[PLANE_U];
-
-    img->planes[PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1)
-                            * img->stride[PLANE_V];
-    img->stride[PLANE_V] = -img->stride[PLANE_V];
-
-    img->planes[PLANE_ALPHA] += (signed)(img->d_h - 1) * img->stride[PLANE_ALPHA];
-    img->stride[PLANE_ALPHA] = -img->stride[PLANE_ALPHA];
-}
-
-void vpx_img_free(vpx_image_t *img)
-{
-    if (img)
-    {
-        if (img->img_data && img->img_data_owner)
-            free(img->img_data);
-
-        if (img->self_allocd)
-            free(img);
-    }
-}
diff --git a/vpx_codec/vpx_codec.mk b/vpx_codec/vpx_codec.mk
deleted file mode 100644
index 75fbeea..0000000
--- a/vpx_codec/vpx_codec.mk
+++ /dev/null
@@ -1,26 +0,0 @@
-##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
-##
-
-
-API_EXPORTS += exports
-
-API_SRCS-yes += internal/vpx_codec_internal.h
-API_SRCS-yes += vpx_codec.h
-API_SRCS-yes += vpx_codec.mk
-API_SRCS-yes += vpx_codec_impl_top.h
-API_SRCS-yes += vpx_codec_impl_bottom.h
-API_SRCS-yes += vpx_decoder.h
-API_SRCS-yes += vpx_decoder_compat.h
-API_SRCS-yes += vpx_encoder.h
-API_SRCS-yes += vpx_image.h
-API_SRCS-yes += src/vpx_codec.c
-API_SRCS-yes += src/vpx_decoder.c
-API_SRCS-yes += src/vpx_decoder_compat.c
-API_SRCS-yes += src/vpx_image.c
-API_SRCS-yes += src/vpx_encoder.c
diff --git a/vpx_codec/vpx_codec_impl_bottom.h b/vpx_codec/vpx_codec_impl_bottom.h
deleted file mode 100644
index c52654c..0000000
--- a/vpx_codec/vpx_codec_impl_bottom.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-/*
- * This file is to be included at the bottom of the header files defining the
- * interface to individual codecs and contains matching blocks to those defined
- * in vpx_codec_impl_top.h
- */
-#ifdef __cplusplus
-}
-#endif
diff --git a/vpx_codec/vpx_codec_impl_top.h b/vpx_codec/vpx_codec_impl_top.h
deleted file mode 100644
index f73809a..0000000
--- a/vpx_codec/vpx_codec_impl_top.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-/*
- * This file is to be included at the top of the header files defining the
- * interface to individual codecs and contains various workarounds common
- * to all codec implementations.
- */
-#ifdef __cplusplus
-extern "C" {
-#endif
diff --git a/vpx_codec/vpx_image.h b/vpx_codec/vpx_image.h
deleted file mode 100644
index a8a9416..0000000
--- a/vpx_codec/vpx_image.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
- */
-
-
-/*!\file vpx_image.h
- * \brief Describes the vpx image descriptor and associated operations
- *
- */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef VPX_IMAGE_H
-#define VPX_IMAGE_H
-
-    /*!\brief Current ABI version number
-     *
-     * \internal
-     * If this file is altered in any way that changes the ABI, this value
-     * must be bumped.  Examples include, but are not limited to, changing
-     * types, removing or reassigning enums, adding/removing/rearranging
-     * fields to structures
-     */
-#define VPX_IMAGE_ABI_VERSION (1) /**<\hideinitializer*/
-
-
-#define IMG_FMT_PLANAR     0x100  /**< Image is a planar format */
-#define IMG_FMT_UV_FLIP    0x200  /**< V plane precedes U plane in memory */
-#define IMG_FMT_HAS_ALPHA  0x400  /**< Image has an alpha channel componnent */
-
-
-    /*!\brief List of supported image formats */
-    typedef enum img_fmt {
-        IMG_FMT_NONE,
-        IMG_FMT_RGB24,   /**< 24 bit per pixel packed RGB */
-        IMG_FMT_RGB32,   /**< 32 bit per pixel packed 0RGB */
-        IMG_FMT_RGB565,  /**< 16 bit per pixel, 565 */
-        IMG_FMT_RGB555,  /**< 16 bit per pixel, 555 */
-        IMG_FMT_UYVY,    /**< UYVY packed YUV */
-        IMG_FMT_YUY2,    /**< YUYV packed YUV */
-        IMG_FMT_YVYU,    /**< YVYU packed YUV */
-        IMG_FMT_BGR24,   /**< 24 bit per pixel packed BGR */
-        IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */
-        IMG_FMT_ARGB,     /**< 32 bit packed ARGB, alpha=255 */
-        IMG_FMT_ARGB_LE,  /**< 32 bit packed BGRA, alpha=255 */
-        IMG_FMT_RGB565_LE,  /**< 16 bit per pixel, gggbbbbb rrrrrggg */
-        IMG_FMT_RGB555_LE,  /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
-        IMG_FMT_YV12    = IMG_FMT_PLANAR | IMG_FMT_UV_FLIP | 1, /**< planar YVU */
-        IMG_FMT_I420    = IMG_FMT_PLANAR | 2,
-        IMG_FMT_VPXYV12 = IMG_FMT_PLANAR | IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */
-        IMG_FMT_VPXI420 = IMG_FMT_PLANAR | 4,  /** < planar 4:2:0 format with vpx color space */
-    }
-    img_fmt_t; /**< alias for enum img_fmt */
-
-
-    /**\brief Image Descriptor */
-    typedef struct
-    {
-        img_fmt_t     fmt; /**< Image Format */
-
-        /* Image storage dimensions */
-        unsigned int  w;   /**< Stored image width */
-        unsigned int  h;   /**< Stored image height */
-
-        /* Image display dimensions */
-        unsigned int  d_w;   /**< Displayed image width */
-        unsigned int  d_h;   /**< Displayed image height */
-
-        /* Chroma subsampling info */
-        unsigned int  x_chroma_shift;   /**< subsampling order, X */
-        unsigned int  y_chroma_shift;   /**< subsampling order, Y */
-
-        /* Image data pointers. */
-#define PLANE_PACKED 0   /**< To be used for all packed formats */
-#define PLANE_Y   0      /**< Y (Luminance) plane */
-#define PLANE_U   1      /**< U (Chroma) plane */
-#define PLANE_V   2      /**< V (Chroma) plane */
-#define PLANE_ALPHA 3    /**< A (Transparancy) plane */
-        unsigned char *planes[4];  /**< pointer to the top left pixel for each plane */
-        int      stride[4];  /**< stride between rows for each plane */
-
-        int     bps; /**< bits per sample (for packed formats) */
-
-        /* The following member may be set by the application to associate data
-         * with this image.
-         */
-        void    *user_priv; /**< may be set by the application to associate data
-                         *   with this image. */
-
-        /* The following members should be treated as private. */
-        unsigned char *img_data;       /**< private */
-        int      img_data_owner; /**< private */
-        int      self_allocd;    /**< private */
-    } vpx_image_t; /**< alias for struct vpx_image */
-
-    /**\brief Representation of a rectangle on a surface */
-    typedef struct vpx_image_rect
-    {
-        unsigned int x; /**< leftmost column */
-        unsigned int y; /**< topmost row */
-        unsigned int w; /**< width */
-        unsigned int h; /**< height */
-    } vpx_image_rect_t; /**< alias for struct vpx_image_rect */
-
-    /*!\brief Open a descriptor, allocating storage for the underlying image
-     *
-     * Returns a descriptor for storing an image of the given format. The
-     * storage for the descriptor is allocated on the heap.
-     *
-     * \param[in]    img       Pointer to storage for descriptor. If this parameter
-     *                         is NULL, the storage for the descriptor will be
-     *                         allocated on the heap.
-     * \param[in]    fmt       Format for the image
-     * \param[in]    d_w       Width of the image
-     * \param[in]    d_h       Height of the image
-     * \param[in]    align     Alignment, in bytes, of each row in the image.
-     *
-     * \return Returns a pointer to the initialized image descriptor. If the img
-     *         parameter is non-null, the value of the img parameter will be
-     *         returned.
-     */
-    vpx_image_t *vpx_img_alloc(vpx_image_t  *img,
-                               img_fmt_t fmt,
-                               unsigned int d_w,
-                               unsigned int d_h,
-                               unsigned int align);
-
-    /*!\brief Open a descriptor, using existing storage for the underlying image
-     *
-     * Returns a descriptor for storing an image of the given format. The
-     * storage for descriptor has been allocated elsewhere, and a descriptor is
-     * desired to "wrap" that storage.
-     *
-     * \param[in]    img       Pointer to storage for descriptor. If this parameter
-     *                         is NULL, the storage for the descriptor will be
-     *                         allocated on the heap.
-     * \param[in]    fmt       Format for the image
-     * \param[in]    d_w       Width of the image
-     * \param[in]    d_h       Height of the image
-     * \param[in]    align     Alignment, in bytes, of each row in the image.
-     * \param[in]    img_data  Storage to use for the image
-     *
-     * \return Returns a pointer to the initialized image descriptor. If the img
-     *         parameter is non-null, the value of the img parameter will be
-     *         returned.
-     */
-    vpx_image_t *vpx_img_wrap(vpx_image_t  *img,
-                              img_fmt_t fmt,
-                              unsigned int d_w,
-                              unsigned int d_h,
-                              unsigned int align,
-                              unsigned char      *img_data);
-
-
-    /*!\brief Set the rectangle identifying the displayed portion of the image
-     *
-     * Updates the displayed rectangle (aka viewport) on the image surface to
-     * match the specified coordinates and size.
-     *
-     * \param[in]    img       Image descriptor
-     * \param[in]    x         leftmost column
-     * \param[in]    y         topmost row
-     * \param[in]    w         width
-     * \param[in]    h         height
-     *
-     * \return 0 if the requested rectangle is valid, nonzero otherwise.
-     */
-    int vpx_img_set_rect(vpx_image_t  *img,
-                         unsigned int  x,
-                         unsigned int  y,
-                         unsigned int  w,
-                         unsigned int  h);
-
-
-    /*!\brief Flip the image vertically (top for bottom)
-     *
-     * Adjusts the image descriptor's pointers and strides to make the image
-     * be referenced upside-down.
-     *
-     * \param[in]    img       Image descriptor
-     */
-    void vpx_img_flip(vpx_image_t *img);
-
-    /*!\brief Close an image descriptor
-     *
-     * Frees all allocated storage associated with an image descriptor.
-     *
-     * \param[in]    img       Image descriptor
-     */
-    void vpx_img_free(vpx_image_t *img);
-
-#endif
-#ifdef __cplusplus
-}
-#endif
diff --git a/vpx_mem/include/nds/vpx_mem_nds.h b/vpx_mem/include/nds/vpx_mem_nds.h
index c332403..e54f54d 100644
--- a/vpx_mem/include/nds/vpx_mem_nds.h
+++ b/vpx_mem/include/nds/vpx_mem_nds.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/include/vpx_mem_intrnl.h b/vpx_mem/include/vpx_mem_intrnl.h
index 3b68d86..00f9c90 100644
--- a/vpx_mem/include/vpx_mem_intrnl.h
+++ b/vpx_mem/include/vpx_mem_intrnl.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/include/vpx_mem_tracker.h b/vpx_mem/include/vpx_mem_tracker.h
index ab85d19..ef2b29b 100644
--- a/vpx_mem/include/vpx_mem_tracker.h
+++ b/vpx_mem/include/vpx_mem_tracker.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/intel_linux/vpx_mem.c b/vpx_mem/intel_linux/vpx_mem.c
index 002e407..00150ac 100644
--- a/vpx_mem/intel_linux/vpx_mem.c
+++ b/vpx_mem/intel_linux/vpx_mem.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/intel_linux/vpx_mem_tracker.c b/vpx_mem/intel_linux/vpx_mem_tracker.c
index fa023e3..5bed4b5 100644
--- a/vpx_mem/intel_linux/vpx_mem_tracker.c
+++ b/vpx_mem/intel_linux/vpx_mem_tracker.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_alloc.c b/vpx_mem/memory_manager/hmm_alloc.c
index 9abd81e..22c4a54 100644
--- a/vpx_mem/memory_manager/hmm_alloc.c
+++ b/vpx_mem/memory_manager/hmm_alloc.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_base.c b/vpx_mem/memory_manager/hmm_base.c
index 0cacc3f..ad1da03 100644
--- a/vpx_mem/memory_manager/hmm_base.c
+++ b/vpx_mem/memory_manager/hmm_base.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_dflt_abort.c b/vpx_mem/memory_manager/hmm_dflt_abort.c
index dc59f55..d92435c 100644
--- a/vpx_mem/memory_manager/hmm_dflt_abort.c
+++ b/vpx_mem/memory_manager/hmm_dflt_abort.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_grow.c b/vpx_mem/memory_manager/hmm_grow.c
index 79d75a7..9a4b6e4 100644
--- a/vpx_mem/memory_manager/hmm_grow.c
+++ b/vpx_mem/memory_manager/hmm_grow.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_largest.c b/vpx_mem/memory_manager/hmm_largest.c
index 5ebe398..c3c6f2c 100644
--- a/vpx_mem/memory_manager/hmm_largest.c
+++ b/vpx_mem/memory_manager/hmm_largest.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_resize.c b/vpx_mem/memory_manager/hmm_resize.c
index 6e3f2f0..f90da96 100644
--- a/vpx_mem/memory_manager/hmm_resize.c
+++ b/vpx_mem/memory_manager/hmm_resize.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_shrink.c b/vpx_mem/memory_manager/hmm_shrink.c
index 5ef9b23..78fe268 100644
--- a/vpx_mem/memory_manager/hmm_shrink.c
+++ b/vpx_mem/memory_manager/hmm_shrink.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_true.c b/vpx_mem/memory_manager/hmm_true.c
index 41103c8..3f7be8f 100644
--- a/vpx_mem/memory_manager/hmm_true.c
+++ b/vpx_mem/memory_manager/hmm_true.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/cavl_if.h b/vpx_mem/memory_manager/include/cavl_if.h
index e2733ef..1b2c9b7 100644
--- a/vpx_mem/memory_manager/include/cavl_if.h
+++ b/vpx_mem/memory_manager/include/cavl_if.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/cavl_impl.h b/vpx_mem/memory_manager/include/cavl_impl.h
index 267bc73..5e165dd 100644
--- a/vpx_mem/memory_manager/include/cavl_impl.h
+++ b/vpx_mem/memory_manager/include/cavl_impl.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/heapmm.h b/vpx_mem/memory_manager/include/heapmm.h
index 933e30d..33004ca 100644
--- a/vpx_mem/memory_manager/include/heapmm.h
+++ b/vpx_mem/memory_manager/include/heapmm.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/hmm_cnfg.h b/vpx_mem/memory_manager/include/hmm_cnfg.h
index 86e4e9f..30b9f50 100644
--- a/vpx_mem/memory_manager/include/hmm_cnfg.h
+++ b/vpx_mem/memory_manager/include/hmm_cnfg.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/hmm_intrnl.h b/vpx_mem/memory_manager/include/hmm_intrnl.h
index 6e2be08..5d62abc 100644
--- a/vpx_mem/memory_manager/include/hmm_intrnl.h
+++ b/vpx_mem/memory_manager/include/hmm_intrnl.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/nds/vpx_mem_nds.c b/vpx_mem/nds/vpx_mem_nds.c
index f2a3043..11ac95c 100644
--- a/vpx_mem/nds/vpx_mem_nds.c
+++ b/vpx_mem/nds/vpx_mem_nds.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
index 6501855..d55b7d9 100644
--- a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
+++ b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/vpx_mem.c b/vpx_mem/vpx_mem.c
index f6b1a35..85b05ab 100644
--- a/vpx_mem/vpx_mem.c
+++ b/vpx_mem/vpx_mem.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h
index 6ccb9be..31f8f9c 100644
--- a/vpx_mem/vpx_mem.h
+++ b/vpx_mem/vpx_mem.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/vpx_mem_tracker.c b/vpx_mem/vpx_mem_tracker.c
index 4427e27..938ad07 100644
--- a/vpx_mem/vpx_mem_tracker.c
+++ b/vpx_mem/vpx_mem_tracker.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_ports/config.h b/vpx_ports/config.h
index da38137..1abe70d 100644
--- a/vpx_ports/config.h
+++ b/vpx_ports/config.h
@@ -1,19 +1,10 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
-
-
-/* This file uses some preprocessor magic to expand the value of HAVE_CONFIG_H,
- * as defined by the build system, so that different projects can use the file
- * name for config.h that suits them.
- */
-#define QUOTE_(x) #x
-#define QUOTE(x) QUOTE_(x)
-#include QUOTE(HAVE_CONFIG_H)
-#undef QUOTE
-#undef QUOTE_
+#include "vpx_config.h"
diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm
index 03e3499..87eece8 100644
--- a/vpx_ports/emms.asm
+++ b/vpx_ports/emms.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index 10942f1..9ec34fe 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -1,17 +1,18 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #ifndef VPX_PORTS_MEM_H
 #define VPX_PORTS_MEM_H
 #include "vpx_config.h"
-#include "vpx_integer.h"
+#include "vpx/vpx_integer.h"
 
 #if defined(__GNUC__) && __GNUC__
 #define DECLARE_ALIGNED(n,typ,val)  typ val __attribute__ ((aligned (n)))
diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h
index 869d583..c178b8b 100644
--- a/vpx_ports/mem_ops.h
+++ b/vpx_ports/mem_ops.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -59,7 +60,7 @@
 
 #undef  mem_get_be16
 #define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16)
-static INLINE unsigned MEM_VALUE_T mem_get_be16(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_be16(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -71,7 +72,7 @@
 
 #undef  mem_get_be24
 #define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24)
-static INLINE unsigned MEM_VALUE_T mem_get_be24(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_be24(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -84,7 +85,7 @@
 
 #undef  mem_get_be32
 #define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32)
-static INLINE unsigned MEM_VALUE_T mem_get_be32(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_be32(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -98,7 +99,7 @@
 
 #undef  mem_get_le16
 #define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16)
-static INLINE unsigned MEM_VALUE_T mem_get_le16(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_le16(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -110,7 +111,7 @@
 
 #undef  mem_get_le24
 #define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24)
-static INLINE unsigned MEM_VALUE_T mem_get_le24(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_le24(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -123,7 +124,7 @@
 
 #undef  mem_get_le32
 #define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32)
-static INLINE unsigned MEM_VALUE_T mem_get_le32(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_le32(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -136,7 +137,7 @@
 }
 
 #define mem_get_s_generic(end,sz) \
-    static INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\
+    static signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\
         const MAU_T *mem = (const MAU_T*)vmem;\
         signed MEM_VALUE_T val = mem_get_##end##sz(mem);\
         return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\
@@ -168,7 +169,7 @@
 
 #undef  mem_put_be16
 #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16)
-static INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val)
+static void mem_put_be16(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -178,7 +179,7 @@
 
 #undef  mem_put_be24
 #define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24)
-static INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val)
+static void mem_put_be24(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -189,7 +190,7 @@
 
 #undef  mem_put_be32
 #define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32)
-static INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val)
+static void mem_put_be32(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -201,7 +202,7 @@
 
 #undef  mem_put_le16
 #define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16)
-static INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val)
+static void mem_put_le16(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -211,7 +212,7 @@
 
 #undef  mem_put_le24
 #define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24)
-static INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val)
+static void mem_put_le24(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -222,7 +223,7 @@
 
 #undef  mem_put_le32
 #define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32)
-static INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val)
+static void mem_put_le32(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h
index 1d0db2c..4c44aa2 100644
--- a/vpx_ports/mem_ops_aligned.h
+++ b/vpx_ports/mem_ops_aligned.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -39,19 +40,19 @@
 #define swap_endian_32_se(val,raw) swap_endian_32(val,raw)
 
 #define mem_get_ne_aligned_generic(end,sz) \
-    static INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
+    static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
         const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
         return *mem;\
     }
 
 #define mem_get_sne_aligned_generic(end,sz) \
-    static INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
+    static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
         const int##sz##_t *mem = (const int##sz##_t *)vmem;\
         return *mem;\
     }
 
 #define mem_get_se_aligned_generic(end,sz) \
-    static INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
+    static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
         const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
         unsigned MEM_VALUE_T val, raw = *mem;\
         swap_endian_##sz(val,raw);\
@@ -59,7 +60,7 @@
     }
 
 #define mem_get_sse_aligned_generic(end,sz) \
-    static INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
+    static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
         const int##sz##_t *mem = (const int##sz##_t *)vmem;\
         unsigned MEM_VALUE_T val, raw = *mem;\
         swap_endian_##sz##_se(val,raw);\
@@ -67,13 +68,13 @@
     }
 
 #define mem_put_ne_aligned_generic(end,sz) \
-    static INLINE void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
+    static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
         uint##sz##_t *mem = (uint##sz##_t *)vmem;\
         *mem = (uint##sz##_t)val;\
     }
 
 #define mem_put_se_aligned_generic(end,sz) \
-    static INLINE void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
+    static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
         uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\
         swap_endian_##sz(raw,val);\
         *mem = (uint##sz##_t)raw;\
diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h
index 5c04538..37a0c7c 100644
--- a/vpx_ports/vpx_timer.h
+++ b/vpx_ports/vpx_timer.h
@@ -1,17 +1,18 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #ifndef VPX_TIMER_H
 #define VPX_TIMER_H
 
-#if defined(_MSC_VER)
+#if defined(_WIN32)
 /*
  * Win32 specific includes
  */
@@ -42,7 +43,7 @@
 
 struct vpx_usec_timer
 {
-#if defined(_MSC_VER)
+#if defined(_WIN32)
     LARGE_INTEGER  begin, end;
 #else
     struct timeval begin, end;
@@ -50,10 +51,10 @@
 };
 
 
-static INLINE void
+static void
 vpx_usec_timer_start(struct vpx_usec_timer *t)
 {
-#if defined(_MSC_VER)
+#if defined(_WIN32)
     QueryPerformanceCounter(&t->begin);
 #else
     gettimeofday(&t->begin, NULL);
@@ -61,10 +62,10 @@
 }
 
 
-static INLINE void
+static void
 vpx_usec_timer_mark(struct vpx_usec_timer *t)
 {
-#if defined(_MSC_VER)
+#if defined(_WIN32)
     QueryPerformanceCounter(&t->end);
 #else
     gettimeofday(&t->end, NULL);
@@ -72,10 +73,10 @@
 }
 
 
-static INLINE long
+static long
 vpx_usec_timer_elapsed(struct vpx_usec_timer *t)
 {
-#if defined(_MSC_VER)
+#if defined(_WIN32)
     LARGE_INTEGER freq, diff;
 
     diff.QuadPart = t->end.QuadPart - t->begin.QuadPart;
diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h
index 86525b7..2ab66b1 100644
--- a/vpx_ports/vpxtypes.h
+++ b/vpx_ports/vpxtypes.h
@@ -1,19 +1,18 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
 #ifndef __VPXTYPES_H__
 #define __VPXTYPES_H__
 
-#ifdef   HAVE_CONFIG_H
-#include HAVE_CONFIG_H
-#endif
+#include "vpx_ports/config.h"
 
 //#include <sys/types.h>
 #ifdef _MSC_VER
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 935d037..a8e4607 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index db8208f..dc9e2d9 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
@@ -89,7 +90,7 @@
 %macro ALIGN_STACK 2
     mov         %2, rsp
     and         rsp, -%1
-    sub         rsp, %1 - REG_SZ_BYTES
+    lea         rsp, [rsp - (%1 - REG_SZ_BYTES)]
     push        %2
 %endmacro
 
@@ -104,7 +105,6 @@
 %idefine XMMWORD
 %idefine MMWORD
 
-
 ; PIC macros
 ;
 %if ABI_IS_32BIT
@@ -115,9 +115,13 @@
       extern _GLOBAL_OFFSET_TABLE_
       push %1
       call %%get_got
+      %%sub_offset:
+      jmp %%exitGG
       %%get_got:
-      pop %1
-      add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%get_got wrt ..gotpc
+      mov %1, [esp]
+      add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
+      ret
+      %%exitGG:
       %undef GLOBAL
       %define GLOBAL + %1 wrt ..gotoff
       %undef RESTORE_GOT
@@ -127,9 +131,13 @@
     %macro GET_GOT 1
       push %1
       call %%get_got
+      %%sub_offset:
+      jmp  %%exitGG
       %%get_got:
-      pop %1
-      add %1, fake_got - %%get_got
+      mov  %1, [esp]
+      add %1, fake_got - %%sub_offset
+      ret
+      %%exitGG:
       %undef GLOBAL
       %define GLOBAL + %1 - fake_got
       %undef RESTORE_GOT
@@ -137,12 +145,16 @@
     %endmacro
   %endif
   %endif
+  %define HIDDEN_DATA(x) x
 %else
   %macro GET_GOT 1
   %endmacro
   %define GLOBAL wrt rip
   %ifidn __OUTPUT_FORMAT__,elf64
     %define WRT_PLT wrt ..plt
+    %define HIDDEN_DATA(x) x:data hidden
+  %else
+    %define HIDDEN_DATA(x) x
   %endif
 %endif
 %ifnmacro GET_GOT
@@ -198,22 +210,38 @@
         push r9
     %endif
     %if %1 > 6
-        mov rax,[rbp+16]
+      %assign i %1-6
+      %assign off 16
+      %rep i
+        mov rax,[rbp+off]
         push rax
-    %endif
-    %if %1 > 7
-        mov rax,[rbp+24]
-        push rax
-    %endif
-    %if %1 > 8
-        mov rax,[rbp+32]
-        push rax
+        %assign off off+8
+      %endrep
     %endif
   %endm
 %endif
   %define UNSHADOW_ARGS mov rsp, rbp
 %endif
 
+; must keep XMM6:XMM15 (libvpx uses XMM6 and XMM7) on Win64 ABI
+; rsp register has to be aligned
+%ifidn __OUTPUT_FORMAT__,x64
+%macro SAVE_XMM 0
+  sub rsp, 32
+  movdqa XMMWORD PTR [rsp], xmm6
+  movdqa XMMWORD PTR [rsp+16], xmm7
+%endmacro
+%macro RESTORE_XMM 0
+  movdqa xmm6, XMMWORD PTR [rsp]
+  movdqa xmm7, XMMWORD PTR [rsp+16]
+  add rsp, 32
+%endmacro
+%else
+%macro SAVE_XMM 0
+%endmacro
+%macro RESTORE_XMM 0
+%endmacro
+%endif
 
 ; Name of the rodata section
 ;
@@ -229,3 +257,14 @@
 %else
 %define SECTION_RODATA section .rodata
 %endif
+
+
+; Tell GNU ld that we don't require an executable stack.
+%ifidn __OUTPUT_FORMAT__,elf32
+section .note.GNU-stack noalloc noexec nowrite progbits
+section .text
+%elifidn __OUTPUT_FORMAT__,elf64
+section .note.GNU-stack noalloc noexec nowrite progbits
+section .text
+%endif
+
diff --git a/vpx_scale/arm/armv4/gen_scalers_armv4.asm b/vpx_scale/arm/armv4/gen_scalers_armv4.asm
index 1c904ed..e495184 100644
--- a/vpx_scale/arm/armv4/gen_scalers_armv4.asm
+++ b/vpx_scale/arm/armv4/gen_scalers_armv4.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/nds/yv12extend.c b/vpx_scale/arm/nds/yv12extend.c
index 56959cb..48c0dfb 100644
--- a/vpx_scale/arm/nds/yv12extend.c
+++ b/vpx_scale/arm/nds/yv12extend.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
index 26384c4..24d46cb 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
index a50ae60..6534827 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
index c8923d5..dfc8db5 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
index 8c9ce19..e475b92 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c
index 3c355be..1e8bcb8 100644
--- a/vpx_scale/arm/scalesystemdependant.c
+++ b/vpx_scale/arm/scalesystemdependant.c
@@ -1,18 +1,17 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
+#include "vpx_ports/config.h"
 #include "vpx_scale/vpxscale.h"
 
-#ifdef HAVE_CONFIG_H
-#include "vpx_config.h"
-#endif
 
 void (*vp8_yv12_extend_frame_borders_ptr)(YV12_BUFFER_CONFIG *ybf);
 extern void vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf);
diff --git a/vpx_scale/arm/yv12extend_arm.c b/vpx_scale/arm/yv12extend_arm.c
index 7c3f7cd..d7a8289 100644
--- a/vpx_scale/arm/yv12extend_arm.c
+++ b/vpx_scale/arm/yv12extend_arm.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/blackfin/yv12config.c b/vpx_scale/blackfin/yv12config.c
index 7cb083f..42538af 100644
--- a/vpx_scale/blackfin/yv12config.c
+++ b/vpx_scale/blackfin/yv12config.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/blackfin/yv12extend.c b/vpx_scale/blackfin/yv12extend.c
index d5be495..cfd3de0 100644
--- a/vpx_scale/blackfin/yv12extend.c
+++ b/vpx_scale/blackfin/yv12extend.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/dm642/bicubic_scaler_c64.c b/vpx_scale/dm642/bicubic_scaler_c64.c
index 9bd3797..5166ace 100644
--- a/vpx_scale/dm642/bicubic_scaler_c64.c
+++ b/vpx_scale/dm642/bicubic_scaler_c64.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/dm642/gen_scalers_c64.c b/vpx_scale/dm642/gen_scalers_c64.c
index 2126a75..87ff998 100644
--- a/vpx_scale/dm642/gen_scalers_c64.c
+++ b/vpx_scale/dm642/gen_scalers_c64.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/dm642/yv12extend.c b/vpx_scale/dm642/yv12extend.c
index ca25a5f..646e69a 100644
--- a/vpx_scale/dm642/yv12extend.c
+++ b/vpx_scale/dm642/yv12extend.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c
index e3c2b4a..420f719 100644
--- a/vpx_scale/generic/bicubic_scaler.c
+++ b/vpx_scale/generic/bicubic_scaler.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -45,7 +46,7 @@
 //         3     2
 // C0 = a*t - a*t
 //
-static INLINE short c0_fixed(unsigned int t)
+static short c0_fixed(unsigned int t)
 {
     // put t in Q16 notation
     unsigned short v1, v2;
@@ -66,7 +67,7 @@
 //                     2          3
 // C1 = a*t + (3-2*a)*t  - (2-a)*t
 //
-static INLINE short c1_fixed(unsigned int t)
+static short c1_fixed(unsigned int t)
 {
     unsigned short v1, v2, v3;
     unsigned short two, three;
@@ -95,7 +96,7 @@
 //                 2          3
 // C2 = 1 - (3-a)*t  + (2-a)*t
 //
-static INLINE short c2_fixed(unsigned int t)
+static short c2_fixed(unsigned int t)
 {
     unsigned short v1, v2, v3;
     unsigned short two, three;
@@ -123,7 +124,7 @@
 //                 2      3
 // C3 = a*t - 2*a*t  + a*t
 //
-static INLINE short c3_fixed(unsigned int t)
+static short c3_fixed(unsigned int t)
 {
     int v1, v2, v3;
 
diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c
index a5e545f..b084e81 100644
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
@@ -936,12 +937,13 @@
 
 void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
 {
-    unsigned int i;
+    int i;
     int temp;
+    int width = dest_width;
 
     (void) dest_pitch;
 
-    for (i = 0; i < dest_width; i++)
+    for (i = 0; i < width; i++)
     {
         temp = 8;
         temp += source[i-(int)src_pitch] * 3;
diff --git a/vpx_scale/generic/scalesystemdependant.c b/vpx_scale/generic/scalesystemdependant.c
index 28f5c72..926feb7 100644
--- a/vpx_scale/generic/scalesystemdependant.c
+++ b/vpx_scale/generic/scalesystemdependant.c
@@ -1,18 +1,17 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
+#include "vpx_ports/config.h"
 #include "vpx_scale/vpxscale.h"
 
-#ifdef HAVE_CONFIG_H
-#include "vpx_config.h"
-#endif
 
 void (*vp8_yv12_extend_frame_borders_ptr)(YV12_BUFFER_CONFIG *ybf);
 extern void vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf);
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index 206cd55..e1c8028 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index 04617be..3034cc3 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 4906625..39d2fa8 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/arm/vpxscale_nofp.h b/vpx_scale/include/arm/vpxscale_nofp.h
index d6181d2..3e1a9fa 100644
--- a/vpx_scale/include/arm/vpxscale_nofp.h
+++ b/vpx_scale/include/arm/vpxscale_nofp.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h
index 2b50f24..39de181 100644
--- a/vpx_scale/include/generic/vpxscale_arbitrary.h
+++ b/vpx_scale/include/generic/vpxscale_arbitrary.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/generic/vpxscale_depricated.h b/vpx_scale/include/generic/vpxscale_depricated.h
index 015eed0..3f7fe0f 100644
--- a/vpx_scale/include/generic/vpxscale_depricated.h
+++ b/vpx_scale/include/generic/vpxscale_depricated.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/generic/vpxscale_nofp.h b/vpx_scale/include/generic/vpxscale_nofp.h
index c4d5f4c..7b8205a 100644
--- a/vpx_scale/include/generic/vpxscale_nofp.h
+++ b/vpx_scale/include/generic/vpxscale_nofp.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/leapster/vpxscale.h b/vpx_scale/include/leapster/vpxscale.h
index f70029c..5021849 100644
--- a/vpx_scale/include/leapster/vpxscale.h
+++ b/vpx_scale/include/leapster/vpxscale.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/symbian/vpxscale_nofp.h b/vpx_scale/include/symbian/vpxscale_nofp.h
index d6181d2..3e1a9fa 100644
--- a/vpx_scale/include/symbian/vpxscale_nofp.h
+++ b/vpx_scale/include/symbian/vpxscale_nofp.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/vpxscale_nofp.h b/vpx_scale/include/vpxscale_nofp.h
index f6482f9..a704bd9 100644
--- a/vpx_scale/include/vpxscale_nofp.h
+++ b/vpx_scale/include/vpxscale_nofp.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/intel_linux/scaleopt.c b/vpx_scale/intel_linux/scaleopt.c
index 6555600..499f0ed 100644
--- a/vpx_scale/intel_linux/scaleopt.c
+++ b/vpx_scale/intel_linux/scaleopt.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/intel_linux/scalesystemdependant.c b/vpx_scale/intel_linux/scalesystemdependant.c
index 9ed48bf..eab741f 100644
--- a/vpx_scale/intel_linux/scalesystemdependant.c
+++ b/vpx_scale/intel_linux/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/leapster/doptsystemdependant_lf.c b/vpx_scale/leapster/doptsystemdependant_lf.c
index ca13167..7cbb1c5 100644
--- a/vpx_scale/leapster/doptsystemdependant_lf.c
+++ b/vpx_scale/leapster/doptsystemdependant_lf.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/leapster/gen_scalers_lf.c b/vpx_scale/leapster/gen_scalers_lf.c
index 1b9c7c7..f1ee056 100644
--- a/vpx_scale/leapster/gen_scalers_lf.c
+++ b/vpx_scale/leapster/gen_scalers_lf.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/leapster/vpxscale_lf.c b/vpx_scale/leapster/vpxscale_lf.c
index 5f05e5d..616ddf5 100644
--- a/vpx_scale/leapster/vpxscale_lf.c
+++ b/vpx_scale/leapster/vpxscale_lf.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/leapster/yv12extend.c b/vpx_scale/leapster/yv12extend.c
index 480d971..1cddd95 100644
--- a/vpx_scale/leapster/yv12extend.c
+++ b/vpx_scale/leapster/yv12extend.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h
index 2a9ab76..1476e64 100644
--- a/vpx_scale/scale_mode.h
+++ b/vpx_scale/scale_mode.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/symbian/gen_scalers_armv4.asm b/vpx_scale/symbian/gen_scalers_armv4.asm
index 1c904ed..e495184 100644
--- a/vpx_scale/symbian/gen_scalers_armv4.asm
+++ b/vpx_scale/symbian/gen_scalers_armv4.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/symbian/scalesystemdependant.c b/vpx_scale/symbian/scalesystemdependant.c
index a2acc3e..b0bffdd 100644
--- a/vpx_scale/symbian/scalesystemdependant.c
+++ b/vpx_scale/symbian/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h
index 9a86b75..86fc128 100644
--- a/vpx_scale/vpxscale.h
+++ b/vpx_scale/vpxscale.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/wce/gen_scalers_armv4.asm b/vpx_scale/wce/gen_scalers_armv4.asm
index 1c904ed..e495184 100644
--- a/vpx_scale/wce/gen_scalers_armv4.asm
+++ b/vpx_scale/wce/gen_scalers_armv4.asm
@@ -1,10 +1,11 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/wce/scalesystemdependant.c b/vpx_scale/wce/scalesystemdependant.c
index a5a6a52..e9f2c26 100644
--- a/vpx_scale/wce/scalesystemdependant.c
+++ b/vpx_scale/wce/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c
index da0533e..3711fe5 100644
--- a/vpx_scale/win32/scaleopt.c
+++ b/vpx_scale/win32/scaleopt.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/win32/scalesystemdependant.c b/vpx_scale/win32/scalesystemdependant.c
index 9ed48bf..eab741f 100644
--- a/vpx_scale/win32/scalesystemdependant.c
+++ b/vpx_scale/win32/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/x86_64/scaleopt.c b/vpx_scale/x86_64/scaleopt.c
index 3d2d5f2..101f5ff 100644
--- a/vpx_scale/x86_64/scaleopt.c
+++ b/vpx_scale/x86_64/scaleopt.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/x86_64/scalesystemdependant.c b/vpx_scale/x86_64/scalesystemdependant.c
index 43f05a6..89ae86e 100644
--- a/vpx_scale/x86_64/scalesystemdependant.c
+++ b/vpx_scale/x86_64/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index a8d0ce4..50d6e3b 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/yv12extend.h b/vpx_scale/yv12extend.h
index 9968fea..0c239b9 100644
--- a/vpx_scale/yv12extend.h
+++ b/vpx_scale/yv12extend.h
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/wince_wmain_adapter.cpp b/wince_wmain_adapter.cpp
index db2119c..57f880a 100644
--- a/wince_wmain_adapter.cpp
+++ b/wince_wmain_adapter.cpp
@@ -1,10 +1,11 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/y4minput.c b/y4minput.c
new file mode 100644
index 0000000..3eaec4e
--- /dev/null
+++ b/y4minput.c
@@ -0,0 +1,881 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ *  Based on code from the OggTheora software codec source code,
+ *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "y4minput.h"
+
+static int y4m_parse_tags(y4m_input *_y4m,char *_tags){
+  int   got_w;
+  int   got_h;
+  int   got_fps;
+  int   got_interlace;
+  int   got_par;
+  int   got_chroma;
+  char *p;
+  char *q;
+  got_w=got_h=got_fps=got_interlace=got_par=got_chroma=0;
+  for(p=_tags;;p=q){
+    /*Skip any leading spaces.*/
+    while(*p==' ')p++;
+    /*If that's all we have, stop.*/
+    if(p[0]=='\0')break;
+    /*Find the end of this tag.*/
+    for(q=p+1;*q!='\0'&&*q!=' ';q++);
+    /*Process the tag.*/
+    switch(p[0]){
+      case 'W':{
+        if(sscanf(p+1,"%d",&_y4m->pic_w)!=1)return -1;
+        got_w=1;
+      }break;
+      case 'H':{
+        if(sscanf(p+1,"%d",&_y4m->pic_h)!=1)return -1;
+        got_h=1;
+      }break;
+      case 'F':{
+        if(sscanf(p+1,"%d:%d",&_y4m->fps_n,&_y4m->fps_d)!=2){
+          return -1;
+        }
+        got_fps=1;
+      }break;
+      case 'I':{
+        _y4m->interlace=p[1];
+        got_interlace=1;
+      }break;
+      case 'A':{
+        if(sscanf(p+1,"%d:%d",&_y4m->par_n,&_y4m->par_d)!=2){
+          return -1;
+        }
+        got_par=1;
+      }break;
+      case 'C':{
+        if(q-p>16)return -1;
+        memcpy(_y4m->chroma_type,p+1,q-p-1);
+        _y4m->chroma_type[q-p-1]='\0';
+        got_chroma=1;
+      }break;
+      /*Ignore unknown tags.*/
+    }
+  }
+  if(!got_w||!got_h||!got_fps)return -1;
+  if(!got_interlace)_y4m->interlace='?';
+  if(!got_par)_y4m->par_n=_y4m->par_d=0;
+  /*Chroma-type is not specified in older files, e.g., those generated by
+     mplayer.*/
+  if(!got_chroma)strcpy(_y4m->chroma_type,"420");
+  return 0;
+}
+
+
+
+/*All anti-aliasing filters in the following conversion functions are based on
+   one of two window functions:
+  The 6-tap Lanczos window (for down-sampling and shifts):
+   sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
+   0,                         |t|>=3
+  The 4-tap Mitchell window (for up-sampling):
+   7|t|^3-12|t|^2+16/3,             |t|<1
+   -(7/3)|x|^3+12|x|^2-20|x|+32/3,  |t|<2
+   0,                               |t|>=2
+  The number of taps is intentionally kept small to reduce computational
+   overhead and limit ringing.
+
+  The taps from these filters are scaled so that their sum is 1, and the result
+   is scaled by 128 and rounded to integers to create a filter whose
+   intermediate values fit inside 16 bits.
+  Coefficients are rounded in such a way as to ensure their sum is still 128,
+   which is usually equivalent to normal rounding.
+
+  Conversions which require both horizontal and vertical filtering could
+   have these steps pipelined, for less memory consumption and better cache
+   performance, but we do them separately for simplicity.*/
+
+#define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
+#define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
+#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
+
+/*420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  420mpeg2 chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  BR      |       BR      |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  BR      |       BR      |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a resampling filter to shift the site locations one quarter pixel (at
+   the chroma plane's resolution) to the right.
+  The 4:2:2 modes look exactly the same, except there are twice as many chroma
+   lines, and they are vertically co-sited with the luma samples in both the
+   mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
+static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
+ const unsigned char *_src,int _c_w,int _c_h){
+  int pli;
+  int y;
+  int x;
+  for(y=0;y<_c_h;y++){
+    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
+       window.*/
+    for(x=0;x<OC_MINI(_c_w,2);x++){
+      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[0]-17*_src[OC_MAXI(x-1,0)]+
+       114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
+       _src[OC_MINI(x+3,_c_w-1)]+64)>>7,255);
+    }
+    for(;x<_c_w-3;x++){
+      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
+       114*_src[x]+35*_src[x+1]-9*_src[x+2]+_src[x+3]+64)>>7,255);
+    }
+    for(;x<_c_w;x++){
+      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
+       114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
+       _src[_c_w-1]+64)>>7,255);
+    }
+    _dst+=_c_w;
+    _src+=_c_w;
+  }
+}
+
+/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
+static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  int c_w;
+  int c_h;
+  int c_sz;
+  int pli;
+  int y;
+  int x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  for(pli=1;pli<3;pli++){
+    y4m_42xmpeg2_42xjpeg_helper(_dst,_aux,c_w,c_h);
+    _dst+=c_sz;
+    _aux+=c_sz;
+  }
+}
+
+/*This format is only used for interlaced content, but is included for
+   completeness.
+
+  420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  420paldv chroma samples are sited like:
+  YR------Y-------YR------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YB------Y-------YB------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YR------Y-------YR------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YB------Y-------YB------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a resampling filter to shift the site locations one quarter pixel (at
+   the chroma plane's resolution) to the right.
+  Then we use another filter to move the C_r location down one quarter pixel,
+   and the C_b location up one quarter pixel.*/
+static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  unsigned char *tmp;
+  int            c_w;
+  int            c_h;
+  int            c_sz;
+  int            pli;
+  int            y;
+  int            x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+1)/2;
+  c_h=(_y4m->pic_h+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  c_sz=c_w*c_h;
+  tmp=_aux+2*c_sz;
+  for(pli=1;pli<3;pli++){
+    /*First do the horizontal re-sampling.
+      This is the same as the mpeg2 case, except that after the horizontal
+       case, we need to apply a second vertical filter.*/
+    y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
+    _aux+=c_sz;
+    switch(pli){
+      case 1:{
+        /*Slide C_b up a quarter-pel.
+          This is the same filter used above, but in the other order.*/
+        for(x=0;x<c_w;x++){
+          for(y=0;y<OC_MINI(c_h,3);y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[0]
+             -9*tmp[OC_MAXI(y-2,0)*c_w]+35*tmp[OC_MAXI(y-1,0)*c_w]
+             +114*tmp[y*c_w]-17*tmp[OC_MINI(y+1,c_h-1)*c_w]
+             +4*tmp[OC_MINI(y+2,c_h-1)*c_w]+64)>>7,255);
+          }
+          for(;y<c_h-2;y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
+             -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
+             -17*tmp[(y+1)*c_w]+4*tmp[(y+2)*c_w]+64)>>7,255);
+          }
+          for(;y<c_h;y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
+             -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
+             -17*tmp[OC_MINI(y+1,c_h-1)*c_w]+4*tmp[(c_h-1)*c_w]+64)>>7,255);
+          }
+          _dst++;
+          tmp++;
+        }
+        _dst+=c_sz-c_w;
+        tmp-=c_w;
+      }break;
+      case 2:{
+        /*Slide C_r down a quarter-pel.
+          This is the same as the horizontal filter.*/
+        for(x=0;x<c_w;x++){
+          for(y=0;y<OC_MINI(c_h,2);y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[0]
+             -17*tmp[OC_MAXI(y-1,0)*c_w]+114*tmp[y*c_w]
+             +35*tmp[OC_MINI(y+1,c_h-1)*c_w]-9*tmp[OC_MINI(y+2,c_h-1)*c_w]
+             +tmp[OC_MINI(y+3,c_h-1)*c_w]+64)>>7,255);
+          }
+          for(;y<c_h-3;y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
+             -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[(y+1)*c_w]
+             -9*tmp[(y+2)*c_w]+tmp[(y+3)*c_w]+64)>>7,255);
+          }
+          for(;y<c_h;y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
+             -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[OC_MINI(y+1,c_h-1)*c_w]
+             -9*tmp[OC_MINI(y+2,c_h-1)*c_w]+tmp[(c_h-1)*c_w]+64)>>7,255);
+          }
+          _dst++;
+          tmp++;
+        }
+      }break;
+    }
+    /*For actual interlaced material, this would have to be done separately on
+       each field, and the shift amounts would be different.
+      C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
+       C_b up 1/8 in the bottom field.
+      The corresponding filters would be:
+       Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
+       Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
+  }
+}
+
+/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
+  This is used as a helper by several converation routines.*/
+static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
+ const unsigned char *_src,int _c_w,int _c_h){
+  int y;
+  int x;
+  /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
+  for(x=0;x<_c_w;x++){
+    for(y=0;y<OC_MINI(_c_h,2);y+=2){
+      _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(64*_src[0]
+       +78*_src[OC_MINI(1,_c_h-1)*_c_w]
+       -17*_src[OC_MINI(2,_c_h-1)*_c_w]
+       +3*_src[OC_MINI(3,_c_h-1)*_c_w]+64)>>7,255);
+    }
+    for(;y<_c_h-3;y+=2){
+      _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w]+_src[(y+3)*_c_w])
+       -17*(_src[(y-1)*_c_w]+_src[(y+2)*_c_w])
+       +78*(_src[y*_c_w]+_src[(y+1)*_c_w])+64)>>7,255);
+    }
+    for(;y<_c_h;y+=2){
+      _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w]
+       +_src[(_c_h-1)*_c_w])-17*(_src[(y-1)*_c_w]
+       +_src[OC_MINI(y+2,_c_h-1)*_c_w])
+       +78*(_src[y*_c_w]+_src[OC_MINI(y+1,_c_h-1)*_c_w])+64)>>7,255);
+    }
+    _src++;
+    _dst++;
+  }
+}
+
+/*420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  422jpeg chroma samples are sited like:
+  Y---BR--Y-------Y---BR--Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y---BR--Y-------Y---BR--Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y---BR--Y-------Y---BR--Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y---BR--Y-------Y---BR--Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a resampling filter to decimate the chroma planes by two in the
+   vertical direction.*/
+static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  int c_w;
+  int c_h;
+  int c_sz;
+  int dst_c_w;
+  int dst_c_h;
+  int dst_c_sz;
+  int tmp_sz;
+  int pic_sz;
+  int pli;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
+  c_h=_y4m->pic_h;
+  dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  dst_c_sz=dst_c_w*dst_c_h;
+  for(pli=1;pli<3;pli++){
+    y4m_422jpeg_420jpeg_helper(_dst,_aux,c_w,c_h);
+    _aux+=c_sz;
+    _dst+=dst_c_sz;
+  }
+}
+
+/*420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  422 chroma samples are sited like:
+  YBR-----Y-------YBR-----Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------YBR-----Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------YBR-----Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------YBR-----Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a resampling filter to shift the original site locations one quarter
+   pixel (at the original chroma resolution) to the right.
+  Then we use a second resampling filter to decimate the chroma planes by two
+   in the vertical direction.*/
+static void y4m_convert_422_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  unsigned char *tmp;
+  int            c_w;
+  int            c_h;
+  int            c_sz;
+  int            dst_c_w;
+  int            dst_c_h;
+  int            dst_c_sz;
+  int            pli;
+  int            y;
+  int            x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
+  c_h=_y4m->pic_h;
+  dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  dst_c_sz=c_w*dst_c_h;
+  tmp=_aux+2*c_sz;
+  for(pli=1;pli<3;pli++){
+    /*In reality, the horizontal and vertical steps could be pipelined, for
+       less memory consumption and better cache performance, but we do them
+       separately for simplicity.*/
+    /*First do horizontal filtering (convert to 422jpeg)*/
+    y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
+    /*Now do the vertical filtering.*/
+    y4m_422jpeg_420jpeg_helper(_dst,tmp,c_w,c_h);
+    _aux+=c_sz;
+    _dst+=dst_c_sz;
+  }
+}
+
+/*420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  411 chroma samples are sited like:
+  YBR-----Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a filter to resample at site locations one eighth pixel (at the source
+   chroma plane's horizontal resolution) and five eighths of a pixel to the
+   right.
+  Then we use another filter to decimate the planes by 2 in the vertical
+   direction.*/
+static void y4m_convert_411_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  unsigned char *tmp;
+  int            c_w;
+  int            c_h;
+  int            c_sz;
+  int            dst_c_w;
+  int            dst_c_h;
+  int            dst_c_sz;
+  int            tmp_sz;
+  int            pli;
+  int            y;
+  int            x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
+  c_h=_y4m->pic_h;
+  dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  dst_c_sz=dst_c_w*dst_c_h;
+  tmp_sz=dst_c_w*c_h;
+  tmp=_aux+2*c_sz;
+  for(pli=1;pli<3;pli++){
+    /*In reality, the horizontal and vertical steps could be pipelined, for
+       less memory consumption and better cache performance, but we do them
+       separately for simplicity.*/
+    /*First do horizontal filtering (convert to 422jpeg)*/
+    for(y=0;y<c_h;y++){
+      /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
+         4-tap Mitchell window.*/
+      for(x=0;x<OC_MINI(c_w,1);x++){
+        tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(111*_aux[0]
+         +18*_aux[OC_MINI(1,c_w-1)]-_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
+        tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(47*_aux[0]
+         +86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
+      }
+      for(;x<c_w-2;x++){
+        tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
+         +18*_aux[x+1]-_aux[x+2]+64)>>7,255);
+        tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
+         +86*_aux[x+1]-5*_aux[x+2]+64)>>7,255);
+      }
+      for(;x<c_w;x++){
+        tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
+         +18*_aux[OC_MINI(x+1,c_w-1)]-_aux[c_w-1]+64)>>7,255);
+        if((x<<1|1)<dst_c_w){
+          tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
+           +86*_aux[OC_MINI(x+1,c_w-1)]-5*_aux[c_w-1]+64)>>7,255);
+        }
+      }
+      tmp+=dst_c_w;
+      _aux+=c_w;
+    }
+    tmp-=tmp_sz;
+    /*Now do the vertical filtering.*/
+    y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
+    _dst+=dst_c_sz;
+  }
+}
+
+/*Convert 444 to 420jpeg.*/
+static void y4m_convert_444_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  unsigned char *tmp;
+  int            c_w;
+  int            c_h;
+  int            c_sz;
+  int            dst_c_w;
+  int            dst_c_h;
+  int            dst_c_sz;
+  int            tmp_sz;
+  int            pli;
+  int            y;
+  int            x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
+  c_h=_y4m->pic_h;
+  dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  dst_c_sz=dst_c_w*dst_c_h;
+  tmp_sz=dst_c_w*c_h;
+  tmp=_aux+2*c_sz;
+  for(pli=1;pli<3;pli++){
+    /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
+    for(y=0;y<c_h;y++){
+      for(x=0;x<OC_MINI(c_w,2);x+=2){
+        tmp[x>>1]=OC_CLAMPI(0,(64*_aux[0]+78*_aux[OC_MINI(1,c_w-1)]
+         -17*_aux[OC_MINI(2,c_w-1)]
+         +3*_aux[OC_MINI(3,c_w-1)]+64)>>7,255);
+      }
+      for(;x<c_w-3;x+=2){
+        tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[x+3])
+         -17*(_aux[x-1]+_aux[x+2])+78*(_aux[x]+_aux[x+1])+64)>>7,255);
+      }
+      for(;x<c_w;x+=2){
+        tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[c_w-1])-
+         17*(_aux[x-1]+_aux[OC_MINI(x+2,c_w-1)])+
+         78*(_aux[x]+_aux[OC_MINI(x+1,c_w-1)])+64)>>7,255);
+      }
+      tmp+=dst_c_w;
+      _aux+=c_w;
+    }
+    tmp-=tmp_sz;
+    /*Now do the vertical filtering.*/
+    y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
+    _dst+=dst_c_sz;
+  }
+}
+
+/*The image is padded with empty chroma components at 4:2:0.*/
+static void y4m_convert_mono_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  int c_sz;
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  c_sz=((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
+   ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
+  memset(_dst,128,c_sz*2);
+}
+
+/*No conversion function needed.*/
+static void y4m_convert_null(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+}
+
+int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){
+  char buffer[80];
+  int  ret;
+  int  i;
+  /*Read until newline, or 80 cols, whichever happens first.*/
+  for(i=0;i<79;i++){
+    if(_nskip>0){
+      buffer[i]=*_skip++;
+      _nskip--;
+    }
+    else{
+      ret=fread(buffer+i,1,1,_fin);
+      if(ret<1)return -1;
+    }
+    if(buffer[i]=='\n')break;
+  }
+  /*We skipped too much header data.*/
+  if(_nskip>0)return -1;
+  if(i==79){
+    fprintf(stderr,"Error parsing header; not a YUV2MPEG2 file?\n");
+    return -1;
+  }
+  buffer[i]='\0';
+  if(memcmp(buffer,"YUV4MPEG",8)){
+    fprintf(stderr,"Incomplete magic for YUV4MPEG file.\n");
+    return -1;
+  }
+  if(buffer[8]!='2'){
+    fprintf(stderr,"Incorrect YUV input file version; YUV4MPEG2 required.\n");
+  }
+  ret=y4m_parse_tags(_y4m,buffer+5);
+  if(ret<0){
+    fprintf(stderr,"Error parsing YUV4MPEG2 header.\n");
+    return ret;
+  }
+  if(_y4m->interlace=='?'){
+    fprintf(stderr,"Warning: Input video interlacing format unknown; "
+     "assuming progressive scan.\n");
+  }
+  else if(_y4m->interlace!='p'){
+    fprintf(stderr,"Input video is interlaced; "
+     "Only progressive scan handled.\n");
+    return -1;
+  }
+  if(strcmp(_y4m->chroma_type,"420")==0||
+   strcmp(_y4m->chroma_type,"420jpeg")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h
+     +2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
+    /*Natively supported: no conversion required.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
+    _y4m->convert=y4m_convert_null;
+  }
+  else if(strcmp(_y4m->chroma_type,"420mpeg2")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=
+     2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
+    _y4m->convert=y4m_convert_42xmpeg2_42xjpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"420paldv")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.*/
+    _y4m->aux_buf_sz=3*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
+    _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
+    _y4m->convert=y4m_convert_42xpaldv_42xjpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"422jpeg")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->convert=y4m_convert_422jpeg_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"422")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.*/
+    _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->convert=y4m_convert_422_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"411")==0){
+    _y4m->src_c_dec_h=4;
+    _y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.*/
+    _y4m->aux_buf_read_sz=2*((_y4m->pic_w+3)/4)*_y4m->pic_h;
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->convert=y4m_convert_411_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"444")==0){
+    _y4m->src_c_dec_h=1;
+    _y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.*/
+    _y4m->aux_buf_read_sz=2*_y4m->pic_w*_y4m->pic_h;
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->convert=y4m_convert_444_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"444alpha")==0){
+    _y4m->src_c_dec_h=1;
+    _y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.
+      The extra plane also gets read into the aux buf.
+      It will be discarded.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=3*_y4m->pic_w*_y4m->pic_h;
+    _y4m->convert=y4m_convert_444_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"mono")==0){
+    _y4m->src_c_dec_h=_y4m->src_c_dec_v=0;
+    _y4m->dst_c_dec_h=_y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*No extra space required, but we need to clear the chroma planes.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
+    _y4m->convert=y4m_convert_mono_420jpeg;
+  }
+  else{
+    fprintf(stderr,"Unknown chroma sampling type: %s\n",_y4m->chroma_type);
+    return -1;
+  }
+  /*The size of the final frame buffers is always computed from the
+     destination chroma decimation type.*/
+  _y4m->dst_buf_sz=_y4m->pic_w*_y4m->pic_h
+   +2*((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
+   ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
+  _y4m->dst_buf=(unsigned char *)malloc(_y4m->dst_buf_sz);
+  _y4m->aux_buf=(unsigned char *)malloc(_y4m->aux_buf_sz);
+  return 0;
+}
+
+void y4m_input_close(y4m_input *_y4m){
+  free(_y4m->dst_buf);
+  free(_y4m->aux_buf);
+}
+
+int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){
+  char frame[6];
+  int  pic_sz;
+  int  frame_c_w;
+  int  frame_c_h;
+  int  c_w;
+  int  c_h;
+  int  c_sz;
+  int  ret;
+  /*Read and skip the frame header.*/
+  ret=fread(frame,1,6,_fin);
+  if(ret<6)return 0;
+  if(memcmp(frame,"FRAME",5)){
+    fprintf(stderr,"Loss of framing in Y4M input data\n");
+    return -1;
+  }
+  if(frame[5]!='\n'){
+    char c;
+    int  j;
+    for(j=0;j<79&&fread(&c,1,1,_fin)&&c!='\n';j++);
+    if(j==79){
+      fprintf(stderr,"Error parsing Y4M frame header\n");
+      return -1;
+    }
+  }
+  /*Read the frame data that needs no conversion.*/
+  if(fread(_y4m->dst_buf,1,_y4m->dst_buf_read_sz,_fin)!=_y4m->dst_buf_read_sz){
+    fprintf(stderr,"Error reading Y4M frame data.\n");
+    return -1;
+  }
+  /*Read the frame data that does need conversion.*/
+  if(fread(_y4m->aux_buf,1,_y4m->aux_buf_read_sz,_fin)!=_y4m->aux_buf_read_sz){
+    fprintf(stderr,"Error reading Y4M frame data.\n");
+    return -1;
+  }
+  /*Now convert the just read frame.*/
+  (*_y4m->convert)(_y4m,_y4m->dst_buf,_y4m->aux_buf);
+  /*Fill in the frame buffer pointers.
+    We don't use vpx_img_wrap() because it forces padding for odd picture
+     sizes, which would require a separate fread call for every row.*/
+  memset(_img,0,sizeof(*_img));
+  /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
+  _img->fmt=IMG_FMT_I420;
+  _img->w=_img->d_w=_y4m->pic_w;
+  _img->h=_img->d_h=_y4m->pic_h;
+  /*This is hard-coded to 4:2:0 for now, as that's all VP8 supports.*/
+  _img->x_chroma_shift=1;
+  _img->y_chroma_shift=1;
+  _img->bps=12;
+  /*Set up the buffer pointers.*/
+  pic_sz=_y4m->pic_w*_y4m->pic_h;
+  c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  _img->stride[PLANE_Y]=_y4m->pic_w;
+  _img->stride[PLANE_U]=_img->stride[PLANE_V]=c_w;
+  _img->planes[PLANE_Y]=_y4m->dst_buf;
+  _img->planes[PLANE_U]=_y4m->dst_buf+pic_sz;
+  _img->planes[PLANE_V]=_y4m->dst_buf+pic_sz+c_sz;
+  return 0;
+}
diff --git a/y4minput.h b/y4minput.h
new file mode 100644
index 0000000..1a01bcd
--- /dev/null
+++ b/y4minput.h
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ *
+ *  Based on code from the OggTheora software codec source code,
+ *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
+ */
+#if !defined(_y4minput_H)
+# define _y4minput_H (1)
+# include <stdio.h>
+# include "vpx/vpx_image.h"
+
+
+
+typedef struct y4m_input y4m_input;
+
+
+
+/*The function used to perform chroma conversion.*/
+typedef void (*y4m_convert_func)(y4m_input *_y4m,
+ unsigned char *_dst,unsigned char *_src);
+
+
+
+struct y4m_input{
+  int               pic_w;
+  int               pic_h;
+  int               fps_n;
+  int               fps_d;
+  int               par_n;
+  int               par_d;
+  char              interlace;
+  int               src_c_dec_h;
+  int               src_c_dec_v;
+  int               dst_c_dec_h;
+  int               dst_c_dec_v;
+  char              chroma_type[16];
+  /*The size of each converted frame buffer.*/
+  size_t            dst_buf_sz;
+  /*The amount to read directly into the converted frame buffer.*/
+  size_t            dst_buf_read_sz;
+  /*The size of the auxilliary buffer.*/
+  size_t            aux_buf_sz;
+  /*The amount to read into the auxilliary buffer.*/
+  size_t            aux_buf_read_sz;
+  y4m_convert_func  convert;
+  unsigned char    *dst_buf;
+  unsigned char    *aux_buf;
+};
+
+int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip);
+void y4m_input_close(y4m_input *_y4m);
+int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *img);
+
+#endif