/* blob: 9fb67613df62c37404324c50f94fbf40ee10e532 [file] [log] [blame] */
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "variance.h"
#include "onyx_int.h"
/*
 * Run-time dispatch pointers.
 *
 * Each pointer defined here is assigned by vp8_cmachine_specific_config(),
 * which picks a C, MMX or Willamette routine according to the flags
 * returned by vpx_get_processor_flags().  They carry no initializer, so
 * they start out null until that function has run.
 */

/* SADFunction entry points, one per block size. */
SADFunction *vp8_sad4x4;
SADFunction *vp8_sad8x8;
SADFunction *vp8_sad8x16;
SADFunction *vp8_sad16x8;
SADFunction *vp8_sad16x16;

/* variance_function entry points (vp8_mse16x16 shares the same type). */
variance_function *vp8_variance16x16;
variance_function *vp8_variance16x8;
variance_function *vp8_variance8x16;
variance_function *vp8_variance8x8;
variance_function *vp8_variance4x4;
variance_function *vp8_mse16x16;

/* sub_pixel_variance_function entry points, one per block size. */
sub_pixel_variance_function *vp8_sub_pixel_variance16x16;
sub_pixel_variance_function *vp8_sub_pixel_variance16x8;
sub_pixel_variance_function *vp8_sub_pixel_variance8x16;
sub_pixel_variance_function *vp8_sub_pixel_variance8x8;
sub_pixel_variance_function *vp8_sub_pixel_variance4x4;

/* Error measures. */
int (*vp8_block_error)(short *, short *);
int (*vp8_mbblock_error)(MACROBLOCK *mb, int dc);
int (*vp8_mbuverror)(MACROBLOCK *mb);
unsigned int (*vp8_get_mb_ss)(short *);

/* Forward DCT entry points. */
void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);

/* Subtraction entry points. */
void (*vp8_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch);
void (*vp8_subtract_mby)(short *diff, unsigned char *src,
                         unsigned char *pred, int stride);
void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc,
                          unsigned char *vsrc, unsigned char *pred,
                          int stride);

/* Quantizer entry point. */
void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);

/* Prediction-error / variance / SSE helpers. */
unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride,
                                       unsigned char *ref_ptr, int ref_stride);
unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int source_stride,
                              unsigned char *ref_ptr, int recon_stride,
                              unsigned int *SSE, int *Sum);
unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int source_stride,
                                unsigned char *ref_ptr, int recon_stride,
                                unsigned int *SSE, int *Sum);
unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride,
                                 unsigned char *ref_ptr, int recon_stride);

/*
 * Implementations behind vp8_subtract_mby, vp8_block_error and
 * vp8_mbblock_error (C, MMX and XMM variants).
 */
extern void vp8_subtract_mby_c(short *diff, unsigned char *src,
                               unsigned char *pred, int stride);
extern void vp8_subtract_mby_mmx(short *diff, unsigned char *src,
                                 unsigned char *pred, int stride);

extern int vp8_block_error_c(short *, short *);
extern int vp8_block_error_mmx(short *, short *);
extern int vp8_block_error_xmm(short *, short *);

extern int vp8_mbblock_error_c(MACROBLOCK *x, int dc);
extern int vp8_mbblock_error_mmx(MACROBLOCK *x, int dc);
extern int vp8_mbblock_error_xmm(MACROBLOCK *x, int dc);
/*
 * C imports.
 *
 * The `_c` routines below are what vp8_cmachine_specific_config() installs
 * on its pure-C path; the quantizer and forward-DCT entries are installed
 * on every path.  Each symbol is declared exactly once.
 */
extern int vp8_mbuverror_c(MACROBLOCK *mb);
extern unsigned int vp8_get_mb_ss_c(short *);

extern void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
extern void vp8_short_fdct8x4_c(short *input, short *output, int pitch);
extern void vp8_fast_fdct4x4_c(short *input, short *output, int pitch);
extern void vp8_fast_fdct8x4_c(short *input, short *output, int pitch);

extern void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch);
extern void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc,
                                unsigned char *vsrc, unsigned char *pred,
                                int stride);
extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);

extern variance_function vp8_variance16x16_c;
extern variance_function vp8_variance8x16_c;
extern variance_function vp8_variance16x8_c;
extern variance_function vp8_variance8x8_c;
extern variance_function vp8_variance4x4_c;
extern variance_function vp8_mse16x16_c;

extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_c;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_c;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_c;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_c;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_c;

extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr,
                                             int src_stride,
                                             unsigned char *ref_ptr,
                                             int ref_stride);
extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride,
                                    unsigned char *ref_ptr, int recon_stride,
                                    unsigned int *SSE, int *Sum);
extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr,
                                      int source_stride,
                                      unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE, int *Sum);
extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr,
                                       int source_stride,
                                       unsigned char *ref_ptr,
                                       int recon_stride);

/*
 * SADFunction implementations.  All three variants (C, Willamette, MMX)
 * are declared together here rather than in the per-instruction-set
 * sections.
 */
extern SADFunction vp8_sad16x16_c;
extern SADFunction vp8_sad16x8_c;
extern SADFunction vp8_sad8x16_c;
extern SADFunction vp8_sad8x8_c;
extern SADFunction vp8_sad4x4_c;

extern SADFunction vp8_sad16x16_wmt;
extern SADFunction vp8_sad16x8_wmt;
extern SADFunction vp8_sad8x16_wmt;
extern SADFunction vp8_sad8x8_wmt;
extern SADFunction vp8_sad4x4_wmt;

extern SADFunction vp8_sad16x16_mmx;
extern SADFunction vp8_sad16x8_mmx;
extern SADFunction vp8_sad8x16_mmx;
extern SADFunction vp8_sad8x8_mmx;
extern SADFunction vp8_sad4x4_mmx;
/*
 * MMX imports.
 *
 * `_mmx` routines referenced by vp8_cmachine_specific_config().  Several of
 * them are also installed on the Willamette path.
 */
extern int vp8_mbuverror_mmx(MACROBLOCK *mb);
extern unsigned int vp8_get_mb_ss_mmx(short *);

extern void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d);

extern void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch);
extern void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc,
                                  unsigned char *vsrc, unsigned char *pred,
                                  int stride);

extern void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch);
extern void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch);
extern void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch);
extern void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch);

extern variance_function vp8_variance16x16_mmx;
extern variance_function vp8_variance16x8_mmx;
extern variance_function vp8_variance8x16_mmx;
extern variance_function vp8_variance8x8_mmx;
extern variance_function vp8_variance4x4_mmx;
extern variance_function vp8_mse16x16_mmx;

extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_mmx;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_mmx;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_mmx;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_mmx;
extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_mmx;

extern unsigned int vp8_get16x16pred_error_mmx(unsigned char *src_ptr,
                                               int src_stride,
                                               unsigned char *ref_ptr,
                                               int ref_stride);
extern unsigned int vp8_get8x8var_mmx(unsigned char *src_ptr,
                                      int source_stride,
                                      unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE, int *Sum);
extern unsigned int vp8_get16x16var_mmx(unsigned char *src_ptr,
                                        int source_stride,
                                        unsigned char *ref_ptr,
                                        int recon_stride,
                                        unsigned int *SSE, int *Sum);
extern unsigned int vp8_get4x4sse_cs_mmx(unsigned char *src_ptr,
                                         int source_stride,
                                         unsigned char *ref_ptr,
                                         int recon_stride);
/*
 * Willamette (wmt) imports.
 *
 * Routines for the Willamette path of vp8_cmachine_specific_config().  The
 * symbols use a mix of `_wmt`, `_xmm`, `_sse` and `_sse2` suffixes.
 */
extern int vp8_mbuverror_xmm(MACROBLOCK *mb);
extern unsigned int vp8_get_mb_ss_sse2(short *src_ptr);

extern void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d);
extern void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch);

extern variance_function vp8_variance16x16_wmt;
extern variance_function vp8_variance16x8_wmt;
extern variance_function vp8_variance8x16_wmt;
extern variance_function vp8_variance8x8_wmt;
extern variance_function vp8_variance4x4_wmt;
extern variance_function vp8_mse16x16_wmt;

extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_wmt;
extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_wmt;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_wmt;
extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_wmt;
extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_wmt;

extern unsigned int vp8_get16x16pred_error_sse2(unsigned char *src_ptr,
                                                int src_stride,
                                                unsigned char *ref_ptr,
                                                int ref_stride);
extern unsigned int vp8_get8x8var_sse2(unsigned char *src_ptr,
                                       int source_stride,
                                       unsigned char *ref_ptr,
                                       int recon_stride,
                                       unsigned int *SSE, int *Sum);
extern unsigned int vp8_get16x16var_sse2(unsigned char *src_ptr,
                                         int source_stride,
                                         unsigned char *ref_ptr,
                                         int recon_stride,
                                         unsigned int *SSE, int *Sum);

/*
 * CPU capability query.  Writes one flag per instruction set through the
 * supplied pointers.
 */
extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled,
                                    int *wmt_enabled);
void vp8_cmachine_specific_config(void)
{
int mmx_enabled;
int xmm_enabled;
int wmt_enabled;
vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
if (wmt_enabled) // Willamette
{
// Willamette instruction set available:
vp8_mbuverror = vp8_mbuverror_xmm;
/* The sse quantizer has not been updated to match the new exact
* quantizer introduced in commit e04e2935
*/
vp8_fast_quantize_b = vp8_fast_quantize_b_c;
#if 0 //new fdct
vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx;
vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx;
vp8_fast_fdct4x4 = vp8_short_fdct4x4_mmx;
vp8_fast_fdct8x4 = vp8_short_fdct8x4_wmt;
#else
vp8_short_fdct4x4 = vp8_short_fdct4x4_c;
vp8_short_fdct8x4 = vp8_short_fdct8x4_c;
vp8_fast_fdct4x4 = vp8_short_fdct4x4_c;
vp8_fast_fdct8x4 = vp8_fast_fdct8x4_c;
#endif
vp8_subtract_b = vp8_subtract_b_mmx;
vp8_subtract_mbuv = vp8_subtract_mbuv_mmx;
vp8_variance4x4 = vp8_variance4x4_mmx;
vp8_variance8x8 = vp8_variance8x8_mmx;
vp8_variance8x16 = vp8_variance8x16_wmt;
vp8_variance16x8 = vp8_variance16x8_wmt;
vp8_variance16x16 = vp8_variance16x16_wmt;
vp8_mse16x16 = vp8_mse16x16_wmt;
vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt;
vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt;
vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt;
vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt;
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt;
vp8_get_mb_ss = vp8_get_mb_ss_sse2;
vp8_get16x16pred_error = vp8_get16x16pred_error_sse2;
vp8_get8x8var = vp8_get8x8var_sse2;
vp8_get16x16var = vp8_get16x16var_sse2;
vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx;
vp8_sad16x16 = vp8_sad16x16_wmt;
vp8_sad16x8 = vp8_sad16x8_wmt;
vp8_sad8x16 = vp8_sad8x16_wmt;
vp8_sad8x8 = vp8_sad8x8_wmt;
vp8_sad4x4 = vp8_sad4x4_wmt;
vp8_block_error = vp8_block_error_xmm;
vp8_mbblock_error = vp8_mbblock_error_xmm;
vp8_subtract_mby = vp8_subtract_mby_mmx;
}
else if (mmx_enabled)
{
// MMX instruction set available:
vp8_mbuverror = vp8_mbuverror_mmx;
/* The mmx quantizer has not been updated to match the new exact
* quantizer introduced in commit e04e2935
*/
vp8_fast_quantize_b = vp8_fast_quantize_b_c;
#if 0 // new fdct
vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx;
vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx;
vp8_fast_fdct4x4 = vp8_short_fdct4x4_mmx;
vp8_fast_fdct8x4 = vp8_short_fdct8x4_mmx;
#else
vp8_short_fdct4x4 = vp8_short_fdct4x4_c;
vp8_short_fdct8x4 = vp8_short_fdct8x4_c;
vp8_fast_fdct4x4 = vp8_short_fdct4x4_c;
vp8_fast_fdct8x4 = vp8_fast_fdct8x4_c;
#endif
vp8_subtract_b = vp8_subtract_b_mmx;
vp8_subtract_mbuv = vp8_subtract_mbuv_mmx;
vp8_variance4x4 = vp8_variance4x4_mmx;
vp8_variance8x8 = vp8_variance8x8_mmx;
vp8_variance8x16 = vp8_variance8x16_mmx;
vp8_variance16x8 = vp8_variance16x8_mmx;
vp8_variance16x16 = vp8_variance16x16_mmx;
vp8_mse16x16 = vp8_mse16x16_mmx;
vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx;
vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx;
vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx;
vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx;
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx;
vp8_get_mb_ss = vp8_get_mb_ss_mmx;
vp8_get16x16pred_error = vp8_get16x16pred_error_mmx;
vp8_get8x8var = vp8_get8x8var_mmx;
vp8_get16x16var = vp8_get16x16var_mmx;
vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx;
vp8_sad16x16 = vp8_sad16x16_mmx;
vp8_sad16x8 = vp8_sad16x8_mmx;
vp8_sad8x16 = vp8_sad8x16_mmx;
vp8_sad8x8 = vp8_sad8x8_mmx;
vp8_sad4x4 = vp8_sad4x4_mmx;
vp8_block_error = vp8_block_error_mmx;
vp8_mbblock_error = vp8_mbblock_error_mmx;
vp8_subtract_mby = vp8_subtract_mby_mmx;
}
else
{
// Pure C:
vp8_mbuverror = vp8_mbuverror_c;
vp8_fast_quantize_b = vp8_fast_quantize_b_c;
vp8_short_fdct4x4 = vp8_short_fdct4x4_c;
vp8_short_fdct8x4 = vp8_short_fdct8x4_c;
vp8_fast_fdct4x4 = vp8_short_fdct4x4_c;
vp8_fast_fdct8x4 = vp8_fast_fdct8x4_c;
vp8_subtract_b = vp8_subtract_b_c;
vp8_subtract_mbuv = vp8_subtract_mbuv_c;
vp8_variance4x4 = vp8_variance4x4_c;
vp8_variance8x8 = vp8_variance8x8_c;
vp8_variance8x16 = vp8_variance8x16_c;
vp8_variance16x8 = vp8_variance16x8_c;
vp8_variance16x16 = vp8_variance16x16_c;
vp8_mse16x16 = vp8_mse16x16_c;
vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c;
vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c;
vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c;
vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c;
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c;
vp8_get_mb_ss = vp8_get_mb_ss_c;
vp8_get16x16pred_error = vp8_get16x16pred_error_c;
vp8_get8x8var = vp8_get8x8var_c;
vp8_get16x16var = vp8_get16x16var_c;
vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
vp8_sad16x16 = vp8_sad16x16_c;
vp8_sad16x8 = vp8_sad16x8_c;
vp8_sad8x16 = vp8_sad8x16_c;
vp8_sad8x8 = vp8_sad8x8_c;
vp8_sad4x4 = vp8_sad4x4_c;
vp8_block_error = vp8_block_error_c;
vp8_mbblock_error = vp8_mbblock_error_c;
vp8_subtract_mby = vp8_subtract_mby_c;
}
}