| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* ---- includes ----------------------------------------------------------- */ |
| |
| #include "b_TensorEm/CompactMat.h" |
| #include "b_TensorEm/Functions.h" |
| #include "b_BasicEm/Math.h" |
| #include "b_BasicEm/Functions.h" |
| #include "b_BasicEm/Memory.h" |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ auxiliary functions } ---------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /** Returns dot product of inVec with indexed row |
| The result is a floating point expresstion: |
| upper 16 bit: signed value |
| lower 16 bit: signed exponent |
| */ |
| int32 bts_CompactMat_fltDotPrdRow( struct bbs_Context* cpA, |
| struct bts_CompactMat* ptrA, |
| const int16* inVecA, |
| uint32 inNormBitsA, |
| uint32 rowA ) |
| { |
| const int16* rowPtrL = ptrA->cpsArrE.arrPtrE + ptrA->wordsPerRowE * rowA; |
| |
| /* extract row-header info */ |
| uint32 offsL = *rowPtrL++; |
| uint32 sizeL = *rowPtrL++; |
| int32 factorManL = *rowPtrL++; |
| int32 factorExpL = *rowPtrL++; |
| uint32 rowNormBitsL = *rowPtrL++; |
| |
| /* consider possible overflow */ |
| uint16 overflowBitsL = ( inNormBitsA + rowNormBitsL >= 31 ) ? inNormBitsA + rowNormBitsL - 31 : 0; |
| |
| const int16* inPtrL = inVecA + offsL; |
| |
| count_t iL; |
| int32 sumL = 0; |
| |
| if( overflowBitsL == 0 ) /* raw dot product fits in int32 */ |
| { |
| switch( ptrA->bitsPerValueE ) |
| { |
| case 16: |
| { |
| for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ ); |
| } |
| break; |
| |
| #ifndef HW_TMS320C5x /* platforms that don't have int8 must use the 'default' implementation */ |
| |
| case 8: |
| { |
| const uint16* dpL = ( uint16* )rowPtrL; |
| for( iL = sizeL; iL >= 8; iL -= 8 ) |
| { |
| sumL += ( ( int8 ) dpL[ 0 ] * ( int32 )inPtrL[ 0 ] ); |
| sumL += ( ( int8 )( dpL[ 0 ] >> 8 ) * ( int32 )inPtrL[ 1 ] ); |
| sumL += ( ( int8 ) dpL[ 1 ] * ( int32 )inPtrL[ 2 ] ); |
| sumL += ( ( int8 )( dpL[ 1 ] >> 8 ) * ( int32 )inPtrL[ 3 ] ); |
| sumL += ( ( int8 ) dpL[ 2 ] * ( int32 )inPtrL[ 4 ] ); |
| sumL += ( ( int8 )( dpL[ 2 ] >> 8 ) * ( int32 )inPtrL[ 5 ] ); |
| sumL += ( ( int8 ) dpL[ 3 ] * ( int32 )inPtrL[ 6 ] ); |
| sumL += ( ( int8 )( dpL[ 3 ] >> 8 ) * ( int32 )inPtrL[ 7 ] ); |
| dpL += 4; |
| inPtrL += 8; |
| } |
| for( ; iL >= 2; iL -= 2 ) |
| { |
| sumL += ( ( int8 ) *dpL * ( int32 )inPtrL[ 0 ] ); |
| sumL += ( ( int8 )( *dpL >> 8 ) * ( int32 )inPtrL[ 1 ] ); |
| dpL++; |
| inPtrL += 2; |
| } |
| if( iL > 0 ) |
| { |
| sumL += ( ( int8 )*dpL++ * ( int32 )inPtrL[ 0 ] ); |
| } |
| } |
| break; |
| |
| case 6: |
| { |
| const uint16* dpL = ( uint16* )rowPtrL; |
| for( iL = sizeL; iL >= 8; iL -= 8 ) |
| { |
| int32 lSumL = 0; |
| lSumL += ( ( int8 ) ( dpL[ 0 ] << 2 ) * ( int32 )inPtrL[ 0 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 4 ) & 0x00FC ) * ( int32 )inPtrL[ 1 ] ); |
| lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 1 ] ) & 0x00FC ) * ( int32 )inPtrL[ 3 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00FC ) * ( int32 )inPtrL[ 4 ] ); |
| lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 2 ) & 0x00FC ) * ( int32 )inPtrL[ 6 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 8 ) & 0x00FC ) * ( int32 )inPtrL[ 7 ] ); |
| sumL += ( lSumL >> 2 ); |
| dpL += 3; |
| inPtrL += 8; |
| } |
| |
| { |
| int32 lSumL = 0; |
| if( iL > 0 ) lSumL += ( ( int8 ) ( dpL[ 0 ] << 2 ) * ( int32 )inPtrL[ 0 ] ); |
| if( iL > 1 ) lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 4 ) & 0x00FC ) * ( int32 )inPtrL[ 1 ] ); |
| if( iL > 2 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 10 ) | ( dpL[ 1 ] << 6 ) ) & 0x00FC ) * ( int32 )inPtrL[ 2 ] ); |
| if( iL > 3 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] ) & 0x00FC ) * ( int32 )inPtrL[ 3 ] ); |
| if( iL > 4 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00FC ) * ( int32 )inPtrL[ 4 ] ); |
| if( iL > 5 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 12 ) | ( dpL[ 2 ] << 4 ) ) & 0x00FC ) * ( int32 )inPtrL[ 5 ] ); |
| if( iL > 6 ) lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 2 ) & 0x00FC ) * ( int32 )inPtrL[ 6 ] ); |
| sumL += ( lSumL >> 2 ); |
| } |
| } |
| break; |
| |
| case 5: |
| { |
| const uint16* dpL = ( uint16* )rowPtrL; |
| for( iL = sizeL; iL >= 16; iL -= 16 ) |
| { |
| int32 lSumL = 0; |
| lSumL += ( ( int8 ) ( dpL[ 0 ] << 3 ) * ( int32 )inPtrL[ 0 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 1 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 7 ) & 0x00F8 ) * ( int32 )inPtrL[ 2 ] ); |
| lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 3 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 4 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00F8 ) * ( int32 )inPtrL[ 5 ] ); |
| lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 6 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 2 ] ) & 0x00F8 ) * ( int32 )inPtrL[ 7 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 5 ) & 0x00F8 ) * ( int32 )inPtrL[ 8 ] ); |
| lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 9 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 3 ] << 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 10 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 3 ] >> 4 ) & 0x00F8 ) * ( int32 )inPtrL[ 11 ] ); |
| lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >> 9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 4 ] << 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 13 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 4 ] >> 3 ) & 0x00F8 ) * ( int32 )inPtrL[ 14 ] ); |
| lSumL += ( ( int8 ) ( ( dpL[ 4 ] >> 8 ) & 0x00F8 ) * ( int32 )inPtrL[ 15 ] ); |
| sumL += ( lSumL >> 3 ); |
| dpL += 5; |
| inPtrL += 16; |
| } |
| |
| { |
| int32 lSumL = 0; |
| if( iL > 0 ) lSumL += ( ( int8 ) ( dpL[ 0 ] << 3 ) * ( int32 )inPtrL[ 0 ] ); |
| if( iL > 1 ) lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 1 ] ); |
| if( iL > 2 ) lSumL += ( ( int8 ) ( ( dpL[ 0 ] >> 7 ) & 0x00F8 ) * ( int32 )inPtrL[ 2 ] ); |
| if( iL > 3 ) lSumL += ( ( int8 ) ( ( ( dpL[ 0 ] >> 12 ) | ( dpL[ 1 ] << 4 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 3 ] ); |
| if( iL > 4 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 4 ] ); |
| if( iL > 5 ) lSumL += ( ( int8 ) ( ( dpL[ 1 ] >> 6 ) & 0x00F8 ) * ( int32 )inPtrL[ 5 ] ); |
| if( iL > 6 ) lSumL += ( ( int8 ) ( ( ( dpL[ 1 ] >> 11 ) | ( dpL[ 2 ] << 5 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 6 ] ); |
| if( iL > 7 ) lSumL += ( ( int8 ) ( ( dpL[ 2 ] ) & 0x00F8 ) * ( int32 )inPtrL[ 7 ] ); |
| if( iL > 8 ) lSumL += ( ( int8 ) ( ( dpL[ 2 ] >> 5 ) & 0x00F8 ) * ( int32 )inPtrL[ 8 ] ); |
| if( iL > 9 ) lSumL += ( ( int8 ) ( ( ( dpL[ 2 ] >> 10 ) | ( dpL[ 3 ] << 6 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 9 ] ); |
| if( iL > 10 ) lSumL += ( ( int8 ) ( ( dpL[ 3 ] << 1 ) & 0x00F8 ) * ( int32 )inPtrL[ 10 ] ); |
| if( iL > 11 ) lSumL += ( ( int8 ) ( ( dpL[ 3 ] >> 4 ) & 0x00F8 ) * ( int32 )inPtrL[ 11 ] ); |
| if( iL > 12 ) lSumL += ( ( int8 ) ( ( ( dpL[ 3 ] >> 9 ) | ( dpL[ 4 ] << 7 ) ) & 0x00F8 ) * ( int32 )inPtrL[ 12 ] ); |
| if( iL > 13 ) lSumL += ( ( int8 ) ( ( dpL[ 4 ] << 2 ) & 0x00F8 ) * ( int32 )inPtrL[ 13 ] ); |
| if( iL > 14 ) lSumL += ( ( int8 ) ( ( dpL[ 4 ] >> 3 ) & 0x00F8 ) * ( int32 )inPtrL[ 14 ] ); |
| sumL += ( lSumL >> 3 ); |
| } |
| } |
| break; |
| |
| case 4: |
| { |
| for( iL = sizeL; iL >= 4; iL -= 4 ) |
| { |
| uint16 v1L = *rowPtrL++; |
| int32 lSumL = 0; |
| lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] ); |
| lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] ); |
| lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] ); |
| lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] ); |
| inPtrL += 4; |
| sumL += ( lSumL >> 4 ); |
| } |
| { |
| uint16 v1L = *rowPtrL++; |
| int32 lSumL = 0; |
| if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] ); |
| if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] ); |
| if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] ); |
| sumL += ( lSumL >> 4 ); |
| } |
| } |
| break; |
| |
| #endif /*ifndef HW_TMS320C5x*/ |
| |
| /* The default case can process all bit sizes including those that are explicitly encoded above |
| * Use the default for all bit sizes when the platform cannot handle the int8 data type (e.g. HW_TMS320C5x) |
| */ |
| default: |
| { |
| uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16; |
| uint32 bitsL = ptrA->bitsPerValueE; |
| uint16 adjL = 16 - bitsL; |
| uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL; |
| uint32 srL = bitsL; |
| for( iL = 0; iL < sizeL; iL++ ) |
| { |
| if( srL > 16 ) |
| { |
| bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 ); |
| srL -= 16; |
| } |
| sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] ) >> adjL; |
| srL += bitsL; |
| } |
| } |
| } |
| } |
| else /* raw dot product does not fit in int32 */ |
| { |
| int32 roundL = 1 << ( overflowBitsL - 1 ); |
| switch( ptrA->bitsPerValueE ) |
| { |
| case 16: |
| { |
| for( iL = sizeL; iL > 0; iL-- ) sumL += ( ( ( int32 )*rowPtrL++ * ( int32 )*inPtrL++ ) + roundL ) >> overflowBitsL; |
| } |
| break; |
| |
| case 8: |
| { |
| for( iL = sizeL; iL >= 2; iL -= 2 ) |
| { |
| uint16 v1L = *rowPtrL++; |
| int32 lSumL = ( ( int8 ) v1L * ( int32 )inPtrL[ 0 ] ) |
| + ( ( int8 )( v1L >> 8 ) * ( int32 )inPtrL[ 1 ] ); |
| sumL += ( lSumL + roundL ) >> overflowBitsL; |
| inPtrL += 2; |
| } |
| if( iL > 0 ) |
| { |
| sumL += ( ( ( int8 )*rowPtrL++ * ( int32 )inPtrL[ 0 ] ) + roundL ) >> overflowBitsL; |
| } |
| } |
| break; |
| |
| case 4: |
| { |
| for( iL = sizeL; iL >= 4; iL -= 4 ) |
| { |
| uint16 v1L = *rowPtrL++; |
| int32 lSumL = 0; |
| lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] ); |
| lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] ); |
| lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] ); |
| lSumL += ( ( int8 )( ( v1L >> 8 ) & 0xF0 ) * ( int32 )inPtrL[ 3 ] ); |
| inPtrL += 4; |
| sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL; |
| } |
| { |
| uint16 v1L = *rowPtrL++; |
| int32 lSumL = 0; |
| if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L << 4 ) ) * ( int32 )inPtrL[ 0 ] ); |
| if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L ) & 0xF0 ) * ( int32 )inPtrL[ 1 ] ); |
| if( iL-- > 0 ) lSumL += ( ( int8 )( ( v1L >> 4 ) & 0xF0 ) * ( int32 )inPtrL[ 2 ] ); |
| sumL += ( ( lSumL >> 4 ) + roundL ) >> overflowBitsL; |
| } |
| } |
| break; |
| |
| default: |
| { |
| uint32 bfL = ( ( uint32 )*rowPtrL++ ) << 16; |
| uint32 bitsL = ptrA->bitsPerValueE; |
| uint16 adjL = 16 - bitsL; |
| uint32 mkL = ( ( 1 << bitsL ) - 1 ) << adjL; |
| uint32 srL = bitsL; |
| int32 lRoundL = roundL << adjL; |
| int32 lAdjL = overflowBitsL + adjL; |
| for( iL = 0; iL < sizeL; iL++ ) |
| { |
| if( srL > 16 ) |
| { |
| bfL = ( ( ( uint32 )*rowPtrL++ ) << 16 ) | ( bfL >> 16 ); |
| srL -= 16; |
| } |
| sumL += ( ( int16 )( ( bfL >> srL ) & mkL ) * ( int32 )inPtrL[ iL ] + lRoundL ) >> lAdjL; |
| srL += bitsL; |
| } |
| } |
| } |
| } |
| |
| /* compute result */ |
| { |
| int32 resultManL; |
| int32 resultExpL; |
| int32 resultLogL; |
| bbs_mulS32( sumL, factorManL, &resultManL, &resultExpL ); |
| resultExpL += factorExpL + overflowBitsL; |
| resultLogL = bbs_intLog2( resultManL > 0 ? resultManL : -resultManL ); |
| if( resultLogL < 30 ) |
| { |
| resultManL <<= 30 - resultLogL; |
| resultExpL -= 30 - resultLogL; |
| } |
| |
| resultManL = ( ( resultManL >> 15 ) + 1 ) >> 1; |
| resultExpL = resultExpL + 16; |
| |
| return ( ( resultManL & 0x0000FFFF ) << 16 ) | ( resultExpL & 0x0000FFFF ); |
| } |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ constructor / destructor } ----------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_CompactMat_init( struct bbs_Context* cpA, |
| struct bts_CompactMat* ptrA ) |
| { |
| ptrA->widthE = 0; |
| ptrA->heightE = 0; |
| ptrA->bitsPerValueE = 0; |
| ptrA->wordsPerRowE = 0; |
| ptrA->maxRowBitsE = 0; |
| bbs_Int16Arr_init( cpA, &ptrA->cpsArrE ); |
| bbs_Int16Arr_init( cpA, &ptrA->expArrE ); |
| |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_CompactMat_exit( struct bbs_Context* cpA, |
| struct bts_CompactMat* ptrA ) |
| { |
| ptrA->widthE = 0; |
| ptrA->heightE = 0; |
| ptrA->bitsPerValueE = 0; |
| ptrA->wordsPerRowE = 0; |
| ptrA->maxRowBitsE = 0; |
| bbs_Int16Arr_exit( cpA, &ptrA->cpsArrE ); |
| bbs_Int16Arr_exit( cpA, &ptrA->expArrE ); |
| } |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ operators } -------------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ query functions } -------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ modify functions } ------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_CompactMat_create( struct bbs_Context* cpA, |
| struct bts_CompactMat* ptrA, |
| uint32 widthA, |
| uint32 heightA, |
| uint32 bitsA, |
| uint32 maxRowSizeA, |
| struct bbs_MemSeg* mspA ) |
| { |
| if( bbs_Context_error( cpA ) ) return; |
| if( bitsA < 2 || bitsA > 16 ) |
| { |
| bbs_ERROR0( "bts_CompactMat_create:\nbitsA must be between 2 and 16" ); |
| return; |
| } |
| |
| ptrA->widthE = widthA; |
| ptrA->heightE = heightA; |
| ptrA->bitsPerValueE = bitsA; |
| ptrA->wordsPerRowE = 6 /*header + 1*/ + ( ( maxRowSizeA * bitsA ) / ( 8 * sizeof( short ) ) ); |
| ptrA->maxRowBitsE = 0; |
| if( ( ptrA->wordsPerRowE & 1 ) != 0 ) ptrA->wordsPerRowE++; |
| bbs_Int16Arr_create( cpA, &ptrA->cpsArrE, heightA * ptrA->wordsPerRowE, mspA ); |
| bbs_Int16Arr_fill( cpA, &ptrA->cpsArrE, 0 ); |
| bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA ); |
| bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 ); |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_CompactMat_copy( struct bbs_Context* cpA, |
| struct bts_CompactMat* ptrA, |
| const struct bts_CompactMat* srcPtrA ) |
| { |
| ptrA->widthE = srcPtrA->widthE; |
| ptrA->heightE = srcPtrA->heightE; |
| ptrA->bitsPerValueE = srcPtrA->bitsPerValueE; |
| ptrA->wordsPerRowE = srcPtrA->wordsPerRowE; |
| ptrA->maxRowBitsE = srcPtrA->maxRowBitsE; |
| bbs_Int16Arr_copy( cpA, &ptrA->cpsArrE, &srcPtrA->cpsArrE ); |
| bbs_Int16Arr_size( cpA, &ptrA->expArrE, ptrA->heightE ); |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ I/O } -------------------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| uint32 bts_CompactMat_memSize( struct bbs_Context* cpA, |
| const struct bts_CompactMat *ptrA ) |
| { |
| return bbs_SIZEOF16( uint32 ) |
| + bbs_SIZEOF16( uint32 ) /* version */ |
| + bbs_SIZEOF16( ptrA->widthE ) |
| + bbs_SIZEOF16( ptrA->heightE ) |
| + bbs_SIZEOF16( ptrA->bitsPerValueE ) |
| + bbs_SIZEOF16( ptrA->wordsPerRowE ) |
| + bbs_SIZEOF16( ptrA->maxRowBitsE ) |
| + bbs_Int16Arr_memSize( cpA, &ptrA->cpsArrE ); |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| uint32 bts_CompactMat_memWrite( struct bbs_Context* cpA, |
| const struct bts_CompactMat* ptrA, |
| uint16* memPtrA ) |
| { |
| uint32 memSizeL = bts_CompactMat_memSize( cpA, ptrA ); |
| memPtrA += bbs_memWrite32( &memSizeL, memPtrA ); |
| memPtrA += bbs_memWriteUInt32( bts_COMPACT_MAT_VERSION, memPtrA ); |
| memPtrA += bbs_memWrite32( &ptrA->widthE, memPtrA ); |
| memPtrA += bbs_memWrite32( &ptrA->heightE, memPtrA ); |
| memPtrA += bbs_memWrite32( &ptrA->bitsPerValueE, memPtrA ); |
| memPtrA += bbs_memWrite32( &ptrA->wordsPerRowE, memPtrA ); |
| memPtrA += bbs_memWrite32( &ptrA->maxRowBitsE, memPtrA ); |
| memPtrA += bbs_Int16Arr_memWrite( cpA, &ptrA->cpsArrE, memPtrA ); |
| return memSizeL; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| uint32 bts_CompactMat_memRead( struct bbs_Context* cpA, |
| struct bts_CompactMat* ptrA, |
| const uint16* memPtrA, |
| struct bbs_MemSeg* mspA ) |
| { |
| uint32 memSizeL, versionL; |
| if( bbs_Context_error( cpA ) ) return 0; |
| memPtrA += bbs_memRead32( &memSizeL, memPtrA ); |
| memPtrA += bbs_memReadVersion32( cpA, &versionL, bts_COMPACT_MAT_VERSION, memPtrA ); |
| memPtrA += bbs_memRead32( &ptrA->widthE, memPtrA ); |
| memPtrA += bbs_memRead32( &ptrA->heightE, memPtrA ); |
| memPtrA += bbs_memRead32( &ptrA->bitsPerValueE, memPtrA ); |
| memPtrA += bbs_memRead32( &ptrA->wordsPerRowE, memPtrA ); |
| memPtrA += bbs_memRead32( &ptrA->maxRowBitsE, memPtrA ); |
| memPtrA += bbs_Int16Arr_memRead( cpA, &ptrA->cpsArrE, memPtrA, mspA ); |
| |
| if( memSizeL != bts_CompactMat_memSize( cpA, ptrA ) ) |
| { |
| bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_CompactMat_memRead( const struct bts_CompactMat* ptrA, const void* memPtrA ):\n" |
| "size mismatch" ); |
| } |
| |
| bbs_Int16Arr_create( cpA, &ptrA->expArrE, ptrA->heightE, mspA ); |
| bbs_Int16Arr_fill( cpA, &ptrA->expArrE, 0 ); |
| |
| return memSizeL; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ exec functions } --------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_CompactMat_map( struct bbs_Context* cpA, |
| const struct bts_CompactMat* ptrA, |
| const int16* inVecA, |
| int16* outVecA, |
| int16* outExpPtrA ) |
| { |
| uint32 inNormBitsL = bbs_intLog2( bbs_vecNorm16( inVecA, ptrA->widthE ) ) + 1; |
| uint32 iL; |
| |
| int16* expArrL = ( ( struct bts_CompactMat* )ptrA )->expArrE.arrPtrE; |
| int16 maxExpL = -32767; |
| |
| for( iL = 0; iL < ptrA->heightE; iL++ ) |
| { |
| int32 fltL = bts_CompactMat_fltDotPrdRow( cpA, ( struct bts_CompactMat* )ptrA, inVecA, inNormBitsL, iL ); |
| outVecA[ iL ] = fltL >> 16; |
| expArrL[ iL ] = fltL & 0x0000FFFF; |
| |
| maxExpL = ( expArrL[ iL ] > maxExpL ) ? expArrL[ iL ] : maxExpL; |
| } |
| |
| if( outExpPtrA != NULL ) *outExpPtrA = maxExpL; |
| |
| for( iL = 0; iL < ptrA->heightE; iL++ ) |
| { |
| int32 shrL = maxExpL - expArrL[ iL ]; |
| if( shrL > 0 ) |
| { |
| outVecA[ iL ] = ( ( outVecA[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1; |
| } |
| } |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| |