| /* |
| * Copyright (C) 2008 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* ---- includes ----------------------------------------------------------- */ |
| |
| #include "b_TensorEm/Flt16Vec.h" |
| #include "b_BasicEm/Memory.h" |
| #include "b_BasicEm/Math.h" |
| #include "b_BasicEm/Functions.h" |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ auxiliary functions } ---------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ constructor / destructor } ----------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_init( struct bbs_Context* cpA, |
| struct bts_Flt16Vec* ptrA ) |
| { |
| bbs_Int16Arr_init( cpA, &ptrA->arrE ); |
| ptrA->expE = 0; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_exit( struct bbs_Context* cpA, |
| struct bts_Flt16Vec* ptrA ) |
| { |
| bbs_Int16Arr_exit( cpA, &ptrA->arrE ); |
| ptrA->expE = 0; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ operators } -------------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_copy( struct bbs_Context* cpA, |
| struct bts_Flt16Vec* ptrA, |
| const struct bts_Flt16Vec* srcPtrA ) |
| { |
| bbs_Int16Arr_copy( cpA, &ptrA->arrE, &srcPtrA->arrE ); |
| ptrA->expE = srcPtrA->expE; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| flag bts_Flt16Vec_equal( struct bbs_Context* cpA, |
| const struct bts_Flt16Vec* ptrA, |
| const struct bts_Flt16Vec* srcPtrA ) |
| { |
| if( !bbs_Int16Arr_equal( cpA, &ptrA->arrE, &srcPtrA->arrE ) ) return FALSE; |
| if( ptrA->expE != srcPtrA->expE ) return FALSE; |
| return TRUE; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ query functions } -------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| int16 bts_Flt16Vec_avg( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA ) |
| { |
| uint16 iL; |
| uint16 sizeL = ptrA->arrE.sizeE; |
| int32 sumL = 0; |
| const int16* srcL = ptrA->arrE.arrPtrE; |
| for( iL = 0; iL < sizeL; iL++ ) |
| { |
| sumL += srcL[ iL ]; |
| } |
| return sumL / ( int32 )sizeL; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| uint32 bts_Flt16Vec_norm( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA ) |
| { |
| return bbs_vecNorm16( ptrA->arrE.arrPtrE, ptrA->arrE.sizeE ); |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| uint16 bts_Flt16Vec_maxAbs( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA ) |
| { |
| uint16 iL; |
| uint16 sizeL = ptrA->arrE.sizeE; |
| uint16 maxL = 0; |
| const int16* srcL = ptrA->arrE.arrPtrE; |
| for( iL = 0; iL < sizeL; iL++ ) |
| { |
| uint16 vL = srcL[ iL ] > 0 ? srcL[ iL ] : -srcL[ iL ]; |
| maxL = vL > maxL ? vL : maxL; |
| } |
| return maxL; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ modify functions } ------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_create( struct bbs_Context* cpA, |
| struct bts_Flt16Vec* ptrA, |
| uint32 sizeA, |
| struct bbs_MemSeg* mspA ) |
| { |
| bbs_Int16Arr_create( cpA, &ptrA->arrE, sizeA, mspA ); |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_size( struct bbs_Context* cpA, |
| struct bts_Flt16Vec* ptrA, |
| uint32 sizeA ) |
| { |
| bbs_Int16Arr_size( cpA, &ptrA->arrE, sizeA ); |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ I/O } -------------------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| uint32 bts_Flt16Vec_memSize( struct bbs_Context* cpA, |
| const struct bts_Flt16Vec *ptrA ) |
| { |
| return bbs_SIZEOF16( uint32 ) /* mem size */ |
| + bbs_Int16Arr_memSize( cpA, &ptrA->arrE ) |
| + bbs_SIZEOF16( ptrA->expE ); |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| uint32 bts_Flt16Vec_memWrite( struct bbs_Context* cpA, |
| const struct bts_Flt16Vec* ptrA, |
| uint16* memPtrA ) |
| { |
| uint32 memSizeL = bts_Flt16Vec_memSize( cpA, ptrA ); |
| memPtrA += bbs_memWrite32( &memSizeL, memPtrA ); |
| memPtrA += bbs_Int16Arr_memWrite( cpA, &ptrA->arrE, memPtrA ); |
| memPtrA += bbs_memWrite16( &ptrA->expE, memPtrA ); |
| return memSizeL; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| uint32 bts_Flt16Vec_memRead( struct bbs_Context* cpA, |
| struct bts_Flt16Vec* ptrA, |
| const uint16* memPtrA, |
| struct bbs_MemSeg* mspA ) |
| { |
| uint32 memSizeL; |
| if( bbs_Context_error( cpA ) ) return 0; |
| memPtrA += bbs_memRead32( &memSizeL, memPtrA ); |
| memPtrA += bbs_Int16Arr_memRead( cpA, &ptrA->arrE, memPtrA, mspA ); |
| memPtrA += bbs_memRead16( &ptrA->expE, memPtrA ); |
| |
| if( memSizeL != bts_Flt16Vec_memSize( cpA, ptrA ) ) |
| { |
| bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_Flt16Vec_memRead( const struct bts_Flt16Vec* ptrA, const void* memPtrA ):\n" |
| "size mismatch" ); |
| return 0; |
| } |
| return memSizeL; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| /* */ |
| /* ---- \ghd{ exec functions } --------------------------------------------- */ |
| /* */ |
| /* ========================================================================= */ |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_maximizeMantisse( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) |
| { |
| uint32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA ); |
| int16 shlL = 0; |
| |
| if( maxAbsL == 0 ) return; /* cannot maximize 0 */ |
| |
| while( maxAbsL < 0x4000 ) |
| { |
| shlL++; |
| maxAbsL <<= 1; |
| } |
| |
| if( shlL > 0 ) |
| { |
| uint32 iL; |
| uint32 sizeL = ptrA->arrE.sizeE; |
| int16* dstL = ptrA->arrE.arrPtrE; |
| for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] <<= shlL; |
| ptrA->expE -= shlL; |
| } |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| uint32 bts_Flt16Vec_maximizeAbsValue( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) |
| { |
| int32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA ); |
| int32 fL; |
| if( maxAbsL == 0 ) return 0; /* vector is zero */ |
| |
| fL = ( int32 )0x7FFF0000 / maxAbsL; |
| |
| { |
| uint32 iL; |
| uint32 sizeL = ptrA->arrE.sizeE; |
| int16* dstL = ptrA->arrE.arrPtrE; |
| for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( int32 )dstL[ iL ] * fL + 32768 ) >> 16; |
| } |
| |
| return fL; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_zeroAverage( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) |
| { |
| uint16 iL; |
| uint16 sizeL = ptrA->arrE.sizeE; |
| int16* dstL = ptrA->arrE.arrPtrE; |
| int16 avgL = bts_Flt16Vec_avg( cpA, ptrA ); |
| for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] -= avgL; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_normalize( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) |
| { |
| uint32 normL = bts_Flt16Vec_norm( cpA, ptrA ); |
| |
| if( normL == 0 ) |
| { |
| /* vector is zero - do nothing */ |
| return; |
| } |
| else |
| { |
| int16* dstL = ptrA->arrE.arrPtrE; |
| uint16 iL; |
| uint16 sizeL = ptrA->arrE.sizeE; |
| int16 expL = 0; |
| int32 fL; |
| |
| /* let norm occupy 17 bits */ |
| if( ( normL & 0xFFFE0000 ) != 0 ) |
| { |
| while( ( ( normL >> -expL ) & 0xFFFE0000 ) != 0 ) expL--; |
| normL >>= -expL; |
| } |
| else |
| { |
| while( ( ( normL << expL ) & 0xFFFF0000 ) == 0 ) expL++; |
| normL <<= expL; |
| } |
| |
| /* fL is positive and occupies only 16 bits - a product with int16 fits in int32 */ |
| fL = ( uint32 )0xFFFFFFFF / normL; |
| |
| /* multiply with factor */ |
| for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * fL ) >> 15 ) + 1 ) >> 1; |
| |
| /* set exponent */ |
| ptrA->expE = expL - 16; |
| } |
| /* |
| { |
| uint32 testNormL = bts_Flt16Vec_norm( cpA, ptrA ); |
| printf( "test norm %f\n", ( float )testNormL / ( 1 << -ptrA->expE ) ); |
| } |
| */ |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_setZero( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA ) |
| { |
| bbs_Int16Arr_fill( cpA, &ptrA->arrE, 0 ); |
| ptrA->expE = 0; |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_mul( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, int32 valA, int16 expA ) |
| { |
| int32 valL = valA; |
| int16 expL = expA; |
| |
| if( valL == 0 ) |
| { |
| bts_Flt16Vec_setZero( cpA, ptrA ); |
| return; |
| } |
| else |
| { |
| uint32 iL; |
| uint32 sizeL = ptrA->arrE.sizeE; |
| int16* dstL = ptrA->arrE.arrPtrE; |
| |
| /* adjust valL to maximum 16 bit accuracy */ |
| uint32 absValL = valL > 0 ? valL : -valL; |
| if( ( absValL & 0xFFFF8000 ) != 0 ) |
| { |
| int32 shrL = 0; |
| while( ( absValL & 0xFFFF8000 ) != 0 ) |
| { |
| absValL >>= 1; |
| shrL++; |
| } |
| |
| if( shrL > 0 ) |
| { |
| valL = ( ( valL >> ( shrL - 1 ) ) + 1 ) >> 1; |
| expL += shrL; |
| if( valL >= 0x08000 ) valL = 0x07FFF; /* saturate */ |
| } |
| } |
| else |
| { |
| int32 shlL = 0; |
| while( ( absValL & 0xFFFFC000 ) == 0 ) |
| { |
| absValL <<= 1; |
| shlL++; |
| } |
| |
| valL <<= shlL; |
| expL -= shlL; |
| } |
| |
| for( iL = 0; iL < sizeL; iL++ ) |
| { |
| dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * valL ) >> 15 ) + 1 ) >> 1; |
| } |
| ptrA->expE += expL + 16; |
| } |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* manPtrA, int32* expPtrA ) |
| { |
| bbs_DEF_fNameL( "void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* matPtrA, int32* expPtrA )" ) |
| uint16 iL; |
| uint16 sizeL = vp1A->arrE.sizeE; |
| const int16* arr1L = vp1A->arrE.arrPtrE; |
| const int16* arr2L = vp2A->arrE.arrPtrE; |
| int16 shrm1L = -1; /* shift minus 1 */ |
| int32 sumL; |
| |
| if( vp1A->arrE.sizeE != vp2A->arrE.sizeE ) |
| { |
| bbs_ERROR1( "%s:\nVectors have different size", fNameL ); |
| return; |
| } |
| |
| sumL = 0; |
| /* shrm1L == -1 */ |
| for( iL = 0; iL < sizeL; iL++ ) |
| { |
| sumL += ( int32 )arr1L[ iL ] * ( int32 )arr2L[ iL ]; |
| if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break; |
| } |
| |
| if( iL < sizeL ) |
| { |
| /* danger of overflow: increase shift; adjust sum */ |
| shrm1L++; |
| sumL = ( ( sumL >> 1 ) + 1 ) >> 1; |
| |
| /* shrm1L == 0 */ |
| for( iL = 0; iL < sizeL; iL++ ) |
| { |
| sumL += ( int32 )( ( arr1L[ iL ] + 1 ) >> 1 ) * ( int32 )( ( arr2L[ iL ] + 1 ) >> 1 ); |
| if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break; |
| } |
| |
| for( iL = 0; iL < sizeL; iL++ ) |
| { |
| if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) |
| { |
| /* danger of overflow: increase shift; adjust sum */ |
| shrm1L++; |
| sumL = ( ( sumL >> 1 ) + 1 ) >> 1; |
| } |
| |
| sumL += ( int32 )( ( ( arr1L[ iL ] >> shrm1L ) + 1 ) >> 1 ) * ( int32 )( ( ( arr2L[ iL ] >> shrm1L ) + 1 ) >> 1 ); |
| } |
| } |
| |
| if( manPtrA != NULL ) *manPtrA = sumL; |
| if( expPtrA != NULL ) *expPtrA = vp1A->expE + vp2A->expE + ( ( shrm1L + 1 ) << 1 ); |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| void bts_Flt16Vec_append( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, struct bts_Flt16Vec* srcPtrA ) |
| { |
| if( ptrA->arrE.sizeE == 0 ) |
| { |
| bts_Flt16Vec_copy( cpA, ptrA, srcPtrA ); |
| } |
| else |
| { |
| uint32 idxL = ptrA->arrE.sizeE; |
| bts_Flt16Vec_size( cpA, ptrA, idxL + srcPtrA->arrE.sizeE ); |
| |
| /* copy data */ |
| bbs_memcpy16( ptrA->arrE.arrPtrE + idxL, srcPtrA->arrE.arrPtrE, srcPtrA->arrE.sizeE ); |
| |
| /* equalize exponent */ |
| if( ptrA->expE > srcPtrA->expE ) |
| { |
| uint32 iL; |
| uint32 sizeL = srcPtrA->arrE.sizeE; |
| uint32 shrL = ptrA->expE - srcPtrA->expE; |
| int16* dstL = ptrA->arrE.arrPtrE + idxL; |
| for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1; |
| } |
| else if( ptrA->expE < srcPtrA->expE ) |
| { |
| uint32 iL; |
| uint32 sizeL = idxL; |
| uint32 shrL = srcPtrA->expE - ptrA->expE; |
| int16* dstL = ptrA->arrE.arrPtrE; |
| for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1; |
| ptrA->expE = srcPtrA->expE; |
| } |
| } |
| } |
| |
| /* ------------------------------------------------------------------------- */ |
| |
| /* ========================================================================= */ |
| |