可能是最快的算法alpha blend汇编源代码.docx

上传人:b****6 文档编号:7290379 上传时间:2023-01-22 格式:DOCX 页数:13 大小:18.77KB
下载 相关 举报
可能是最快的算法alpha blend汇编源代码.docx_第1页
第1页 / 共13页
可能是最快的算法alpha blend汇编源代码.docx_第2页
第2页 / 共13页
可能是最快的算法alpha blend汇编源代码.docx_第3页
第3页 / 共13页
可能是最快的算法alpha blend汇编源代码.docx_第4页
第4页 / 共13页
可能是最快的算法alpha blend汇编源代码.docx_第5页
第5页 / 共13页
点击查看更多>>
下载资源
资源描述

可能是最快的算法alpha blend汇编源代码.docx

《可能是最快的算法alpha blend汇编源代码.docx》由会员分享,可在线阅读,更多相关《可能是最快的算法alpha blend汇编源代码.docx(13页珍藏版)》请在冰豆网上搜索。

可能是最快的算法alpha blend汇编源代码.docx

可能是最快的算法alphablend汇编源代码

可能是最快的算法alphablend汇编源代码,Intel官方提供

Intel官方网站有一个ablend_565的快速汇编算法,理论上是把一块32bit RGBA渲染到16bit的buffer上。我的机器是PIII 800,函数在system memory中进行640*480的256级alpha blending,可以达到100fps,我想可以满足绝大部分的要求了。在这里,我提供了这个算法的应用,希望可以对大家有所帮助。

ablend_565函数,源代码可以直接编译使用,无需其他库函数,感谢intel提供这么好的东西。

首先,我提供一些本人编写的把32bittga文件读入pRGBABuffer的函数

文件尺寸保存在width,height

//-----------------------------------------------------------------------

//Name:

LoadTgaFile(TCHAR*strPathname,DWORD**pRGBABuffer,long*width,long*height)

//Desc:

读取32bittga文件到DWORD缓冲里,返回其尺寸

//Time:

2002.06.2200:

36

//Author:

RealRender

//Para:

//Return:

//Note:

这段代码来自directx7.0sample中的d3dtextr.cpp,我把他提取了出来

//方便使用

//-----------------------------------------------------------------------

BOOLLoadTgaFile(TCHAR*strPathname,DWORD**pRGBABuffer,long*width,long*height)

{

FILE*file=fopen(strPathname,"rb");

if(NULL==file)

returnfalse;

structTargaHeader

{

BYTEIDLength;

BYTEColormapType;

BYTEImageType;

BYTEColormapSpecification[5];

WORDXOrigin;

WORDYOrigin;

WORDImageWidth;

WORDImageHeight;

BYTEPixelDepth;

BYTEImageDescriptor;

}tga;

fread(&tga,sizeof(TargaHeader),1,file);

//Onlytruecolor,non-mappedimagesaresupported

if((0!

=tga.ColormapType)||

(tga.ImageType!

=10&&tga.ImageType!

=2))

{

fclose(file);

returnfalse;

}

//SkiptheIDfield.Thefirstbyteoftheheaderisthelengthofthisfield

if(tga.IDLength)

fseek(file,tga.IDLength,SEEK_CUR);

DWORDm_dwWidth=tga.ImageWidth;

DWORDm_dwHeight=tga.ImageHeight;

DWORDm_dwBPP=tga.PixelDepth;

DWORD*m_pRGBAData=newDWORD[m_dwWidth*m_dwHeight];

if(m_pRGBAData==NULL)

{

fclose(file);

returnfalse;

}

for(DWORDy=0;y

{

DWORDdwOffset=y*m_dwWidth;

if(0==(tga.ImageDescriptor&0x0010))

dwOffset=(m_dwHeight-y-1)*m_dwWidth;

for(DWORDx=0;x

{

if(tga.ImageType==10)

{

BYTEPacketInfo=getc(file);

WORDPacketType=0x80&PacketInfo;

WORDPixelCount=(0x007f&PacketInfo)+1;

if(PacketType)

{

DWORDb=getc(file);

DWORDg=getc(file);

DWORDr=getc(file);

DWORDa=0xff;

if(m_dwBPP==32)

a=getc(file);

while(PixelCount--)

{

m_pRGBAData[dwOffset+x]=(r<<24L)+(g<<16L)+(b<<8L)+(a);

x++;

}

}

else

{

while(PixelCount--)

{

BYTEb=getc(file);

BYTEg=getc(file);

BYTEr=getc(file);

BYTEa=0xff;

if(m_dwBPP==32)

a=getc(file);

m_pRGBAData[dwOffset+x]=(r<<24L)+(g<<16L)+(b<<8L)+(a);

x++;

}

}

}

else

{

BYTEb=getc(file);

BYTEg=getc(file);

BYTEr=getc(file);

BYTEa=0xff;

if(m_dwBPP==32)

a=getc(file);

m_pRGBAData[dwOffset+x]=(r<<24L)+(g<<16L)+(b<<8L)+(a);

x++;

}

}

}

fclose(file);

//Checkforalphacontent

for(DWORDi=0;i<(m_dwWidth*m_dwHeight);i++)

{

if(m_pRGBAData[i]&0x000000ff!

=0xff)

{

//m_bHasAlpha=TRUE;

break;

}

}

*pRGBABuffer=m_pRGBAData;

*width=m_dwWidth;

*height=m_dwHeight;

returntrue;

}

把32bitbuffer分割为rgb和alpha的代码。

注意,分割后的pBitmap一定要是8字节对齐,这是优化的一个重要条件,所以,我的算法中:

BYTE*p=newBYTE[lSize*2+8];

BYTE*pOrig=p;

p+=(DWORD)p%8;

WORD*color=(WORD*)p;

这是不规范的写法,把指针强行改变为8字节对齐。实际使用的时候,要记住需要释放的原始指针不是p,而是pOrig。在这里,我没有释放分配的内存,请谅解。

//-----------------------------------------------------------------------

//Name:

SplitRGBA(DWORD*pRGBABuffer,LPBYTE*pAlpha,LPWORD*pBitmap,longlWidth,longlHeight)

//Desc:

//Time:

2002.06.2200:

36

//Author:

RealRender

//Para:

//Return:

//Note:

把从32bit的缓冲建立16bit的565缓冲和8bit的alpha通道

//-----------------------------------------------------------------------

voidSplitRGBA(DWORD*pRGBABuffer,LPBYTE*pAlpha,LPWORD*pBitmap,longlWidth,longlHeight)

{

longlSize=lWidth*lHeight;

BYTE*alpha=newBYTE[lSize];

BYTE*p=newBYTE[lSize*2+8];

//强行转换为8字节对齐

p+=(DWORD)p%8;

WORD*color=(WORD*)p;

DWORDdwPixel;

DWORDr,g,b,a;

for(inti=0;i

{

dwPixel=pRGBABuffer[i];

r=((dwPixel>>24)&0x000000ff);

g=((dwPixel>>16)&0x000000ff);

b=((dwPixel>>8)&0x000000ff);

a=((dwPixel>>0)&0x000000ff);

alpha[i]=a;

//888i转化为565

color[i]=RGBTo16(r,g,b);

}

*pAlpha=alpha;

*pBitmap=color;

}

//

这个是Intel官方提供的函数。函数的描述,用我的话来说,就是把一个带有256级alpha通道的565颜色数据绘制到16位目标页面。

函数说明:

unsignedchar*lpAlpha,//256级alpha通道

unsignedintiAlpPitch,//alpha通道的pitch

unsignedchar*lpSrc,//原色彩缓冲

unsignedintiSrcX,//

unsignedintiSrcY,//原色彩位置

unsignedintiSrcPitch,//原色彩pitch

unsignedchar*lpDst,//目标缓冲

unsignedintiDstX,

unsignedintiDstY,//目标位置

unsignedintiDstW,

unsignedintiDstH,//目标缓冲的尺寸

unsignedintiDstPitch//目标缓冲的pitch

voidablend_565(unsignedchar*lpAlpha,unsignedintiAlpPitch,

unsignedchar*lpSrc,unsignedintiSrcX,unsignedintiSrcY,

unsignedintiSrcPitch,unsignedchar*lpDst,

unsignedintiDstX,unsignedintiDstY,

unsignedintiDstW,unsignedintiDstH,

unsignedintiDstPitch)

{

//Maskforisolatingthered,green,andbluecomponents

static__int64MASKB=0x001F001F001F001F;

static__int64MASKG=0x07E007E007E007E0;

static__int64MASKSHIFTG=0x03F003F003F003F0;

static__int64MASKR=0xF800F800F800F800;

//constantsusedbytheintegeralphablendingequation

static__int64SIXTEEN=0x0010001000100010;

static__int64FIVETWELVE=0x0200020002000200;

static__int64SIXONES=0x003F003F003F003F;

unsignedchar*lpLinearDstBp=(iDstX<<1)+(iDstY*iDstPitch)+lpDst;//basepointerforlineardestination

unsignedchar*lpLinearSrcBp=(iSrcX<<1)+(iSrcY*iSrcPitch)+lpSrc;//basepointerforlinearsource

unsignedchar*lpLinearAlpBp=iSrcX+(iSrcY*iAlpPitch)+lpAlpha;//basepointerforlinearalpha

_asm{

movesi,lpLinearSrcBp;//src

movedi,lpLinearDstBp;//dst

moveax,lpLinearAlpBp;//alpha

movecx,iDstH;//ecx=numberoflinestocopy

movebx,iDstW;//ebx=spanwidthtocopy

testesi,6;//checkifsourceaddressisqwordaligned

//sinceaddrcominginisalwayswordaligned(16bit)

jnzdone;//ifnotqwordalignedwedon'tdoanything

primeloop:

movdmm1,[eax];//mm1=00000000a3a2a1a0

pxormm2,mm2;//mm2=0;

movqmm4,[esi];//g1:

mm4=src3src2src1src0

punpcklbwmm1,mm2;//mm1=00a300a200a100a0

loopqword:

movedx,[eax];

testebx,0xFFFFFFFC;//checkifonly3pixelsleft

jzcheckback;//3orlesspixelsleft

//earlyouttests

cmpedx,0xffffffff;//testforalphavalueof1

jecopyback;//if1'scopythesourcepixelstothedestination

testedx,0xffffffff;//testforalphavalueof0

jzleavefront;//ifsogotothenext4pixels

//thealphablendstarts

//green

//i=a*sg+(63-a)*dg;

//i=(i+32)+((i+32)>>6)>>6;

//red

//i=a*sr+(31-a)*dr;

//i=(i+16)+((i+16)>>5)>>5;

movqmm5,[edi];//g2:

mm5=dst3dst2dst1dst0

psrlwmm1,2;//mm1=a?

>>2nukeoutlower2bits

movqmm7,MASKSHIFTG;//g3:

mm7=1bitshiftedgreenmask

psrlwmm4,1;//g3a:

movesrcgreendownby1sothatwewon'toverflow

movqmm0,mm1;//mm0=00a300a200a100a0

psrlwmm5,1;//g3b:

movedstgreendownby1sothatwewon'toverflow

psrlwmm1,1;//mm1=a?

>>1nukeoutlower1bits

pandmm4,mm7;//g5:

mm4=sg3sg2sg1sg0

movqmm2,SIXONES;//g4:

mm2=63

pandmm5,mm7;//g7:

mm5=dg3dg2dg1dg0

movqmm3,[esi];//b1:

mm3=src3src2src1src0

psubsbmm2,mm0;//g6:

mm2=63-a363-a263-a163-a0

movqmm7,MASKB;//b2:

mm7=BLUEMASK

pmullwmm4,mm0;//g8:

mm4=sg?

*a?

movqmm0,[edi];//b3:

mm0=dst3dst2dst1dst0

pmullwmm5,mm2;//g9:

mm5=dg?

*(1-a?

movqmm2,mm7;//b4:

mm2=fiveones

pandmm3,mm7;//b4:

mm3=sb3sb2sb1sb0

pmullwmm3,mm1;//b6:

mm3=sb?

*a?

pandmm0,mm7;//b5:

mm0=db3db2db1db0

movqmm7,[esi];//r1:

mm7=src3src2src1src0

paddwmm4,mm5;//g10:

mm4=sg?

*a?

+dg?

*(1-a?

pandmm7,MASKR;//r2:

mm7=sr3sr2sr1sr0 

psubsbmm2,mm1;//b5a:

mm2=31-a331-a231-a131-a0

paddwmm4,FIVETWELVE;//g11:

mm4=(mm4+512)green

pmullwmm0,mm2;//b7:

mm0=db?

*(1-a?

movqmm5,mm4;//g12:

mm5=mm4green

psrlwmm7,11;//r4:

shiftsrcreddowntoposition0

psrlwmm4,6;//g13:

mm4=mm4>>6

paddwmm4,mm5;//g14:

mm4=mm4+mm5green

paddwmm0,mm3;//b8:

mm0=sb?

*a?

+db?

*(1-a?

movqmm5,[edi];//r3:

mm5=dst3dst2dst1dst0

paddwmm0,SIXTEEN;//b9:

mm0=(mm0+16)blue

pandmm5,MASKR;//r5:

mm5=dr3dr2dr1dr0

psrlwmm4,5;//g15:

mm4=0?

g00?

g00?

g00?

g0green

movqmm3,mm0;//b10:

mm3=mm0blue

psrlwmm0,5;//b11:

mm0=mm0>>5blue

psrlwmm5,11;//r6:

shiftdstreddowntoposition0

paddwmm0,mm3;//b12:

mm0=mm3+mm0blue

psrlwmm0,5;//b13:

mm0=000b000b000b000bblue

pmullwmm7,mm1;//mm7=sr?

*a?

pandmm4,MASKG;//g16:

mm4=00g000g000g000g0green

pmullwmm5,mm2;//r7:

mm5=dr?

*(31-a?

pormm0,mm4;//mm0=00gb00gb00gb00gb

addeax,4;//movetonext4alphas

addesi,8;//movetonext4pixelsinsrc

addedi,8;//movetonext4pixelsindst

movdmm1,[eax];//mm1=00000000a3a2a1a0

paddwmm5,mm7;//r8:

mm5=sr?

*a?

+dr?

*(31-a?

paddwmm5,SIXTEEN;//r9:

mm5=(mm5+16)red

pxormm2,mm2;//mm2=0;

movqmm7,mm5;//r10:

mm7=mm5red

psrlwmm5,5;//r11:

mm5=mm5>>5red

movqmm4,[esi];//g1:

mm4=src3src2src1src0

paddwmm5,mm7;//r12:

mm5=mm7+mm5red

punpcklbwmm1,mm2;//mm1=00a300a200a100a0

psrlwmm5,5;//r13:

mm5=mm5>>5red

psllwmm5,11;//r14:

mm5=mm5<<10red

pormm0,mm5;//mm0=0rgb0rgb0rgb0rgb

subebx,4;//polishedoff4pixels

movq[edi-8],mm0;//dst=0rgb0rgb0rgb0rgb

jmploopqword;//gobacktostart

copyback:

movq[edi],mm4;//copysourcetodestination

leavefront:

addedi,8;//advancedestinationby4pixels

addeax,4;//advancealphaby4

addesi,8;//advancesourceby4pixels

subebx,4;//decreasepixelcountby4

jmpprimeloop;

checkback:

testebx,0xFF;//checkif0pixelsleft

jznextline;//donewiththisspan

//backalign:

//workoutbackendpixels

movqmm5,[edi];//g2:

mm5=dst3dst2dst1dst0

psrlwmm1,2;//mm1=a?

>>2nukeout

展开阅读全文
相关资源
猜你喜欢
相关搜索

当前位置:首页 > 初中教育 > 科学

copyright@ 2008-2022 冰豆网网站版权所有

经营许可证编号:鄂ICP备2022015515号-1