Linux下VFP NEON浮点编译-pppStar-ChinaUnix博客

The last one

首页　| 　博文目录　| 　关于我

pppStar

博客访问： 1875774
博文数量： 274
博客积分： 2366
博客等级：大尉
技术积分： 1880
用户组：普通用户
注册时间： 2007-04-22 09:37

文章分类

全部博文（274）

samba（2）
live555（1）
ffmpeg（9）
mp4（6）
rtsp（11）
net program（3）
h.264（5）

X264实现（2）

差错控制（0）

NAL（0）

变换编码（0）

熵编码（0）

速率控制(rc)（0）

运动补偿（mc）（0）

运动估计（me）（0）
杂2（14）
生活（4）
杂货铺（32）
linux（154）
项目管理（6）
C++（20）
未分配的博文（7）

文章存档

2022年（1）

2020年（10）

2019年（7）

2018年（18）

2017年（26）

2016年（32）

2015年（43）

2014年（30）

2013年（44）

2012年（36）

2011年（17）

2010年（10）

我的朋友

相关博文

Linux下VFP NEON浮点编译

分类： LINUX

2016-01-31 08:54:11

NEON:SIMD(Single Instruction Multiple Data 单指令多重数据) 指令集，其针对多媒体和讯号处理程式具备标准化的加速能力。

VFP: (Vector Floating Point), 向量浮点运算单元。ARM11（如 s3c6410）支持VFPv2，Cortex-A8（如 s5pv210）支持VFPv3。

NEON和VFPv3 浮点协处理器共享寄存器组，所以对于普通的标量浮点运算，编译生成的汇编指令是一样的（见下面的例子）。

编译选项：

-mfpu=name（name 可取 neon 或 vfpvx，如 vfp、vfpv3）：指定FPU 单元（注意等号两边不能有空格）

-mfloat-abi=name（name 可取 soft、hard、softfp）：指定使用软件浮点（soft）、硬件浮点（hard），或使用硬件浮点指令但兼容软浮点调用接口（softfp）

如果只指定 -mfpu，那么默认编译不会选择硬件浮点指令集

如果只指定 -mfloat-abi=hard 或者 -mfloat-abi=softfp，那么编译会使用硬件浮点指令集

测试C文件

			[cpp] view plaincopy
			
			int main(void)  
		
			{  
		
			    float f1, f2, f3;  
		
			    f1 = 1.2;  
		
			    f2 = 1.3;  
		
			    f3 = f1 / f2;  
		
			    return 0;  
		
			}

1、 arm-eabi-gcc -S hello.c -mfpu=neon

			[cpp] view plaincopy
			
			    .arch armv5te  
		
			    .fpu softvfp  
		
			    .eabi_attribute 20, 1  
		
			    .eabi_attribute 21, 1  
		
			    .eabi_attribute 23, 3  
		
			    .eabi_attribute 24, 1  
		
			    .eabi_attribute 25, 1  
		
			    .eabi_attribute 26, 2  
		
			    .eabi_attribute 30, 6  
		
			    .eabi_attribute 18, 4  
		
			    .file   "hello.c"  
		
			    .global __aeabi_fdiv  
		
			    .text  
		
			    .align  2  
		
			    .global main  
		
			    .type   main, %function  
		
			main:  
		
			    .fnstart  
		
			.LFB0:  
		
			    @ args = 0, pretend = 0, frame = 16  
		
			    @ frame_needed = 1, uses_anonymous_args = 0  
		
			    stmfd   sp!, {fp, lr}  
		
			    .save {fp, lr}  
		
			.LCFI0:  
		
			    .setfp fp, sp, #4  
		
			    add fp, sp, #4  
		
			.LCFI1:  
		
			    .pad #16  
		
			    sub sp, sp, #16  
		
			.LCFI2:  
		
			    ldr r3, .L3 @ float  
		
			    str r3, [fp, #-16]  @ float  
		
			    ldr r3, .L3+4   @ float  
		
			    str r3, [fp, #-12]  @ float  
		
			    ldr r0, [fp, #-16]  @ float  
		
			    ldr r1, [fp, #-12]  @ float  
		
			    bl  __aeabi_fdiv  
		
			    mov r3, r0  
		
			    str r3, [fp, #-8]   @ float  
		
			    mov r3, #0  
		
			    mov r0, r3  
		
			    sub sp, fp, #4  
		
			    ldmfd   sp!, {fp, pc}  
		
			.L4:  
		
			    .align  2  
		
			.L3:  
		
			    .word   1067030938  
		
			    .word   1067869798  
		
			.LFE0:  
		
			    .fnend  
		
			    .size   main, .-main  
		
			    .ident  "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"  
		
			    .section    .note.GNU-stack,"",%progbits

2、 arm-eabi-gcc -S hello.c -mfpu=vfp

			[cpp] view plaincopy
			
			    .arch armv5te  
		
			    .fpu softvfp  
		
			    .eabi_attribute 20, 1  
		
			    .eabi_attribute 21, 1  
		
			    .eabi_attribute 23, 3  
		
			    .eabi_attribute 24, 1  
		
			    .eabi_attribute 25, 1  
		
			    .eabi_attribute 26, 2  
		
			    .eabi_attribute 30, 6  
		
			    .eabi_attribute 18, 4  
		
			    .file   "hello.c"  
		
			    .global __aeabi_fdiv  
		
			    .text  
		
			    .align  2  
		
			    .global main  
		
			    .type   main, %function  
		
			main:  
		
			    .fnstart  
		
			.LFB0:  
		
			    @ args = 0, pretend = 0, frame = 16  
		
			    @ frame_needed = 1, uses_anonymous_args = 0  
		
			    stmfd   sp!, {fp, lr}  
		
			    .save {fp, lr}  
		
			.LCFI0:  
		
			    .setfp fp, sp, #4  
		
			    add fp, sp, #4  
		
			.LCFI1:  
		
			    .pad #16  
		
			    sub sp, sp, #16  
		
			.LCFI2:  
		
			    ldr r3, .L3 @ float  
		
			    str r3, [fp, #-16]  @ float  
		
			    ldr r3, .L3+4   @ float  
		
			    str r3, [fp, #-12]  @ float  
		
			    ldr r0, [fp, #-16]  @ float  
		
			    ldr r1, [fp, #-12]  @ float  
		
			    bl  __aeabi_fdiv  
		
			    mov r3, r0  
		
			    str r3, [fp, #-8]   @ float  
		
			    mov r3, #0  
		
			    mov r0, r3  
		
			    sub sp, fp, #4  
		
			    ldmfd   sp!, {fp, pc}  
		
			.L4:  
		
			    .align  2  
		
			.L3:  
		
			    .word   1067030938  
		
			    .word   1067869798  
		
			.LFE0:  
		
			    .fnend  
		
			    .size   main, .-main  
		
			    .ident  "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"  
		
			    .section    .note.GNU-stack,"",%progbits

可以看到上面两个例子，使用的都是 .fpu softvfp，浮点除法是通过调用软件库函数 __aeabi_fdiv 完成的，并没有生成硬件浮点指令。

3、 arm-eabi-gcc -S hello.c -mfpu=neon -mfloat-abi=hard

			[cpp] view plaincopy
			
			    .arch armv5te  
		
			    .eabi_attribute 27, 3  
		
			    .eabi_attribute 28, 1  
		
			    .fpu neon  
		
			    .eabi_attribute 20, 1  
		
			    .eabi_attribute 21, 1  
		
			    .eabi_attribute 23, 3  
		
			    .eabi_attribute 24, 1  
		
			    .eabi_attribute 25, 1  
		
			    .eabi_attribute 26, 2  
		
			    .eabi_attribute 30, 6  
		
			    .eabi_attribute 18, 4  
		
			    .file   "hello.c"  
		
			    .text  
		
			    .align  2  
		
			    .global main  
		
			    .type   main, %function  
		
			main:  
		
			    .fnstart  
		
			.LFB0:  
		
			    @ args = 0, pretend = 0, frame = 16  
		
			    @ frame_needed = 1, uses_anonymous_args = 0  
		
			    @ link register save eliminated.  
		
			    str fp, [sp, #-4]!  
		
			    .save {fp}  
		
			.LCFI0:  
		
			    .setfp fp, sp, #0  
		
			    add fp, sp, #0  
		
			.LCFI1:  
		
			    .pad #20  
		
			    sub sp, sp, #20  
		
			.LCFI2:  
		
			    flds    s15, .L3  
		
			    fsts    s15, [fp, #-16]  
		
			    flds    s15, .L3+4  
		
			    fsts    s15, [fp, #-12]  
		
			    flds    s14, [fp, #-16]  
		
			    flds    s15, [fp, #-12]  
		
			    fdivs   s15, s14, s15  
		
			    fsts    s15, [fp, #-8]  
		
			    mov r3, #0  
		
			    mov r0, r3  
		
			    add sp, fp, #0  
		
			    ldmfd   sp!, {fp}  
		
			    bx  lr  
		
			.L4:  
		
			    .align  2  
		
			.L3:  
		
			    .word   1067030938  
		
			    .word   1067869798  
		
			.LFE0:  
		
			    .fnend  
		
			    .size   main, .-main  
		
			    .ident  "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"  
		
			    .section    .note.GNU-stack,"",%progbits

4、 arm-eabi-gcc -S hello.c -mfpu=neon -mfloat-abi=softfp

			[cpp] view plaincopy
			
			    .arch armv5te  
		
			    .eabi_attribute 27, 3  
		
			    .fpu neon  
		
			    .eabi_attribute 20, 1  
		
			    .eabi_attribute 21, 1  
		
			    .eabi_attribute 23, 3  
		
			    .eabi_attribute 24, 1  
		
			    .eabi_attribute 25, 1  
		
			    .eabi_attribute 26, 2  
		
			    .eabi_attribute 30, 6  
		
			    .eabi_attribute 18, 4  
		
			    .file   "hello.c"  
		
			    .text  
		
			    .align  2  
		
			    .global main  
		
			    .type   main, %function  
		
			main:  
		
			    .fnstart  
		
			.LFB0:  
		
			    @ args = 0, pretend = 0, frame = 16  
		
			    @ frame_needed = 1, uses_anonymous_args = 0  
		
			    @ link register save eliminated.  
		
			    str fp, [sp, #-4]!  
		
			    .save {fp}  
		
			.LCFI0:  
		
			    .setfp fp, sp, #0  
		
			    add fp, sp, #0  
		
			.LCFI1:  
		
			    .pad #20  
		
			    sub sp, sp, #20  
		
			.LCFI2:  
		
			    flds    s15, .L3  
		
			    fsts    s15, [fp, #-16]  
		
			    flds    s15, .L3+4  
		
			    fsts    s15, [fp, #-12]  
		
			    flds    s14, [fp, #-16]  
		
			    flds    s15, [fp, #-12]  
		
			    fdivs   s15, s14, s15  
		
			    fsts    s15, [fp, #-8]  
		
			    mov r3, #0  
		
			    mov r0, r3  
		
			    add sp, fp, #0  
		
			    ldmfd   sp!, {fp}  
		
			    bx  lr  
		
			.L4:  
		
			    .align  2  
		
			.L3:  
		
			    .word   1067030938  
		
			    .word   1067869798  
		
			.LFE0:  
		
			    .fnend  
		
			    .size   main, .-main  
		
			    .ident  "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"  
		
			    .section    .note.GNU-stack,"",%progbits

5、 arm-eabi-gcc -S hello.c -mfpu=vfpv3 -mfloat-abi=softfp

			[cpp] view plaincopy
			
			    .arch armv5te  
		
			    .eabi_attribute 27, 3  
		
			    .fpu vfpv3  
		
			    .eabi_attribute 20, 1  
		
			    .eabi_attribute 21, 1  
		
			    .eabi_attribute 23, 3  
		
			    .eabi_attribute 24, 1  
		
			    .eabi_attribute 25, 1  
		
			    .eabi_attribute 26, 2  
		
			    .eabi_attribute 30, 6  
		
			    .eabi_attribute 18, 4  
		
			    .file   "hello.c"  
		
			    .text  
		
			    .align  2  
		
			    .global main  
		
			    .type   main, %function  
		
			main:  
		
			    .fnstart  
		
			.LFB0:  
		
			    @ args = 0, pretend = 0, frame = 16  
		
			    @ frame_needed = 1, uses_anonymous_args = 0  
		
			    @ link register save eliminated.  
		
			    str fp, [sp, #-4]!  
		
			    .save {fp}  
		
			.LCFI0:  
		
			    .setfp fp, sp, #0  
		
			    add fp, sp, #0  
		
			.LCFI1:  
		
			    .pad #20  
		
			    sub sp, sp, #20  
		
			.LCFI2:  
		
			    flds    s15, .L3  
		
			    fsts    s15, [fp, #-16]  
		
			    flds    s15, .L3+4  
		
			    fsts    s15, [fp, #-12]  
		
			    flds    s14, [fp, #-16]  
		
			    flds    s15, [fp, #-12]  
		
			    fdivs   s15, s14, s15  
		
			    fsts    s15, [fp, #-8]  
		
			    mov r3, #0  
		
			    mov r0, r3  
		
			    add sp, fp, #0  
		
			    ldmfd   sp!, {fp}  
		
			    bx  lr  
		
			.L4:  
		
			    .align  2  
		
			.L3:  
		
			    .word   1067030938  
		
			    .word   1067869798  
		
			.LFE0:  
		
			    .fnend  
		
			    .size   main, .-main  
		
			    .ident  "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"  
		
			    .section    .note.GNU-stack,"",%progbits

6、 arm-eabi-gcc -S hello.c -mfpu=vfpv3 -mfloat-abi=hard

			[cpp] view plaincopy
			
			    .arch armv5te  
		
			    .eabi_attribute 27, 3  
		
			    .eabi_attribute 28, 1  
		
			    .fpu vfpv3  
		
			    .eabi_attribute 20, 1  
		
			    .eabi_attribute 21, 1  
		
			    .eabi_attribute 23, 3  
		
			    .eabi_attribute 24, 1  
		
			    .eabi_attribute 25, 1  
		
			    .eabi_attribute 26, 2  
		
			    .eabi_attribute 30, 6  
		
			    .eabi_attribute 18, 4  
		
			    .file   "hello.c"  
		
			    .text  
		
			    .align  2  
		
			    .global main  
		
			    .type   main, %function  
		
			main:  
		
			    .fnstart  
		
			.LFB0:  
		
			    @ args = 0, pretend = 0, frame = 16  
		
			    @ frame_needed = 1, uses_anonymous_args = 0  
		
			    @ link register save eliminated.  
		
			    str fp, [sp, #-4]!  
		
			    .save {fp}  
		
			.LCFI0:  
		
			    .setfp fp, sp, #0  
		
			    add fp, sp, #0  
		
			.LCFI1:  
		
			    .pad #20  
		
			    sub sp, sp, #20  
		
			.LCFI2:  
		
			    flds    s15, .L3  
		
			    fsts    s15, [fp, #-16]  
		
			    flds    s15, .L3+4  
		
			    fsts    s15, [fp, #-12]  
		
			    flds    s14, [fp, #-16]  
		
			    flds    s15, [fp, #-12]  
		
			    fdivs   s15, s14, s15  
		
			    fsts    s15, [fp, #-8]  
		
			    mov r3, #0  
		
			    mov r0, r3  
		
			    add sp, fp, #0  
		
			    ldmfd   sp!, {fp}  
		
			    bx  lr  
		
			.L4:  
		
			    .align  2  
		
			.L3:  
		
			    .word   1067030938  
		
			    .word   1067869798  
		
			.LFE0:  
		
			    .fnend  
		
			    .size   main, .-main  
		
			    .ident  "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"  
		
			    .section    .note.GNU-stack,"",%progbits

从上面可以看到，softfp和hard使用的指令集是一样的，都是硬件浮点指令（flds/fsts/fdivs）；在这个例子中，两者输出的差别只是hard多了一行 .eabi_attribute 28, 1（标记使用VFP寄存器传递浮点参数）。neon和vfp的区别，仅仅体现在.fpu vfpv3和.fpu neon.

7、 arm-eabi-gcc -S hello.c -mfloat-abi=hard

			[cpp] view plaincopy
			
			    .arch armv5te  
		
			    .eabi_attribute 27, 3  
		
			    .eabi_attribute 28, 1  
		
			    .fpu vfp  
		
			    .eabi_attribute 20, 1  
		
			    .eabi_attribute 21, 1  
		
			    .eabi_attribute 23, 3  
		
			    .eabi_attribute 24, 1  
		
			    .eabi_attribute 25, 1  
		
			    .eabi_attribute 26, 2  
		
			    .eabi_attribute 30, 6  
		
			    .eabi_attribute 18, 4  
		
			    .file   "hello.c"  
		
			    .text  
		
			    .align  2  
		
			    .global main  
		
			    .type   main, %function  
		
			main:  
		
			    .fnstart  
		
			.LFB0:  
		
			    @ args = 0, pretend = 0, frame = 16  
		
			    @ frame_needed = 1, uses_anonymous_args = 0  
		
			    @ link register save eliminated.  
		
			    str fp, [sp, #-4]!  
		
			    .save {fp}  
		
			.LCFI0:  
		
			    .setfp fp, sp, #0  
		
			    add fp, sp, #0  
		
			.LCFI1:  
		
			    .pad #20  
		
			    sub sp, sp, #20  
		
			.LCFI2:  
		
			    flds    s15, .L3  
		
			    fsts    s15, [fp, #-16]  
		
			    flds    s15, .L3+4  
		
			    fsts    s15, [fp, #-12]  
		
			    flds    s14, [fp, #-16]  
		
			    flds    s15, [fp, #-12]  
		
			    fdivs   s15, s14, s15  
		
			    fsts    s15, [fp, #-8]  
		
			    mov r3, #0  
		
			    mov r0, r3  
		
			    add sp, fp, #0  
		
			    ldmfd   sp!, {fp}  
		
			    bx  lr  
		
			.L4:  
		
			    .align  2  
		
			.L3:  
		
			    .word   1067030938  
		
			    .word   1067869798  
		
			.LFE0:  
		
			    .fnend  
		
			    .size   main, .-main  
		
			    .ident  "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"  
		
			    .section    .note.GNU-stack,"",%progbits

当直接使用-mfloat-abi=hard时，会默认使用.fpu vfp硬件浮点。

阅读(2107) | 评论(0) | 转发(0) |

上一篇：C++内存泄露

下一篇：为hisiv200交叉编译valgrind3.7

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6