NEON:SIMD(Single Instruction Multiple Data 单指令多重数据) 指令集, 其针对多媒体和讯号处理程式具备标准化的加速能力。
VFP(Vector Floating Point):向量浮点运算单元。ARM11(如 s3c6410)支持 VFPv2,Cortex-A8(如 s5pv210)支持 VFPv3。
NEON和VFPv3 浮点协处理器共享寄存器组。对于本文这种标量浮点运算,无论 -mfpu 指定 neon 还是 vfpv3,编译器生成的都是同样的 VFP 指令(如 flds、fsts、fdivs),所以汇编输出中的指令是一样的。
编译选项:
-mfpu=name(name 为 neon 或 vfpvx,等号两侧不能有空格):指定FPU 单元
-mfloat-abi=name(name 为 soft、softfp 或 hard,等号两侧不能有空格):soft 使用软件浮点库;softfp 使用硬件浮点指令,但函数调用接口与软浮点兼容(浮点参数通过整数寄存器传递);hard 使用硬件浮点指令,并通过浮点寄存器传递浮点参数
如果只指定 -mfpu,那么默认编译不会选择硬件浮点指令集(见下面例 1、例 2,输出仍是 .fpu softvfp)
如果只指定 -mfloat-abi=hard 或者 -mfloat-abi=softfp,那么编译会使用硬件浮点指令集(例 7 中只指定 hard 时,输出为 .fpu vfp)
测试C文件
-
/*
 * Minimal test program used for every listing in this article: it performs
 * a single float division so the generated assembly shows how the chosen
 * -mfpu / -mfloat-abi options lower float arithmetic.
 */
int main(void)
-
{
-
float f1, f2, f3; /* three float locals; f3 is written but never read */
-
f1 = 1.2; /* 1.2 is a double literal, narrowed to float on assignment */
-
f2 = 1.3; /* same narrowing as above */
-
f3 = f1 / f2; /* the float division under test */
-
return 0;
-
}
1、 arm-eabi-gcc -S hello.c -mfpu=neon
-
@ GCC output for hello.c when only -mfpu=neon is given (no -mfloat-abi).
@ Nothing below touches a VFP/NEON register: the float values travel through
@ r0-r3 and the division is a call to __aeabi_fdiv.
.arch armv5te
-
.fpu softvfp @ FPU recorded by the compiler is softvfp, even though -mfpu=neon was passed
-
.eabi_attribute 20, 1
-
.eabi_attribute 21, 1
-
.eabi_attribute 23, 3
-
.eabi_attribute 24, 1
-
.eabi_attribute 25, 1
-
.eabi_attribute 26, 2
-
.eabi_attribute 30, 6
-
.eabi_attribute 18, 4
-
.file "hello.c"
-
.global __aeabi_fdiv @ external symbol called below to perform the division
-
.text
-
.align 2
-
.global main
-
.type main, %function
-
main:
-
.fnstart
-
.LFB0:
-
@ args = 0, pretend = 0, frame = 16
-
@ frame_needed = 1, uses_anonymous_args = 0
-
stmfd sp!, {fp, lr} @ push fp and lr
-
.save {fp, lr}
-
.LCFI0:
-
.setfp fp, sp, #4
-
add fp, sp, #4 @ fp = sp + 4
-
.LCFI1:
-
.pad #16
-
sub sp, sp, #16 @ reserve 16 bytes of stack for the locals
-
.LCFI2:
-
ldr r3, .L3 @ float: first literal-pool word (bit pattern of 1.2f)
-
str r3, [fp, #-16] @ float: stored at [fp, #-16] (f1 = 1.2 in hello.c)
-
ldr r3, .L3+4 @ float: second literal-pool word (bit pattern of 1.3f)
-
str r3, [fp, #-12] @ float: stored at [fp, #-12] (f2 = 1.3 in hello.c)
-
ldr r0, [fp, #-16] @ float: dividend goes in r0
-
ldr r1, [fp, #-12] @ float: divisor goes in r1
-
bl __aeabi_fdiv @ division by library call; presumably the ARM run-time ABI single-precision divide helper - confirm
-
mov r3, r0 @ take the result returned in r0
-
str r3, [fp, #-8] @ float: stored at [fp, #-8] (f3 in hello.c)
-
mov r3, #0
-
mov r0, r3 @ r0 = 0, the value of "return 0"
-
sub sp, fp, #4 @ sp = fp - 4, back to where fp and lr were pushed
-
ldmfd sp!, {fp, pc} @ pop fp and load pc from the saved lr slot, i.e. return
-
.L4:
-
.align 2
-
.L3:
-
.word 1067030938 @ 0x3F99999A, the single-precision encoding of 1.2
-
.word 1067869798 @ 0x3FA66666, the single-precision encoding of 1.3
-
.LFE0:
-
.fnend
-
.size main, .-main
-
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
-
.section .note.GNU-stack,"",%progbits
2、 arm-eabi-gcc -S hello.c -mfpu=vfp
-
@ GCC output for hello.c when only -mfpu=vfp is given (no -mfloat-abi).
@ Nothing below touches a VFP register: the float values travel through
@ r0-r3 and the division is a call to __aeabi_fdiv.
.arch armv5te
-
.fpu softvfp @ FPU recorded by the compiler is softvfp, even though -mfpu=vfp was passed
-
.eabi_attribute 20, 1
-
.eabi_attribute 21, 1
-
.eabi_attribute 23, 3
-
.eabi_attribute 24, 1
-
.eabi_attribute 25, 1
-
.eabi_attribute 26, 2
-
.eabi_attribute 30, 6
-
.eabi_attribute 18, 4
-
.file "hello.c"
-
.global __aeabi_fdiv @ external symbol called below to perform the division
-
.text
-
.align 2
-
.global main
-
.type main, %function
-
main:
-
.fnstart
-
.LFB0:
-
@ args = 0, pretend = 0, frame = 16
-
@ frame_needed = 1, uses_anonymous_args = 0
-
stmfd sp!, {fp, lr} @ push fp and lr
-
.save {fp, lr}
-
.LCFI0:
-
.setfp fp, sp, #4
-
add fp, sp, #4 @ fp = sp + 4
-
.LCFI1:
-
.pad #16
-
sub sp, sp, #16 @ reserve 16 bytes of stack for the locals
-
.LCFI2:
-
ldr r3, .L3 @ float: first literal-pool word (bit pattern of 1.2f)
-
str r3, [fp, #-16] @ float: stored at [fp, #-16] (f1 = 1.2 in hello.c)
-
ldr r3, .L3+4 @ float: second literal-pool word (bit pattern of 1.3f)
-
str r3, [fp, #-12] @ float: stored at [fp, #-12] (f2 = 1.3 in hello.c)
-
ldr r0, [fp, #-16] @ float: dividend goes in r0
-
ldr r1, [fp, #-12] @ float: divisor goes in r1
-
bl __aeabi_fdiv @ division by library call; presumably the ARM run-time ABI single-precision divide helper - confirm
-
mov r3, r0 @ take the result returned in r0
-
str r3, [fp, #-8] @ float: stored at [fp, #-8] (f3 in hello.c)
-
mov r3, #0
-
mov r0, r3 @ r0 = 0, the value of "return 0"
-
sub sp, fp, #4 @ sp = fp - 4, back to where fp and lr were pushed
-
ldmfd sp!, {fp, pc} @ pop fp and load pc from the saved lr slot, i.e. return
-
.L4:
-
.align 2
-
.L3:
-
.word 1067030938 @ 0x3F99999A, the single-precision encoding of 1.2
-
.word 1067869798 @ 0x3FA66666, the single-precision encoding of 1.3
-
.LFE0:
-
.fnend
-
.size main, .-main
-
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
-
.section .note.GNU-stack,"",%progbits
可以看到上面两个例子,使用的是 .fpu softvfp,除法是通过 bl __aeabi_fdiv 调用软件浮点库函数完成的,没有出现任何硬件浮点指令。
3、 arm-eabi-gcc -S hello.c -mfpu=neon -mfloat-abi=hard
-
@ GCC output for hello.c built with -mfpu=neon -mfloat-abi=hard.
@ The arithmetic is done in single-precision registers s14/s15 with
@ flds/fsts/fdivs; there is no call to a software division routine.
@ NOTE(review): .arch is still armv5te although .fpu is neon; a real NEON
@ target would normally also pass an ARMv7 -march/-mcpu option - confirm.
.arch armv5te
-
.eabi_attribute 27, 3 @ NOTE(review): tag 27 is Tag_ABI_HardFP_use in the ARM build-attributes addendum - confirm
-
.eabi_attribute 28, 1 @ NOTE(review): tag 28 is Tag_ABI_VFP_args (FP arguments passed in VFP registers) - confirm
-
.fpu neon @ FPU recorded by the compiler: neon
-
.eabi_attribute 20, 1
-
.eabi_attribute 21, 1
-
.eabi_attribute 23, 3
-
.eabi_attribute 24, 1
-
.eabi_attribute 25, 1
-
.eabi_attribute 26, 2
-
.eabi_attribute 30, 6
-
.eabi_attribute 18, 4
-
.file "hello.c"
-
.text
-
.align 2
-
.global main
-
.type main, %function
-
main:
-
.fnstart
-
.LFB0:
-
@ args = 0, pretend = 0, frame = 16
-
@ frame_needed = 1, uses_anonymous_args = 0
-
@ link register save eliminated.
-
str fp, [sp, #-4]! @ push fp only (pre-decrement sp by 4); lr is not saved
-
.save {fp}
-
.LCFI0:
-
.setfp fp, sp, #0
-
add fp, sp, #0 @ fp = sp
-
.LCFI1:
-
.pad #20
-
sub sp, sp, #20 @ reserve 20 bytes of stack below fp
-
.LCFI2:
-
flds s15, .L3 @ s15 = first literal-pool word (1.2f)
-
fsts s15, [fp, #-16] @ store at [fp, #-16] (f1 = 1.2 in hello.c)
-
flds s15, .L3+4 @ s15 = second literal-pool word (1.3f)
-
fsts s15, [fp, #-12] @ store at [fp, #-12] (f2 = 1.3 in hello.c)
-
flds s14, [fp, #-16] @ s14 = dividend
-
flds s15, [fp, #-12] @ s15 = divisor
-
fdivs s15, s14, s15 @ s15 = s14 / s15, divided in hardware
-
fsts s15, [fp, #-8] @ store the quotient at [fp, #-8] (f3 in hello.c)
-
mov r3, #0
-
mov r0, r3 @ r0 = 0, the value of "return 0"
-
add sp, fp, #0 @ sp = fp, dropping the locals
-
ldmfd sp!, {fp} @ pop the saved fp
-
bx lr @ return through lr, which was never overwritten
-
.L4:
-
.align 2
-
.L3:
-
.word 1067030938 @ 0x3F99999A, the single-precision encoding of 1.2
-
.word 1067869798 @ 0x3FA66666, the single-precision encoding of 1.3
-
.LFE0:
-
.fnend
-
.size main, .-main
-
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
-
.section .note.GNU-stack,"",%progbits
4、 arm-eabi-gcc -S hello.c -mfpu=neon -mfloat-abi=softfp
-
@ GCC output for hello.c built with -mfpu=neon -mfloat-abi=softfp.
@ The arithmetic is done in single-precision registers s14/s15 with
@ flds/fsts/fdivs; there is no call to a software division routine.
@ NOTE(review): .arch is still armv5te although .fpu is neon; a real NEON
@ target would normally also pass an ARMv7 -march/-mcpu option - confirm.
.arch armv5te
-
.eabi_attribute 27, 3 @ NOTE(review): tag 27 is Tag_ABI_HardFP_use in the ARM build-attributes addendum - confirm
-
.fpu neon @ FPU recorded by the compiler: neon
-
.eabi_attribute 20, 1
-
.eabi_attribute 21, 1
-
.eabi_attribute 23, 3
-
.eabi_attribute 24, 1
-
.eabi_attribute 25, 1
-
.eabi_attribute 26, 2
-
.eabi_attribute 30, 6
-
.eabi_attribute 18, 4
-
.file "hello.c"
-
.text
-
.align 2
-
.global main
-
.type main, %function
-
main:
-
.fnstart
-
.LFB0:
-
@ args = 0, pretend = 0, frame = 16
-
@ frame_needed = 1, uses_anonymous_args = 0
-
@ link register save eliminated.
-
str fp, [sp, #-4]! @ push fp only (pre-decrement sp by 4); lr is not saved
-
.save {fp}
-
.LCFI0:
-
.setfp fp, sp, #0
-
add fp, sp, #0 @ fp = sp
-
.LCFI1:
-
.pad #20
-
sub sp, sp, #20 @ reserve 20 bytes of stack below fp
-
.LCFI2:
-
flds s15, .L3 @ s15 = first literal-pool word (1.2f)
-
fsts s15, [fp, #-16] @ store at [fp, #-16] (f1 = 1.2 in hello.c)
-
flds s15, .L3+4 @ s15 = second literal-pool word (1.3f)
-
fsts s15, [fp, #-12] @ store at [fp, #-12] (f2 = 1.3 in hello.c)
-
flds s14, [fp, #-16] @ s14 = dividend
-
flds s15, [fp, #-12] @ s15 = divisor
-
fdivs s15, s14, s15 @ s15 = s14 / s15, divided in hardware
-
fsts s15, [fp, #-8] @ store the quotient at [fp, #-8] (f3 in hello.c)
-
mov r3, #0
-
mov r0, r3 @ r0 = 0, the value of "return 0"
-
add sp, fp, #0 @ sp = fp, dropping the locals
-
ldmfd sp!, {fp} @ pop the saved fp
-
bx lr @ return through lr, which was never overwritten
-
.L4:
-
.align 2
-
.L3:
-
.word 1067030938 @ 0x3F99999A, the single-precision encoding of 1.2
-
.word 1067869798 @ 0x3FA66666, the single-precision encoding of 1.3
-
.LFE0:
-
.fnend
-
.size main, .-main
-
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
-
.section .note.GNU-stack,"",%progbits
5、 arm-eabi-gcc -S hello.c -mfpu=vfpv3 -mfloat-abi=softfp
-
@ GCC output for hello.c built with -mfpu=vfpv3 -mfloat-abi=softfp.
@ The arithmetic is done in single-precision registers s14/s15 with
@ flds/fsts/fdivs; there is no call to a software division routine.
.arch armv5te
-
.eabi_attribute 27, 3 @ NOTE(review): tag 27 is Tag_ABI_HardFP_use in the ARM build-attributes addendum - confirm
-
.fpu vfpv3 @ FPU recorded by the compiler: vfpv3
-
.eabi_attribute 20, 1
-
.eabi_attribute 21, 1
-
.eabi_attribute 23, 3
-
.eabi_attribute 24, 1
-
.eabi_attribute 25, 1
-
.eabi_attribute 26, 2
-
.eabi_attribute 30, 6
-
.eabi_attribute 18, 4
-
.file "hello.c"
-
.text
-
.align 2
-
.global main
-
.type main, %function
-
main:
-
.fnstart
-
.LFB0:
-
@ args = 0, pretend = 0, frame = 16
-
@ frame_needed = 1, uses_anonymous_args = 0
-
@ link register save eliminated.
-
str fp, [sp, #-4]! @ push fp only (pre-decrement sp by 4); lr is not saved
-
.save {fp}
-
.LCFI0:
-
.setfp fp, sp, #0
-
add fp, sp, #0 @ fp = sp
-
.LCFI1:
-
.pad #20
-
sub sp, sp, #20 @ reserve 20 bytes of stack below fp
-
.LCFI2:
-
flds s15, .L3 @ s15 = first literal-pool word (1.2f)
-
fsts s15, [fp, #-16] @ store at [fp, #-16] (f1 = 1.2 in hello.c)
-
flds s15, .L3+4 @ s15 = second literal-pool word (1.3f)
-
fsts s15, [fp, #-12] @ store at [fp, #-12] (f2 = 1.3 in hello.c)
-
flds s14, [fp, #-16] @ s14 = dividend
-
flds s15, [fp, #-12] @ s15 = divisor
-
fdivs s15, s14, s15 @ s15 = s14 / s15, divided in hardware
-
fsts s15, [fp, #-8] @ store the quotient at [fp, #-8] (f3 in hello.c)
-
mov r3, #0
-
mov r0, r3 @ r0 = 0, the value of "return 0"
-
add sp, fp, #0 @ sp = fp, dropping the locals
-
ldmfd sp!, {fp} @ pop the saved fp
-
bx lr @ return through lr, which was never overwritten
-
.L4:
-
.align 2
-
.L3:
-
.word 1067030938 @ 0x3F99999A, the single-precision encoding of 1.2
-
.word 1067869798 @ 0x3FA66666, the single-precision encoding of 1.3
-
.LFE0:
-
.fnend
-
.size main, .-main
-
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
-
.section .note.GNU-stack,"",%progbits
6、 arm-eabi-gcc -S hello.c -mfpu=vfpv3 -mfloat-abi=hard
-
@ GCC output for hello.c built with -mfpu=vfpv3 -mfloat-abi=hard.
@ The arithmetic is done in single-precision registers s14/s15 with
@ flds/fsts/fdivs; there is no call to a software division routine.
.arch armv5te
-
.eabi_attribute 27, 3 @ NOTE(review): tag 27 is Tag_ABI_HardFP_use in the ARM build-attributes addendum - confirm
-
.eabi_attribute 28, 1 @ NOTE(review): tag 28 is Tag_ABI_VFP_args (FP arguments passed in VFP registers) - confirm
-
.fpu vfpv3 @ FPU recorded by the compiler: vfpv3
-
.eabi_attribute 20, 1
-
.eabi_attribute 21, 1
-
.eabi_attribute 23, 3
-
.eabi_attribute 24, 1
-
.eabi_attribute 25, 1
-
.eabi_attribute 26, 2
-
.eabi_attribute 30, 6
-
.eabi_attribute 18, 4
-
.file "hello.c"
-
.text
-
.align 2
-
.global main
-
.type main, %function
-
main:
-
.fnstart
-
.LFB0:
-
@ args = 0, pretend = 0, frame = 16
-
@ frame_needed = 1, uses_anonymous_args = 0
-
@ link register save eliminated.
-
str fp, [sp, #-4]! @ push fp only (pre-decrement sp by 4); lr is not saved
-
.save {fp}
-
.LCFI0:
-
.setfp fp, sp, #0
-
add fp, sp, #0 @ fp = sp
-
.LCFI1:
-
.pad #20
-
sub sp, sp, #20 @ reserve 20 bytes of stack below fp
-
.LCFI2:
-
flds s15, .L3 @ s15 = first literal-pool word (1.2f)
-
fsts s15, [fp, #-16] @ store at [fp, #-16] (f1 = 1.2 in hello.c)
-
flds s15, .L3+4 @ s15 = second literal-pool word (1.3f)
-
fsts s15, [fp, #-12] @ store at [fp, #-12] (f2 = 1.3 in hello.c)
-
flds s14, [fp, #-16] @ s14 = dividend
-
flds s15, [fp, #-12] @ s15 = divisor
-
fdivs s15, s14, s15 @ s15 = s14 / s15, divided in hardware
-
fsts s15, [fp, #-8] @ store the quotient at [fp, #-8] (f3 in hello.c)
-
mov r3, #0
-
mov r0, r3 @ r0 = 0, the value of "return 0"
-
add sp, fp, #0 @ sp = fp, dropping the locals
-
ldmfd sp!, {fp} @ pop the saved fp
-
bx lr @ return through lr, which was never overwritten
-
.L4:
-
.align 2
-
.L3:
-
.word 1067030938 @ 0x3F99999A, the single-precision encoding of 1.2
-
.word 1067869798 @ 0x3FA66666, the single-precision encoding of 1.3
-
.LFE0:
-
.fnend
-
.size main, .-main
-
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
-
.section .note.GNU-stack,"",%progbits
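从上面可以看到,使用softfp和hard使用的指令集是一样的,都是硬件浮点, neon和vfp的区别,仅仅体现在.fpu vfpv3和.fpu neon. softfp 和 hard 真正的区别在于函数调用时浮点参数的传递方式(hard 的输出中多了一行 .eabi_attribute 28, 1);本例的 main 没有浮点参数,也不调用其他函数,所以两者生成的指令完全相同。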
7、 arm-eabi-gcc -S hello.c -mfloat-abi=hard
-
@ GCC output for hello.c built with -mfloat-abi=hard only (no -mfpu).
@ The arithmetic is done in single-precision registers s14/s15 with
@ flds/fsts/fdivs; there is no call to a software division routine.
.arch armv5te
-
.eabi_attribute 27, 3 @ NOTE(review): tag 27 is Tag_ABI_HardFP_use in the ARM build-attributes addendum - confirm
-
.eabi_attribute 28, 1 @ NOTE(review): tag 28 is Tag_ABI_VFP_args (FP arguments passed in VFP registers) - confirm
-
.fpu vfp @ FPU recorded by the compiler when no -mfpu is given: vfp
-
.eabi_attribute 20, 1
-
.eabi_attribute 21, 1
-
.eabi_attribute 23, 3
-
.eabi_attribute 24, 1
-
.eabi_attribute 25, 1
-
.eabi_attribute 26, 2
-
.eabi_attribute 30, 6
-
.eabi_attribute 18, 4
-
.file "hello.c"
-
.text
-
.align 2
-
.global main
-
.type main, %function
-
main:
-
.fnstart
-
.LFB0:
-
@ args = 0, pretend = 0, frame = 16
-
@ frame_needed = 1, uses_anonymous_args = 0
-
@ link register save eliminated.
-
str fp, [sp, #-4]! @ push fp only (pre-decrement sp by 4); lr is not saved
-
.save {fp}
-
.LCFI0:
-
.setfp fp, sp, #0
-
add fp, sp, #0 @ fp = sp
-
.LCFI1:
-
.pad #20
-
sub sp, sp, #20 @ reserve 20 bytes of stack below fp
-
.LCFI2:
-
flds s15, .L3 @ s15 = first literal-pool word (1.2f)
-
fsts s15, [fp, #-16] @ store at [fp, #-16] (f1 = 1.2 in hello.c)
-
flds s15, .L3+4 @ s15 = second literal-pool word (1.3f)
-
fsts s15, [fp, #-12] @ store at [fp, #-12] (f2 = 1.3 in hello.c)
-
flds s14, [fp, #-16] @ s14 = dividend
-
flds s15, [fp, #-12] @ s15 = divisor
-
fdivs s15, s14, s15 @ s15 = s14 / s15, divided in hardware
-
fsts s15, [fp, #-8] @ store the quotient at [fp, #-8] (f3 in hello.c)
-
mov r3, #0
-
mov r0, r3 @ r0 = 0, the value of "return 0"
-
add sp, fp, #0 @ sp = fp, dropping the locals
-
ldmfd sp!, {fp} @ pop the saved fp
-
bx lr @ return through lr, which was never overwritten
-
.L4:
-
.align 2
-
.L3:
-
.word 1067030938 @ 0x3F99999A, the single-precision encoding of 1.2
-
.word 1067869798 @ 0x3FA66666, the single-precision encoding of 1.3
-
.LFE0:
-
.fnend
-
.size main, .-main
-
.ident "GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
-
.section .note.GNU-stack,"",%progbits
当直接使用-mfloat-abi=hard时,会默认使用.fpu vfp硬件浮点。
阅读(2069) | 评论(0) | 转发(0) |