; Listing4-1.asm
;
; A program that times various versions of the ENTER
; instruction and compares the performance against
; discrete instructions.
;
; Syntax: MASM (ml64), x86-64.
; NOTE(review): the register usage and stack layout below look like
; the Microsoft x64 calling convention -- confirm against the build.

            option  casemap:none

            include    aoalib.inc       ;AoA library + constants
            includelib aoalib.lib       ;Link in aoalib library

; Number of times to repeat each timing loop.
; Note: should be small, larger values produce
; incorrect results due to multitasking and
; multi-core effects.
;
; The results are printed as "quotient.remainder" of
; (cycle sum / loops), so the digit after the decimal
; point is a true tenths digit only while loops = 10.

loops       =       10

            .const

; Program title:

            align   word
ttlStr      byte    "Listing 4-1", 0

            .data

            .code

; Here is the main assembly language function.
;
; Runs ten timing loops (ENTER n,0..3 versus discrete
; equivalents, then LEAVE versus its discrete equivalent)
; and prints the average cycle count of each.
;
; Register roles inside every timing loop:
;   ecx     low  32 bits of the starting RDTSC value
;   ebx     high 32 bits of the starting RDTSC value
;   r8d     iterations remaining
;   r9      running sum of elapsed cycles
;   rax/rdx scratch (RDTSC result, then the DIV operands)
;
; RBX is saved in the prologue and restored at allDone.
; The values placed in RDX and R8 before each "call print"
; correspond to the two %I64d items in the format string
; that follows the call.

            public  asmMain
asmMain     proc

; Create a fake display here: push six copies of RBP so
; that [rbp-8]..[rbp-48] are readable stack slots for the
; ENTER n,1..3 tests (and their discrete equivalents) to
; copy from.

            push    rbp
            mov     rbp, rsp
            push    rbp
            push    rbp
            push    rbp
            push    rbp
            push    rbp
            push    rbp
            lea     rbp, [rsp+48]       ;RBP -> saved RBP slot again

; 56 bytes of scratch plus the 8-byte RBX push leaves RSP
; 16-byte aligned at each call, assuming RSP mod 16 = 8 on
; entry to asmMain.

            sub     rsp, 56
            push    rbx

; Execute "enter 10, 0" and "leave"
; several times and compute the average
; cycle count. This code uses the RDTSC
; instruction, which isn't amazingly
; accurate (multiple cores and multitasking
; create problems), but it's good enough
; for our purposes here.

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
enter0Lp:
            rdtsc
            mov     ecx, eax            ;Start time, low half
            mov     ebx, edx            ;Start time, high half

            enter   10, 0
            leave

            rdtsc
            sub     eax, ecx            ;64-bit elapsed = end - start:
            sbb     edx, ebx            ; propagate borrow into high half
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     enter0Lp

            mov     rax, r9             ;RDX:RAX = cycle sum
            xor     edx, edx
            mov     ebx, loops
            div     rbx                 ;RAX = average, RDX = remainder
            mov     r8, rdx             ;Remainder -> second print item
            mov     rdx, rax            ;Average   -> first print item
            call    print
            byte    "Enter 10,0 run time: "
            byte    "%I64d.%I64d", nl, 0

; Discrete equivalent of "enter 10, 0":

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
enter0aLp:
            rdtsc
            mov     ecx, eax
            mov     ebx, edx

            push    rbp
            mov     rbp, rsp
            sub     rsp, 10
            leave

            rdtsc
            sub     eax, ecx
            sbb     edx, ebx            ;Borrow from the low-half subtract
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     enter0aLp

            mov     rax, r9
            xor     edx, edx
            mov     ebx, loops
            div     rbx
            mov     r8, rdx
            mov     rdx, rax
            call    print
            byte    "Discrete Enter 10,0 run time: "
            byte    "%I64d.%I64d", nl, 0

; Repeat the test for Enter 10, 1:

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
enter1Lp:
            rdtsc
            mov     ecx, eax
            mov     ebx, edx

            enter   10, 1
            leave

            rdtsc
            sub     eax, ecx
            sbb     edx, ebx            ;Borrow from the low-half subtract
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     enter1Lp

            mov     rax, r9
            xor     edx, edx
            mov     ebx, loops
            div     rbx
            mov     r8, rdx
            mov     rdx, rax
            call    print
            byte    "Enter 10,1 run time: "
            byte    "%I64d.%I64d", nl, 0

; Discrete sequence timed against "enter 10, 1":

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
enter1aLp:
            rdtsc
            mov     ecx, eax
            mov     ebx, edx

            push    rbp
            push    qword ptr [rbp-8]
            lea     rbp, [rsp+8]        ;RBP -> RBP value pushed above
            sub     rsp, 10
            leave

            rdtsc
            sub     eax, ecx
            sbb     edx, ebx            ;Borrow from the low-half subtract
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     enter1aLp

            mov     rax, r9
            xor     edx, edx
            mov     ebx, loops
            div     rbx
            mov     r8, rdx
            mov     rdx, rax
            call    print
            byte    "Discrete Enter 10,1 run time: "
            byte    "%I64d.%I64d", nl, 0

; Repeat the test for Enter 10, 2:

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
enter2Lp:
            rdtsc
            mov     ecx, eax
            mov     ebx, edx

            enter   10, 2
            leave

            rdtsc
            sub     eax, ecx
            sbb     edx, ebx            ;Borrow from the low-half subtract
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     enter2Lp

            mov     rax, r9
            xor     edx, edx
            mov     ebx, loops
            div     rbx
            mov     r8, rdx
            mov     rdx, rax
            call    print
            byte    "Enter 10,2 run time: "
            byte    "%I64d.%I64d", nl, 0

; Discrete sequence timed against "enter 10, 2":

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
enter2aLp:
            rdtsc
            mov     ecx, eax
            mov     ebx, edx

            push    rbp
            push    qword ptr [rbp-8]
            push    qword ptr [rbp-16]
            lea     rbp, [rsp+16]       ;RBP -> RBP value pushed above
            sub     rsp, 10
            leave

            rdtsc
            sub     eax, ecx
            sbb     edx, ebx            ;Borrow from the low-half subtract
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     enter2aLp

            mov     rax, r9
            xor     edx, edx
            mov     ebx, loops
            div     rbx
            mov     r8, rdx
            mov     rdx, rax
            call    print
            byte    "Discrete Enter 10,2 run time: "
            byte    "%I64d.%I64d", nl, 0

; Repeat the test for Enter 10, 3:

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
enter3Lp:
            rdtsc
            mov     ecx, eax
            mov     ebx, edx

            enter   10, 3
            leave

            rdtsc
            sub     eax, ecx
            sbb     edx, ebx            ;Borrow from the low-half subtract
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     enter3Lp

            mov     rax, r9
            xor     edx, edx
            mov     ebx, loops
            div     rbx
            mov     r8, rdx
            mov     rdx, rax
            call    print
            byte    "Enter 10,3 run time: "
            byte    "%I64d.%I64d", nl, 0

; Discrete sequence timed against "enter 10, 3":

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
enter3aLp:
            rdtsc
            mov     ecx, eax
            mov     ebx, edx

            push    rbp
            push    qword ptr [rbp-8]
            push    qword ptr [rbp-16]
            push    qword ptr [rbp-24]
            lea     rbp, [rsp+24]       ;RBP -> RBP value pushed above
            sub     rsp, 10
            leave

            rdtsc
            sub     eax, ecx
            sbb     edx, ebx            ;Borrow from the low-half subtract
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     enter3aLp

            mov     rax, r9
            xor     edx, edx
            mov     ebx, loops
            div     rbx
            mov     r8, rdx
            mov     rdx, rax
            call    print
            byte    "Discrete Enter 10,3 run time: "
            byte    "%I64d.%I64d", nl, 0

; Quick test of the leave instruction:

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
leaveLp:
            rdtsc
            mov     ecx, eax
            mov     ebx, edx

            push    rbp
            mov     rbp, rsp
            sub     rsp, 10
            leave

            rdtsc
            sub     eax, ecx
            sbb     edx, ebx            ;Borrow from the low-half subtract
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     leaveLp

            mov     rax, r9
            xor     edx, edx
            mov     ebx, loops
            div     rbx
            mov     r8, rdx
            mov     rdx, rax
            call    print
            byte    nl
            byte    "Leave run time: "
            byte    "%I64d.%I64d", nl, 0

; Same frame setup, but with LEAVE replaced by its
; discrete equivalent (mov rsp, rbp / pop rbp):

            xor     r9d, r9d            ;Maintain sum here
            mov     r8d, loops          ;Repeat "loops" times.
dleaveLp:
            rdtsc
            mov     ecx, eax
            mov     ebx, edx

            push    rbp
            mov     rbp, rsp
            sub     rsp, 10
            mov     rsp, rbp
            pop     rbp

            rdtsc
            sub     eax, ecx
            sbb     edx, ebx            ;Borrow from the low-half subtract
            shl     rdx, 32
            or      rax, rdx
            add     r9, rax
            dec     r8d
            jnz     dleaveLp

            mov     rax, r9
            xor     edx, edx
            mov     ebx, loops
            div     rbx
            mov     r8, rdx
            mov     rdx, rax
            call    print
            byte    "Discrete leave run time: "
            byte    "%I64d.%I64d", nl, 0

allDone:
            pop     rbx                 ;Restore RBX saved in the prologue
            leave                       ;Discards the fake display as well
            ret                         ;Returns to caller
asmMain     endp
            end