查找数组中第二大值的汇编程序

Question

我写了一个汇编程序来查找数组中的最大值，但现在我希望它能找到数组中第二大的数。我将如何修改我的程序来做到这一点？

这是我编写的程序，它确实有效。该程序打印数组中的所有值，然后找到数组的最大值。现在我想让它找到第二大值。

 %include "io.mac"
.STACK 100H 

.DATA
   Numbers DW 3,4,5,2,6,0
   msg0  db "Printing values in array",0
   msg1  db "Max",0
   msg2  db "Min",0

   .CODE
        .STARTUP
    PutStr msg0
    mov dx, Numbers 
    mov si, dx ;point to array
    printValues:
    cmp word [si], 0
    je procedure
    nwln
    PutInt [si]
    add si, 2
    jmp printValues

    procedure:
    push Numbers ;push Number to stack to pass parameter by stack
    call maxMeth
    nwln
    PutStr msg1
    nwln

    PutInt ax
    nwln




    complete:
.EXIT





maxMeth:
    enter 0,0 ;save old bp and set bp to sp
    mov si, [bp+4] ;point to array 
    mov ax, [si]   ; ax holds max
    add si,2 ; Increment si to next number

;Now entering loop
max:   
    cmp word [si],0   ; checks to see if the number is 0 and if it is, then we are done.
    je finish
    cmp ax, [si]        ; ax holds the max . So if ax is greater than si, then dont assign si to ax. 
    jge increment
    jmp newMax
newMax: 
    mov ax, [si] ; Otherwise we have a new a max

increment:   
    add si, 2   ;now increment si to check the next value and jump back to the main loop.
    jmp max

finish: ;clean up.
    leave ;pop bp
    ret   ;return

我编辑了我的程序以跟踪第二个最大值，我收到的第二个最大值的结果是 3。我期望程序输出 5。我不确定为什么我得到错误的输出。这是我对程序的编辑。

%include "io.mac"
.STACK 100H 

.DATA
   Numbers DW 3,4,5,2,6,0
   msg0  db "Printing values in array",0
   msg1  db "Max",0


   .CODE
        .STARTUP
    PutStr msg0
    mov dx, Numbers 
    mov si, dx ;point to array
    printValues:
    cmp word [si], 0
    je procedure
    nwln
    PutInt [si]
    add si, 2
    jmp printValues

    procedure:
    push Numbers ;push Number to stack to pass parameter by stack
    call maxMeth
    nwln
    PutStr msg1
    nwln

    PutInt ax
    nwln

   PutInt bx
    nwln


    complete:
.EXIT





maxMeth:
    enter 0,0 ;save old bp and set bp to sp
    mov si, [bp+4] ;point to array 
    mov ax, [si]   ; ax holds max
    mov bx, [si]   ; ax holds max
    add si,2 ; Increment si to next number

;Now entering loop
max:   
    cmp word [si],0   ; checks to see if the number is 0 and if it is, then we are done.
    je finish          ;equals 0, then finished
    cmp ax, [si]         
    jg testSecondMax   ;So if ax is greater than si, then dont assign si to ax and check second max
    jmp newMax         ;otherwise go to new max


newMax: 
    mov ax, [si] ; save new max
    jmp increment ; and increment

testSecondMax:
    cmp bx, [si]
    jl secondMax
    jg increment

secondMax:
    mov bx, [si]
    jmp increment

increment:  
    add si, 2   ;now increment si to check the next value and jump back to the main loop.
    jmp max 



finish: ;clean up.
    leave ;pop bp
    ret   ;return

Answer 1

我最终为此编写了代码，因为它让我想知道我实现循环的效率如何。如果你想自己解决作业，不要看我的代码，只看英文的bullet points。使用调试器单步执行代码。

以下是 I 将如何更改您的代码：

NASM 风格：在函数中使用局部标签（如 .noswap:）。将操作数缩进到一致的列中，这样它看起来就不会参差不齐。用 inputs / return value 和调用约定（注册它的内容）评论你的函数。
在 newMax: 之前优化 jmp next_instruction，因为它只是一个昂贵的空操作，可以跳到执行无论如何都会失败的地方。
除非可能针对真正的 8086 进行优化，否则不要使用 enter，它很慢。
将正在检查的每个元素加载到寄存器中，而不是多次使用相同的内存操作数。（x86-16 除了 BP/SP 之外还有 6 个整数寄存器；请使用它们。）
将循环退出条件分支放在底部。（如果需要，跳转到循环入口点）。
在两个寄存器中保留一个最大值和第二个最大值，就像在 AX 中保留最大值一样。
当您找到大于 2nd-max 的元素时，保留您拥有的 3 个数字中最高的 2 个。即在 2 个寄存器中维护一个 2 元素队列/排序列表。

未测试：

; word max2Meth(word *array);
; Input: implicit-length array (terminated by a 0 element),
;        pointed to by pointer passed on the stack.  (DS segment)
; returns in ax
; clobbers: cx, dx
global max2Meth
max2Meth:
    push  bp
    mov   bp, sp     ; make a stack frame.  (ENTER is slow, don't use it)
    push  si         ; SI is call-preserved in many calling conventions.  Omit this if you want to just clobber it.

    mov   si, [bp+4] ; pointer to array 

    mov   ax, [si]   ; assume that the list is non-empty
    mov   dx, ax     ; start with max = max2 instead of putting a conditional xchg outside the loop

    jmp   .loop_entry   ; enter at the bottom, at the conditional branch
;;; ax: 2nd max
;;; dx: max

.maxloop:              ; do {
    cmp cx, ax         ; check against 2nd-max, because the common case is less than both.
    jg  .updateMaxes   ; optimize for the common case: fall through on not-found

.loop_entry:
    add  si, 2
    mov  cx, [si]      ;   c = *p++;
    test cx, cx
    jnz .maxloop       ; } while (c != 0);

.finish:
   ; 2nd-max is already in AX, just clean up and return

    pop  si
    leave    ;pop bp would be faster because SP is already pointing to the right place
    ret

; This block is part of the function, even though it's placed after the ret
.updateMaxes:
    mov  ax, cx           ; Bump the old 2nd-max unconditionally
    cmp  ax, dx
    jle  .loop_entry      ; if 2nd_max <= max, return to the loop
    xchg ax, dx           ; otherwise swap
    jmp  .loop_entry

将罕见情况的块放在外线是很好的，因为这意味着常见情况可以在没有分支的情况下失败。 经常放置 if/else 条件内联需要 jmp 某处只是为了避免运行在 if 之后加入 else 部分。但是 .updateMaxes: 最终变得相当优雅，IMO：它必须跳回循环，我们可以在交换之前或之后进行。

（与 3 mov 指令一样昂贵），但为了在 new-max 情况下使其更便宜（并且只执行 mov ax, dx / mov dx, cx）我们必须让 new-2ndmax 的情况变慢。假设只更新第二个最大值比同时更新两者更有可能，我认为只有 mov 和 cmp/jcc 是一个胜利。您可以使用 cmov（在 686 CPU 上）使该部分无分支，这可能很好。让整个事情无分支会给你一个相当长的依赖链，并且不值得，除非数组元素在最后平均变大，所以你一直有频繁的最大更新（但不是模式，所以你得到分支未命中）。

在 Intel Haswell/Skylake 上，内循环只有 4 个融合域 uops（compare/branches 都可以宏融合）。在长时间运行没有更新的情况下，它应该运行每个时钟迭代 1 次。

如果您正在优化代码大小而不是速度（例如，对于真正的 8086），请使用 ax 作为临时值并使用 lodsw 而不是 mov ax, [si] 和 add si, 2. （将您的 max 保存在不同的寄存器中）。

对于隐式长度列表，您不能只使用 scasw 和 ax 中的 2nd-max，因为您需要检查 0 以及 > 2nd-max：/

作为进一步的优化，如果您使用的是微型模型 (SS = DS)，您可以使用 bp 而不是 si，因为您不需要访问堆栈加载指针后。你可以 pop bp 而不是 leave

在我想到简单地以ax = dx =第一个元素进入循环之前，我打算在循环之前使用这段代码：

    mov   ax, [si]   ; assume that the list is non-empty
    mov   dx, [si+2] ; but check if it's only 1 element long, like maxMeth does
    test  dx, dx
    jz    .finish

    add   si, 4      ; first 2 elements are loaded

    cmp   ax, dx     ; sort them so ax <= dx
    jng  .noswap
    xchg  ax, dx
 .noswap:

构造内循环的另一种方法是这样的：

.maxloop:              ; do {
    add  si, 2
    mov  cx, [si]      ;   c = *p++;

    test cx, cx
    jz .finish         ; jz instead of jnz: exit the loop on the sentinel value

    cmp cx, ax         ; check against 2nd-max, because the common case is less than both.
    jng .maxloop

;; .updateMaxes:  ;; conditionally fall through into this part 
    mov  ax, cx           ; Bump the old 2nd-max unconditionally
    cmp  ax, dx
    jle  .maxloop         ; if 2nd_max <= max, return to the loop
    xchg ax, dx           ; otherwise swap
    jmp  .maxloop

.finish:

这可能更好，因为我们可以直接进入循环的顶部开始。我们使用跳过条件更新内容的 jz 跳出循环，因此我们没有任何分支仅用于跳过 "in the way" 的代码。（即我们已经成功地有效地布置了我们的代码块。）

一些 CPU 的唯一缺点是 test/jz 和 cmp/jg 是背靠背的。当条件分支被比这更多的指令分隔时，一些 CPUs 做得更好。（例如，除非你幸运地知道这些是如何命中 Sandybridge 上的解码器的，否则两个分支中的一个不会宏融合。但他们会在第一个循环中。）

提醒：Stack Overflow 用户的贡献是根据 cc by-sa 3.0 获得许可的，需要注明出处，因此如果您复制粘贴我的整个代码，请确保在评论中包含

查找数组中第二大值的汇编程序

Assembly program that finds second largest value in array

assembly

nasm

x86-16