【问题标题】:ASSEMBLY: Binary Search on a Sorted String Array(汇编:对已排序字符串数组进行二分查找)
【发布时间】:2014-07-26 16:31:02
【问题描述】:

在这个必须完全用汇编完成的二分查找问题上花了几天时间之后,我仍然不确定在从已排序数组中搜索匹配的名称 [不区分大小写] 时,我的逻辑究竟在哪里出了问题。

任何帮助将不胜感激:

C 程序

/*

int b_search (char list[100][20], int count, char* token);

list – the starting address of the list of names to be searched

count – total number of names in the list

token – name to be searched in the list

*/

这是名称列表: Arturo, Bryan, chris, David, Jon, Mark, shane, SIMON, Thomas, TONY

以下都是要在列表中搜索的token、名称:

// 测试完全匹配的元素,例如:“Jon”、“shane”、“TONY”

// 测试不区分大小写,例如:“Chris”、“BryAN”

// 测试代码是否检测到部分和无意义的查询,例如:“Art” [Arturo 的缩写]、“garbage”

// 测试名称为奇数的列表,例如:“DAVe”、“Jona”

我一直在逻辑中的某个地方遇到无限循环,要么发现返回的索引始终为 0,要么发现它错误地返回“未找到名称”。

再次感谢任何帮助;感谢阅读。

// ============================================== =====================================

我的代码:

int b_search (char list[100][20], int count, char* token)
{
__asm
{
// Function returns the positionmove of the token in the list, starting with 1.

// If name is NOT found, return 0.

// Registers used:

// EAX: logical OR value

// EBX: toLowercase() loop counter

// ECX: total number of names

// EDI: name to be searched

// ESI: list pointer

mov eax, 0                      ; // zero out the result
mov ebx, 0                      ; // zero out EBX for use
mov esi, list                   ; // move the list pointer to ESI
mov edi, token                  ; // the name to be searched to EDI

// YOUR CODE HERE
// list - the starting address of the list of names to be searched
// count - total number of names in the list
// token - name to be searched in the list

// ==================================================================================================================
// CHANGE TOKEN LETTERS TO LOWERCASE
// ==================================================================================================================

TOKEN_LOWERCASE: // Cycles through every char in token and converts them to lowercase
    mov eax, [edi + ebx]        ; // move char of token into EAX
    or eax, 0x20                ; // convert to lowercase by logical OR with 0010 0000
    mov [edi + ebx], eax        ; // move new char back into EAX 
    inc ebx                     ; // increments loop counter
    cmp [edi + ebx], 0x00       ; // checks if the next char is a null terminator
jnz TOKEN_LOWERCASE             ; // exit loop in the presence of a null terminator

// ==================================================================================================================
// BINARY SEARCH RECURSION - FIRST ITERATION LOCATION
// All registers are now open except for EDI and ESI
// ==================================================================================================================

mov eax, 0                      ; // set the minimum value to be index first [0]
mov ecx, count                  ; // set the maximum value to be index last [index.length]
mov edx, 0                      ; // zero out EDX for use
push eax                        ; // push minimum value EAX back onto stack
push ecx                        ; // push maximum value ECX back onto stack

BEGIN_BINARY_SEARCH: // return here for recursion
    mov eax, 0                  ; // zero out EAX for use
    //mov ebx, 0                ; // zero out EBX for use
    mov ecx, 0                  ; // zero out ECX for use
    mov edx, 0                  ; // zero out EDX for use

    // FIRST IN, LAST OUT
    pop ecx                     ; // maximum value; first in, last out
    pop eax                     ; // minimum value; first in, last out
    cmp ecx, eax                ; // compares the maximum and minimum values
    jl DONE_EXIT                ; // all operations completed, goto DONE_EXIT [KNOWN ISSUE]
    mov edx, eax                ; // move EAX into EDX
    add edx, ecx                ; // add EAX and ECX, store it into EDX
    sar edx, 0x01               ; // shifts arithmetic right, dividing EDX by 2

    // FIRST IN, LAST OUT
    push eax                    ; // push minimum value EAX back onto stack
    push ecx                    ; // push maximum value ECX back onto stack
    mov eax, 0                  ; // move EAX to 0 for use *****
    mov ebx, 0                  ; // move EBX to 0 for use [external counter, see "RECURSION CONCLUDES"]
    mov ecx, 0                  ; // move ECX to 0 for use

    // ==============================================================================================================
    // INNER RECURSIVE LOOP
    // Registers to keep track of:
    // ECX = token[i]
    // EAX = element[i]
    // ==============================================================================================================
    GO_LOWER: // loop to check if cursor needs to go lower
        mov ecx, edx            ; // move EDX and copy it into ECX; SEE BELOW:
        imul ecx, 0x14          ; // OFFSET_TOTAL = COUNT * 20[Decimal]
        add ecx, ebx            ; // adds offset to EBX
        mov eax, [esi + ecx]    ; // moves element[i] into EAX, where list + 20 * externalCount + internalCount
        // ECX held the offset; it has been moved to EAX, so ECX can be reset
        mov ecx, 0              ; // reset ECX with every iteration to prepare for another address's contents
        mov ecx, [edi + ebx]    ; // move token element into ECX
        cmp eax, 0x00           ; // compares EAX to zero; checks for null terminator; SEE BELOW:
        jz NULL_TERM_CHECK      ; // if IS zero, then jump to IS_NULL
        jnz NOT_NULL            ; // if NOT zero, then jump to NOT_NULL

        // ==========================================================================================================
        NULL_TERM_CHECK: // procedure to check contents of ECX are a null terminator at this point
            //cmp ecx, 0x00     ; // checks for null terminator
            cmp ecx, eax        ; // compares token and element
        jz IS_MATCH             ; // if IS null terminator, then reached end of String
        jl DONE_GO_LOWER        ; // if token.length() is shorter then element.length()
        jg DONE_GO_HIGHER       ; // if token.length() is longer than element.length()
        //jnz DONE_EXIT         ; // if NOT null terminator, function is not yet finished; proceed:
        // ==========================================================================================================

        NOT_NULL: // proceed with the rest of the function
            or eax, 0x20        ; // logical OR with EAX will return the letter in lowercase
            sub ecx, eax        ; // -32 -> 0 -> +32; result indicates need to jump DONE_GO_LOWER or DONE_GO_HIGHER
        jl DONE_GO_LOWER        ; // jump to GO_LOWER if less than zero; 
        jg DONE_GO_HIGHER       ; // jump to GO_HIGHER if greater than zero
        inc ebx                 ; // increments loop counter if slips through
    jmp GO_LOWER                ; // return to GO_LOWER for recursion
    // ==============================================================================================================

// ==================================================================================================================
// RECURSION CONCLUDES - END ITERATION LOCATION
// Registers EAX, EBX and ECX are now open
// Register EDX is reserved for being the external loop counter
// ==================================================================================================================

// ==================================================================================================================
DONE_GO_LOWER:

    // FIRST IN, LAST OUT
    pop ecx                     ; // pop maximum value back into ECX from stack
    pop eax                     ; // pop minimum value back into EAX from stack
    mov ecx, edx                ; // move EDX into ECX, copying the value
    sub ecx, 0x01               ; // subtracts 1 from current makes the maximum
    push eax                    ; // push minimum value EAX back onto stack
    push ecx                    ; // push maximum value ECX back onto stack
jmp BEGIN_BINARY_SEARCH         ; // jump back to beginning of recursion

// ==================================================================================================================

// ==================================================================================================================
DONE_GO_HIGHER:

    // FIRST IN, LAST OUT
    pop ecx                     ; // pop maximum value back into ECX from stack
    pop eax                     ; // pop minimum value back into EAX from stack
    mov eax, edx                ; // move EDX into EAX, updating the minimum
    add eax, 0x01               ; // adds 1 to current makes the minimum
    push eax                    ; // push minimum value EAX back onto stack
    push ecx                    ; // push maximum value ECX back onto stack
jmp BEGIN_BINARY_SEARCH         ; // jump back to beginning of recursion

// ==================================================================================================================

DONE_EXIT:
    mov eax, 0                  ; // move eax back to 0 to finish up
    jmp DONE                    ; // jump to default done location

// ==================================================================================================================
IS_MATCH:
    mov eax, edx                ; // move ESP contents into EAX
jmp DONE                        ; // done with everything

// END PROCEDURE: DEFAULT TO HERE WHEN FINISHED

DONE: // ALL OPERATIONS FINISHED
}

}

【问题讨论】:

    标签: c assembly


    【解决方案1】:

    @Edward 完全正确。这是一个不难翻译成汇编的 C 例程。我快速写出的汇编版本一共有 39 条指令。

    #include <stdio.h>
    
    // Case-insensitive binary search over a table of 20-byte, NUL-terminated
    // strings.
    //
    //   a     - table of strings, sorted ascending when compared without
    //           regard to ASCII letter case
    //   count - number of rows in a
    //   key   - NUL-terminated string to look for (not modified)
    //
    // Returns the 0-based index of the row equal to key, or -1 if there is
    // none. A key that is only a prefix of a row does not match.
    int bsearch(char a[][20], int count, char *key)
    {
      // Answer lies in a[lo .. hi-1].
      int lo = 0, hi = count;

      while (lo < hi) {

        // Midpoint of range where answer must lie. Written as an offset from
        // lo so that lo + hi is never formed and cannot overflow int.
        int mid = lo + (hi - lo) / 2;

        // This simulates condition codes for key comparison.
        int cmp;

        // Pointers and character values from key and midpoint strings.
        char *p_key = key, *p_mid = a[mid], ch_key, ch_mid;

        // Pointers advance together through key and midpoint strings, stopping at '\0'.
        for (;;) {

          // Fetch characters from key and array.
          ch_key = *p_key, ch_mid = *p_mid;

          // Stop at the end of either string; the terminator itself then
          // takes part in the difference below, so a prefix compares as less.
          if (ch_key == 0 || ch_mid == 0) break;

          // Convert to lower case if necessary.
          if ('A' <= ch_key && ch_key <= 'Z') ch_key += 'a' - 'A';
          if ('A' <= ch_mid && ch_mid <= 'Z') ch_mid += 'a' - 'A';

          // Break if inequality is found.
          if (ch_key != ch_mid) break;

          // Move to next character pair.
          p_key++;
          p_mid++;
        }
        // Set the condition codes based on character difference.
        cmp = ch_key - ch_mid;

        // If equal, we're done.
        if (cmp == 0) return mid;

        // Shrink the range based on comparison result.
        if (cmp < 0) hi = mid;
        else         lo = mid + 1;
      }
      return -1;
    }
    
    // Driver: looks up each query in the name table and prints
    // "<query>: <result of bsearch>" on its own line.
    int main(void) {
      // Table to search.
      static char names[][20] = {
        "Arturo", "Bryan", "chris", "David", "Jon", "Mark", "shane", "SIMON", "Thomas", "TONY"
      };
      // Mixed-case queries, followed by three strings that are not in the table.
      static char queries[][20] = {
        "ARTURo", "brYAn", "cHRiS", "dAvID", "jON", "MaRk", "sHAne", "sImON", "THOmas", "TonY" , "AAAA", "foo", "ZZZZZ"
      };

      // Number of rows in a statically sized array.
      #define COUNT(A) (sizeof A / sizeof A[0])

      int q = 0;

      while (q < COUNT(queries)) {
        int where = bsearch(names, COUNT(names), queries[q]);

        printf("%s: %d\n", queries[q], where);
        q++;
      }

      return 0;
    }
    

    【讨论】:

    • 请不要介意,但我只是想知道你们在哪个编译器/汇编器上进行汇编编码。请回答,因为我将从今年秋天开始我的汇编课程!提前致谢。此外,如果不是单独的编译器、任何工具或任何内置的 Linux、C、支持,请...我会等待您的回复。
    • @shekharsuman 您需要能够使用提供的工具。选一个。好好学吧。其他的并没有太大的不同。
    • 如果您打算编写独立程序或从汇编代码完成目标文件,那么 NASM 就可以了。如果您只想在 C/C++ 程序中为特殊目的编写几行代码,那么只需关注 gcc 中的内联 asm 语句即可。 @shekharsuman
    • 非常感谢@Gene。您是像我这样的新手的真正导师。再次感谢!
    【解决方案2】:

    代码存在许多问题。没有特别的顺序,这是我发现的:

    更改传递的令牌

    代码更改了传入的令牌,这可能是可以接受的,但您必须在对该值执行 or 0x20 并将其存回原位之前,先检查 NUL 终止符。

    最好只在进行比较时将字符串设置为小写,而不改变传递的字符串。

    评论没有帮助

    这样的行:

    mov edx, eax                ; // move EAX into EDX
    

    没有帮助。程序员可以看到 EAX 的内容正在被移动到 EDX 中。评论应该告诉我为什么会发生这种情况。

    跟踪寄存器内容的问题

    当你写了 mov ecx, 0,而两行之后又是 pop ecx 时,它告诉我(它也应该告诉你!)你没有跟踪寄存器的内容。这正是注释可以帮到您的地方;我经常在带标签的行(跳转目标)上方写注释块,说明我期望每个寄存器和堆栈中此时的内容。您在几个地方写了这样的注释,但并未列出所有相关的寄存器。这确实有助于调试这样的代码。理想情况下,您应该为每个寄存器分配一个用途,然后在其余全部代码中仅将它们用于该用途。

    未能将列表条目小写

    GO_LOWER 标签之后的代码中,您将列表中下一个条目的字母加载到eax,并将令牌的下一个字母加载到ecx,但只有后者已转换为小写进行比较。

    过于复杂的分支

    这些行过于复杂:

        cmp eax, 0x00           
        jz NULL_TERM_CHECK     
        jnz NOT_NULL          
    NULL_TERM_CHECK: 
    

    这可以非常简化:

        cmp eax, 0x00           
        jnz NOT_NULL          
    

    因为代码无论如何都会转到下一条指令,你不再需要另一个分支,并且因为它没有在其他地方使用,你也可以消除标签。

    二分搜索混淆

    意图似乎是进行二分搜索,通常从有序列表的中间开始,然后进行比较以确定项目是在范围的上半部分还是下半部分。您的代码似乎是从第一个元素开始而不是从中间开始,并且从那里开始无法正常工作。

    一般建议:

    尝试用 C 语言编写一个正确且有效的例程,然后用相应的汇编语言例程一次替换一小部分。您将花费更少的时间来调试基本算法,而将更多的时间成功地实现工作的汇编语言代码。

    【讨论】:

      最近更新 更多