MACRO EMBSR3 enulates the BSR
instruction for all non-zero inputs.
It uses a straight forward binary search, thus producing one bit of the result per algorithm step. For example, bit 5 of the result will be set if one of bits 16..31
of the original argument is set (or in other words, if the argument is >= 2^16). If bit 5 of the result is 0, we examine bits 15..0 in the next step, otherwise we examine bits 31..16
in the next step. Both tests can be accomplished by the same code if we conditionally shift right the argument when bit 5 is set.
This code has been tested exhaustively for all 2^32-1 possible inputs.;
; emulate bsf instruction
;
; input:
; eax = preform bsf on this register
;
; output:
; edx = return bit from bsf operation
;
; destroys:
; ecx, esi
; eflags
;
EMBSR3 MACRO
xor edx,edx ; initialize result
mov esi,eax ; save original argument
xor ecx,ecx ; initialize mask
cmp esi,10000h ; a < (1<<16) ?
adc ecx,0FFFFFFFFh ; a < (1<<16) ? 0 : FFFFFFFFh
and ecx,16 ; inc = a < (1<<16) ? 0 : 16
add edx,ecx ; cnt = cnt+inc
shr esi,cl ; a = a < (1<<16) ? a : a >> 16
xor ecx,ecx ; initialize mask
cmp esi,100h ; a < (1<<8) ?
adc ecx,0FFFFFFFFh ; a < (1<<8) ? 0 : FFFFFFFFh
and ecx,8 ; inc = a < (1<<8) ? 0 : 8
add edx,ecx ; cnt = cnt+inc
shr esi,cl ; a = a < (1<<8) ? a : a >> 8
xor ecx,ecx ; initialize mask
cmp esi,10h ; a < (1<<4) ?
adc ecx,0FFFFFFFFh ; a < (1<<4) ? 0 : FFFFFFFFh
and ecx,4 ; inc = a < (1<<4) ? 0 : 4
add edx,ecx ; cnt = cnt+inc
shr esi,cl ; a = a < (1<<4) ? a : a >> 4
xor ecx,ecx ; initialize mask
cmp esi,4h ; a < (1<<2) ?
adc ecx,0FFFFFFFFh ; a < (1<<2) ? 0 : FFFFFFFFh
and ecx,2 ; inc = a < (1<<2) ? 0 : 2
add edx,ecx ; cnt = cnt+inc
shr esi,cl ; a = a < (1<<2) ? a : a >> 2
shr esi,1 ; inc = (a>>1) ; 3->1, 2->1, 1->0, 0->0
add edx,esi ; cnt = cnt+inc
ENDM
Note that this gem may be rewritten to run on CPUs lower than 386, just use 16 bit registers instead.