mirror of
				https://gitlab.com/qemu-project/qemu-palcode.git
				synced 2024-02-13 08:32:59 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			60 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			60 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /*
 | |
|  * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
 | |
|  *
 | |
|  * Finds length of a 0-terminated string.  Optimized for the
 | |
|  * Alpha architecture:
 | |
|  *
 | |
|  *	- memory accessed as aligned quadwords only
 | |
|  *	- uses cmpbge to compare 8 bytes in parallel
 | |
|  *	- does binary search to find 0 byte in last
 | |
|  *	  quadword (HAKMEM needed 12 instructions to
 | |
|  *	  do this instead of the 9 instructions that
 | |
|  *	  binary search needs).
 | |
|  */
 | |
| 
 | |
| 	.set noreorder	# keep the instructions in exactly the order written below
 | |
| 	.set noat	# do not let the assembler use $at as a scratch register
 | |
| 
 | |
| 	.align 3	# 2^3 = 8-byte alignment for the entry point
 | |
| 
 | |
| 	.globl	strlen	# exported symbol
 | |
| 	.ent	strlen	# start of procedure strlen (closed by .end below)
 | |
| strlen:	# in: $16 = string address; out: $0 = length; writes $0-$3 only
 | |
| 	.frame	$sp, 0, $26, 0	# no stack frame; return address is in $26
 | |
| 	.prologue 0	# 0 = no $gp load at entry
 | |
| 
 | |
| 	ldq_u	$1, 0($16)	# load the aligned quadword holding the first byte ($16  may be misaligned)
 | |
| 	lda	$2, -1($31)	# $2 = all ones ($31 always reads as zero)
 | |
| 	insqh	$2, $16, $2	# $2 = 0xff in every byte that precedes the string start
 | |
| 	andnot	$16, 7, $0	# $0 = $16 rounded down to an 8-byte boundary
 | |
| 	or	$2, $1, $1	# make the leading bytes non-zero so they cannot match
 | |
| 	cmpbge	$31, $1, $2	# $2  <- bitmask: bit i == 1 <==> i-th byte == 0
 | |
| 	bne	$2, found	# terminator is already in the first quadword
 | |
| 
 | |
| loop:	ldq	$1, 8($0)	# fetch the next aligned quadword
 | |
| 	addq	$0, 8, $0	# addr += 8
 | |
| 	nop			# helps dual issue last two insns
 | |
| 	cmpbge	$31, $1, $2	# same zero-byte bitmask as above
 | |
| 	beq	$2, loop	# no zero byte yet: keep scanning
 | |
| 
 | |
| found:	blbs	$2, done	# zero is byte 0, $0 already points at it (makes aligned case fast)
 | |
| 	negq	$2, $3	# $3 = -$2
 | |
| 	and	$2, $3, $2	# $2 & -$2 keeps only the lowest set bit (the first zero byte)
 | |
| 
 | |
| 	and	$2, 0x0f, $1	# binary search, step 1: is the bit among bytes 0-3?
 | |
| 	addq	$0, 4, $3	# candidate address if it is not
 | |
| 	cmoveq	$1, $3, $0	# not in bytes 0-3: $0 += 4
 | |
| 
 | |
| 	and	$2, 0x33, $1	# step 2: is the bit in the lower pair of its group of four?
 | |
| 	addq	$0, 2, $3	# candidate address if it is not
 | |
| 	cmoveq	$1, $3, $0	# in the upper pair: $0 += 2
 | |
| 
 | |
| 	and	$2, 0x55, $1	# step 3: is the bit at an even position?
 | |
| 	addq	$0, 1, $3	# candidate address if it is not
 | |
| 	cmoveq	$1, $3, $0	# odd position: $0 += 1
 | |
| 
 | |
| done:	subq	$0, $16, $0	# length = address of terminator - start address
 | |
| 	ret	$31, ($26)	# return to the address held in $26
 | |
| 
 | |
| 	.end	strlen
 | 
