arch/arc/lib/strcmp.S

Source file repositories/reference/linux-study-clean/arch/arc/lib/strcmp.S

File Facts

System
Linux kernel
Corpus path
arch/arc/lib/strcmp.S
Extension
.S
Size
2511 bytes
Lines
94
Domain
Architecture Layer
Bucket
arch/arc
Inferred role
Architecture Layer: arch/arc
Status
atlas-only

Why This File Exists

CPU and platform-specific kernel glue: boot entry, traps, syscall entry, interrupts, page tables, context switch, and low-level barriers.

Dependency Surface

Detected Declarations

Annotated Snippet

It would be possible to speed up the loops by one cycle / word
   respective one cycle / byte by forcing double source 1 alignment, unrolling
   by a factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup / finish,
   and strcmp might often terminate early.  */

#include <linux/linkage.h>

ENTRY_CFI(strcmp)
	or	r2,r0,r1
	bmsk_s	r2,r2,1
	brne	r2,0,.Lcharloop
	mov_s	r12,0x01010101
	ror	r5,r12
.Lwordloop:
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
#ifdef	__LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagateion from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0

Annotation

Implementation Notes