arch/arc/lib/memset-archs.S

Source file repositories/reference/linux-study-clean/arch/arc/lib/memset-archs.S

File Facts

System
Linux kernel
Corpus path
arch/arc/lib/memset-archs.S
Extension
.S
Size
2794 bytes
Lines
145
Domain
Architecture Layer
Bucket
arch/arc
Inferred role
Architecture Layer: arch/arc
Status
atlas-only

Why This File Exists

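CPU and platform-specific kernel glue: boot entry, traps, syscall entry, interrupts, page tables, context switch, and low-level barriers. This particular file supplies the ARC architecture's hand-written assembly implementation of `memset`.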

Dependency Surface

Detected Declarations

Annotated Snippet

#include <linux/linkage.h>
#include <asm/cache.h>

/*
 * The memset implementation below is optimized to use the prefetchw and
 * prealloc instructions on CPUs with a 64B L1 data cache line
 * (L1_CACHE_SHIFT == 6).
 * If you want to implement an optimized memset for other possible L1 data
 * cache line lengths (32B and 128B), you should rewrite the code, carefully
 * checking that we don't issue any prefetchw/prealloc instruction for L1
 * cache lines which don't belong to the memset area.
 */

/*
 * Cache-hint wrapper macros.
 *
 * Both macros take a base register and an offset. When L1_CACHE_SHIFT == 6
 * they expand to the corresponding instruction with the operand [reg, off];
 * for any other L1_CACHE_SHIFT value they expand to nothing, so call sites
 * can use them unconditionally.
 */
#if L1_CACHE_SHIFT == 6

/* Emits: prealloc [reg, off] */
.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

/* Emits: prefetchw [reg, off] */
.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else /* L1_CACHE_SHIFT != 6 */

/* Empty expansion: no prealloc is emitted for this cache line size. */
.macro PREALLOC_INSTR	reg, off
.endm

/* Empty expansion: no prefetchw is emitted for this cache line size. */
.macro PREFETCHW_INSTR	reg, off
.endm

#endif /* L1_CACHE_SHIFT == 6 */

ENTRY_CFI(memset)
	mov.f	0, r2
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; don't clobber ret val

	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location

;;; if length <= 8 (brls branches on unsigned lower-or-same)
	brls.d.nt	r2, 8, .Lsmallchunk
	mov.f	lp_count,r2

	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	stb.ab	r1, [r3,1]
	sub	r2, r2, 1
.Laligndestination:

;;; Destination is aligned
	and	r1, r1, 0xFF
	asl	r4, r1, 8
	or	r4, r4, r1
	asl	r5, r4, 16
	or	r5, r5, r4
	mov	r4, r5

	sub3	lp_count, r2, 8
	cmp     r2, 64
	bmsk.hi	r2, r2, 5
	mov.ls	lp_count, 0
	add3.hi	r2, r2, 8

;;; Convert len to Dwords, unfold x8
	lsr.f	lp_count, lp_count, 6

	lpnz	@.Lset64bytes

Annotation

Implementation Notes