399 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			399 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0-only
 | 
						|
// Copyright (C) 2021 ARM Limited.
 | 
						|
// Original author: Mark Brown <broonie@kernel.org>
 | 
						|
//
 | 
						|
// Scalable Matrix Extension ZA context switch test
 | 
						|
// Repeatedly writes unique test patterns into each ZA tile
 | 
						|
// and reads them back to verify integrity.
 | 
						|
//
 | 
						|
// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done
 | 
						|
// (leave it running for as long as you want...)
 | 
						|
// kill $pids
 | 
						|
 | 
						|
#include <asm/unistd.h>
 | 
						|
#include "assembler.h"
 | 
						|
#include "asm-offsets.h"
 | 
						|
#include "sme-inst.h"
 | 
						|
 | 
						|
.arch_extension sve
 | 
						|
 | 
						|
// Largest vector length this test accepts, in bits and in bytes.
#define MAXVL     2048
#define MAXVL_B   (MAXVL / 8)

// Backing storage, placed in .data:
//   zaref   - shadow copy of ZA, MAXVL_B rows of MAXVL_B bytes each
//   scratch - staging buffer for a single vector of up to MAXVL_B bytes
.pushsection .data
.p2align 4
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection
 | 
						|
 | 
						|
// Byte-at-a-time memory copy.
// x0: destination address
// x1: source address
// x2: number of bytes to copy (may be zero)
// Clobbers x0-x3 and the condition flags.
function memcpy
	cmp	x2, #0
	b.eq	.Lmemcpy_done

.Lmemcpy_byte:
	ldrb	w3, [x1], #1		// fetch next byte, advance source
	strb	w3, [x0], #1		// store it, advance destination
	subs	x2, x2, #1
	b.ne	.Lmemcpy_byte

.Lmemcpy_done:
	ret
endfunction
 | 
						|
 | 
						|
// Build the test pattern for one ZA row in the scratch buffer
// x0: pid
// x1: row in ZA
// x2: generation
//
// The inputs are packed into a 32-bit word that is written to each of the
// MAXVL_B / 4 32-bit lanes of scratch, incrementing by one per lane:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:16	pid
// bits 15: 8	row number
// bits  7: 0	32-bit lane index
//
// Clobbers x0, x1, x3 and the condition flags.

function pattern
	mov	w3, #0
	bfi	w3, w2, #28, #4		// generation -> bits 31:28
	bfi	w3, w0, #16, #12	// pid        -> bits 27:16
	bfi	w3, w1, #8, #8		// row        -> bits 15:8

	mov	w1, #MAXVL_B / 4	// lanes left to write
	ldr	x0, =scratch		// write cursor

.Lpattern_lane:
	str	w3, [x0], #4
	add	w3, w3, #1		// next lane index
	subs	w1, w1, #1
	b.ne	.Lpattern_lane

	ret
endfunction
 | 
						|
 | 
						|
// Compute the address of the shadow data for ZA horizontal vector \xn
// xd:    X register receiving zaref + (vector length in bytes) * \xn
// xn:    X register holding the row number
// nrtmp: bare register number; x<nrtmp> is left holding the vector length
//        in bytes as read by rdsvl
// xd, xn and x<nrtmp> must all be different registers.
.macro _adrza xd, xn, nrtmp
	rdsvl	\nrtmp, 1
	ldr	\xd, =zaref
	madd	\xd, x\nrtmp, \xn, \xd
.endm
 | 
						|
 | 
						|
// Fill one ZA horizontal vector, and its shadow in zaref, with a test pattern
// x0: pid
// x1: row number
// x2: generation
// Clobbers x0-x5, x12 and the condition flags.
function setup_za
	mov	x12, x1			// w12 selects the ZA vector below
	mov	x4, x30			// keep the return address across the calls

	bl	pattern			// scratch <- pattern(pid, row, generation)

	_adrza	x5, x12, 2		// x5 = shadow row address, x2 = row size
	ldr	x1, =scratch
	mov	x0, x5
	bl	memcpy			// shadow row <- scratch, x2 bytes

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction
 | 
						|
 | 
						|
// Byte-at-a-time memory compare.
// x0: first buffer
// x1: second buffer
// x2: number of bytes to compare (may be zero)
// Returns only if all bytes match; on the first difference it branches to
// barf, with x0-x2 holding the original arguments, and the program is
// aborted.
// x0-x2 are preserved on return; x3-x5 and the condition flags may be
// clobbered.
function memcmp
	cbz	x2, .Lmemcmp_ret

	// Stash the arguments so they can be handed to barf on a mismatch
	stp	x0, x1, [sp, #-0x20]!
	str	x2, [sp, #0x10]

	mov	x5, xzr			// byte offset
.Lmemcmp_byte:
	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	cmp	w3, w4
	add	x5, x5, #1		// add leaves the flags from cmp intact
	b.ne	.Lmemcmp_done
	subs	x2, x2, #1
	b.ne	.Lmemcmp_byte

.Lmemcmp_done:
	// Z is clear here only if a byte differed; the loads below do not
	// alter the flags.
	ldr	x2, [sp, #0x10]
	ldp	x0, x1, [sp], #0x20
	b.ne	barf

.Lmemcmp_ret:
	ret
endfunction
 | 
						|
 | 
						|
// Compare one ZA horizontal vector against its shadow in zaref; a mismatch
// ends up in barf via memcmp and aborts the program
// x0: row number
// Clobbers x0-x7 and x12.
// NOTE(review): relies on memfill_ae (defined outside this view) leaving
// x3 and x5-x7 intact -- confirm against its definition.
function check_za
	mov	x12, x0			// w12 selects the ZA vector below
	mov	x3, x30			// return address, restored before the tail call
	mov	x4, x0			// row number copy (not read again here)

	_adrza	x5, x12, 6		// x5 = expected data, x6 = row size in bytes
	ldr	x7, =scratch		// x7 = buffer receiving the live ZA row

	// Poison scratch so stale contents cannot be mistaken for ZA data
	mov	x1, x6
	mov	x0, x7
	bl	memfill_ae

	_str_za 12, 7			// save vector w12 to pointer x7

	// Tail-call memcmp(expected, actual, size); it returns to our caller
	mov	x2, x6
	mov	x1, x7
	mov	x0, x5
	mov	x30, x3
	b	memcmp
endfunction
 | 
						|
 | 
						|
// SIGUSR1 handler (installed by _start).
// Any SME register modified here can cause corruption in the main
// thread -- but *only* the locations modified here.
// x2: ucontext pointer for the interrupted context
// Clobbers x0.
function irritator_handler
	// Bump the saved x23 of the interrupted context, which the main
	// program uses as its signal counter:
	ldr	x0, [x2, #(ucontext_regs + 23 * 8)]
	add	x0, x0, #1
	str	x0, [x2, #(ucontext_regs + 23 * 8)]

	// Register-corrupting code would go here; it is currently compiled out
#if 0
	adr	x0, .text + (irritator_handler - .text) / 16 * 16
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
#endif

	ret
endfunction
 | 
						|
 | 
						|
// SIGUSR2 handler (installed by _start): only counts the signal.
// x2: ucontext pointer for the interrupted context
// Clobbers x0.
function tickle_handler
	// Bump the saved x23 of the interrupted context, which the main
	// program uses as its signal counter:
	ldr	x0, [x2, #(ucontext_regs + 23 * 8)]
	add	x0, x0, #1
	str	x0, [x2, #(ucontext_regs + 23 * 8)]

	ret
endfunction
 | 
						|
 | 
						|
// SIGINT/SIGTERM handler (installed by _start): print a summary and exit
// with status 0.
// w0: signal number
// x2: ucontext pointer; the interrupted context's x22 (iterations) and
//     x23 (signals) are read from it
// Does not return.
function terminate_handler
	mov	x20, x2			// ucontext, kept across the print calls
	mov	w21, w0			// signal number, likewise

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #(ucontext_regs + 22 * 8)]
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #(ucontext_regs + 23 * 8)]
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, xzr			// exit status 0
	svc	#0
endfunction
 | 
						|
 | 
						|
// Install a signal handler using the rt_sigaction syscall
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
// If the syscall fails, prints a message and branches to .Labort.

// Stack frame: 16 bytes holding x30, followed by a struct sigaction whose
// size is rounded up to a multiple of 16 bytes.
#define SETSIGNAL_FRAME	((sa_sz + 15) / 16 * 16 + 16)

function setsignal
	str	x30, [sp, #-SETSIGNAL_FRAME]!

	// Park the arguments while the sigaction structure is cleared
	mov	w6, w2
	mov	x5, x1
	mov	w4, w0

	mov	x1, #sa_sz
	add	x0, sp, #16
	bl	memclr

	// rt_sigaction(signum, &act, NULL, sa_mask_sz)
	add	x1, sp, #16
	str	x5, [x1, #sa_handler]
	str	w6, [x1, #sa_flags]
	mov	w0, w4
	mov	x2, #0
	mov	x3, #sa_mask_sz
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbnz	w0, .Lsetsignal_failed

	ldr	x30, [sp], #SETSIGNAL_FRAME
	ret

.Lsetsignal_failed:
	puts	"sigaction failure\n"
	b	.Labort
endfunction
 | 
						|
 | 
						|
// Main program entry point
//
// Installs the signal handlers, enables ZA, validates the vector length and
// then loops forever: fill every ZA row (and its shadow) with a fresh
// pattern, yield the CPU, then verify that every row still matches.
//
// Register usage:
//   x19  vector length in bits, as read at startup
//   x20  our PID
//   x21  row loop counter
//   x22  generation number / iteration count
//   x23  signal count (incremented by the signal handlers through the
//        saved context)
//   x24  number of ZA rows, while verifying
//
// The "function" macro already defines the _start label, so it is not
// defined a second time here.
.globl _start
function _start
	mov	x23, #0		// signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	puts	"Streaming mode "
	smstart_za

	// Sanity-check and report the vector length

	rdsvl	19, 8
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #MAXVL		// the shadow buffers are sized for MAXVL
	b.hi	1f
	tst	x19, #(8 - 1)
	b.eq	2f

1:	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8			// the vector length must never change
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1		// Set up ZA & shadow with test pattern
0:	mov	x0, x20
	sub	x1, x21, #1		// rows are filled from the last down to 0
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	0b

	mov	x8, #__NR_sched_yield	// encourage preemption
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	// Verify that the data made it through
	rdsvl	21, 1			// x21: rows still to check
	rdsvl	24, 1			// x24: total number of rows
0:	sub	x0, x24, x21		// rows are checked from 0 upwards
	bl	check_za
	subs	x21, x21, #1
	b.ne	0b

	add	x22, x22, #1	// Everything still working
	b	.Ltest_loop

.Labort:
	mov	x0, #0			// pid 0: signal our whole process group
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
endfunction
 | 
						|
 | 
						|
// Report a mismatch between a ZA row and its shadow, then abort.
// Reached from memcmp with:
// x0: expected data
// x1: actual data
// x2: data size
// Also reads the main loop's state as left by _start: x20 (PID),
// x22 (iteration) and x21/x24 (verify loop counter / row count).
// Does not return.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	// x21 is the verify loop's down-counter, not the row; recompute the
	// row number that was passed to check_za.
	sub	x0, x24, x21
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot      ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	mov	x8, #__NR_getpid	// x0 = our PID, the target for kill below
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
//	mov	x8, #__NR_exit
//	mov	x1, #1
//	svc	#0
endfunction
 | 
						|
 | 
						|
// Report that the vector length changed underneath us and exit with
// status 1.
// x0: the vector length that was read
// Does not return.
function vl_barf
	mov	x10, x0			// keep the value across puts

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #1			// exit status is the first syscall argument
	svc	#0
endfunction
 | 
						|
 | 
						|
// Report an unexpected SVCR value and exit with status 1.
// x0: the SVCR value that was read
// Does not return.
function svcr_barf
	mov	x10, x0			// keep the value across puts

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x8, #__NR_exit
	mov	x0, #1			// exit status is the first syscall argument
	svc	#0
endfunction
 |