253 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			253 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
// SPDX-License-Identifier: GPL-2.0-only
 | 
						|
/* Copyright(c) 2022 Intel Corporation. */
 | 
						|
 | 
						|
#include <linux/cpu.h>
 | 
						|
#include <linux/delay.h>
 | 
						|
#include <linux/fs.h>
 | 
						|
#include <linux/nmi.h>
 | 
						|
#include <linux/slab.h>
 | 
						|
#include <linux/stop_machine.h>
 | 
						|
 | 
						|
#include "ifs.h"
 | 
						|
 | 
						|
/*
 | 
						|
 * Note all code and data in this file is protected by
 | 
						|
 * ifs_sem. On HT systems all threads on a core will
 | 
						|
 * execute together, but only the first thread on the
 | 
						|
 * core will update results of the test.
 | 
						|
 */
 | 
						|
 | 
						|
#define CREATE_TRACE_POINTS
 | 
						|
#include <trace/events/intel_ifs.h>
 | 
						|
 | 
						|
/* Upper bound on consecutive scan attempts that restart at the same chunk */
#define MAX_IFS_RETRIES		5

/*
 * How long, in TSC cycles, a logical CPU waits inside
 * WRMSR(ACTIVATE_SCAN) for the other logical CPU on its core
 * to arrive.
 */
#define IFS_THREAD_WAIT		100000
 | 
						|
 | 
						|
/*
 * Values carried in the error_code field of the scan status.
 * They double as indices into scan_test_status[] and drive the
 * retry decision in can_restart().
 */
enum ifs_status_err_code {
	IFS_NO_ERROR = 0,
	IFS_OTHER_THREAD_COULD_NOT_JOIN = 1,
	IFS_INTERRUPTED_BEFORE_RENDEZVOUS = 2,
	IFS_POWER_MGMT_INADEQUATE_FOR_SCAN = 3,
	IFS_INVALID_CHUNK_RANGE = 4,
	IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS = 5,
	IFS_CORE_NOT_CAPABLE_CURRENTLY = 6,
	IFS_UNASSIGNED_ERROR_CODE = 7,
	IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT = 8,
	IFS_INTERRUPTED_DURING_EXECUTION = 9,
};
 | 
						|
 | 
						|
static const char * const scan_test_status[] = {
 | 
						|
	[IFS_NO_ERROR] = "SCAN no error",
 | 
						|
	[IFS_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
 | 
						|
	[IFS_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SCAN coordination.",
 | 
						|
	[IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] =
 | 
						|
	"Core Abort SCAN Response due to power management condition.",
 | 
						|
	[IFS_INVALID_CHUNK_RANGE] = "Non valid chunks in the range",
 | 
						|
	[IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
 | 
						|
	[IFS_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SCAN currently",
 | 
						|
	[IFS_UNASSIGNED_ERROR_CODE] = "Unassigned error code 0x7",
 | 
						|
	[IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] =
 | 
						|
	"Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
 | 
						|
	[IFS_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SCAN start",
 | 
						|
};
 | 
						|
 | 
						|
static void message_not_tested(struct device *dev, int cpu, union ifs_status status)
 | 
						|
{
 | 
						|
	if (status.error_code < ARRAY_SIZE(scan_test_status)) {
 | 
						|
		dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n",
 | 
						|
			 cpumask_pr_args(cpu_smt_mask(cpu)),
 | 
						|
			 scan_test_status[status.error_code]);
 | 
						|
	} else if (status.error_code == IFS_SW_TIMEOUT) {
 | 
						|
		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
 | 
						|
			 cpumask_pr_args(cpu_smt_mask(cpu)));
 | 
						|
	} else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
 | 
						|
		dev_info(dev, "CPU(s) %*pbl: %s\n",
 | 
						|
			 cpumask_pr_args(cpu_smt_mask(cpu)),
 | 
						|
			 "Not all scan chunks were executed. Maximum forward progress retries exceeded");
 | 
						|
	} else {
 | 
						|
		dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n",
 | 
						|
			 cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Log the reason(s) a scan test failed.
 *
 * @dev:    device the message is logged against
 * @cpu:    logical CPU under test; its SMT sibling mask is printed
 * @status: scan status; control_error and signature_error are each
 *          reported independently when set
 */
static void message_fail(struct device *dev, int cpu, union ifs_status status)
{
	const struct cpumask *siblings = cpu_smt_mask(cpu);

	/*
	 * control_error: the microcode hit a problem loading the scan image
	 * from the reserved BIOS memory, or the image has been corrupted.
	 * Reloading the image may clear the condition.
	 */
	if (status.control_error)
		dev_err(dev, "CPU(s) %*pbl: could not execute from loaded scan image\n",
			cpumask_pr_args(siblings));

	/*
	 * signature_error: the output of the scan chains did not match the
	 * expected signature. This can be transient (e.g. a bit flip caused
	 * by an alpha particle or neutron); if it shows up again on a
	 * subsequent test it points at a real problem in the tested core.
	 */
	if (status.signature_error)
		dev_err(dev, "CPU(s) %*pbl: test signature incorrect.\n",
			cpumask_pr_args(siblings));
}
 | 
						|
 | 
						|
static bool can_restart(union ifs_status status)
 | 
						|
{
 | 
						|
	enum ifs_status_err_code err_code = status.error_code;
 | 
						|
 | 
						|
	/* Signature for chunk is bad, or scan test failed */
 | 
						|
	if (status.signature_error || status.control_error)
 | 
						|
		return false;
 | 
						|
 | 
						|
	switch (err_code) {
 | 
						|
	case IFS_NO_ERROR:
 | 
						|
	case IFS_OTHER_THREAD_COULD_NOT_JOIN:
 | 
						|
	case IFS_INTERRUPTED_BEFORE_RENDEZVOUS:
 | 
						|
	case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN:
 | 
						|
	case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
 | 
						|
	case IFS_INTERRUPTED_DURING_EXECUTION:
 | 
						|
		return true;
 | 
						|
	case IFS_INVALID_CHUNK_RANGE:
 | 
						|
	case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS:
 | 
						|
	case IFS_CORE_NOT_CAPABLE_CURRENTLY:
 | 
						|
	case IFS_UNASSIGNED_ERROR_CODE:
 | 
						|
		break;
 | 
						|
	}
 | 
						|
	return false;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Execute the scan. Called "simultaneously" on all threads of a core
 | 
						|
 * at high priority using the stop_cpus mechanism.
 | 
						|
 */
 | 
						|
static int doscan(void *data)
 | 
						|
{
 | 
						|
	int cpu = smp_processor_id();
 | 
						|
	u64 *msrs = data;
 | 
						|
	int first;
 | 
						|
 | 
						|
	/* Only the first logical CPU on a core reports result */
 | 
						|
	first = cpumask_first(cpu_smt_mask(cpu));
 | 
						|
 | 
						|
	/*
 | 
						|
	 * This WRMSR will wait for other HT threads to also write
 | 
						|
	 * to this MSR (at most for activate.delay cycles). Then it
 | 
						|
	 * starts scan of each requested chunk. The core scan happens
 | 
						|
	 * during the "execution" of the WRMSR. This instruction can
 | 
						|
	 * take up to 200 milliseconds (in the case where all chunks
 | 
						|
	 * are processed in a single pass) before it retires.
 | 
						|
	 */
 | 
						|
	wrmsrl(MSR_ACTIVATE_SCAN, msrs[0]);
 | 
						|
 | 
						|
	if (cpu == first) {
 | 
						|
		/* Pass back the result of the scan */
 | 
						|
		rdmsrl(MSR_SCAN_STATUS, msrs[1]);
 | 
						|
	}
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Use stop_core_cpuslocked() to synchronize writing to MSR_ACTIVATE_SCAN
 | 
						|
 * on all threads of the core to be tested. Loop if necessary to complete
 | 
						|
 * run of all chunks. Include some defensive tests to make sure forward
 | 
						|
 * progress is made, and that the whole test completes in a reasonable time.
 | 
						|
 */
 | 
						|
static void ifs_test_core(int cpu, struct device *dev)
 | 
						|
{
 | 
						|
	union ifs_scan activate;
 | 
						|
	union ifs_status status;
 | 
						|
	unsigned long timeout;
 | 
						|
	struct ifs_data *ifsd;
 | 
						|
	u64 msrvals[2];
 | 
						|
	int retries;
 | 
						|
 | 
						|
	ifsd = ifs_get_data(dev);
 | 
						|
 | 
						|
	activate.rsvd = 0;
 | 
						|
	activate.delay = IFS_THREAD_WAIT;
 | 
						|
	activate.sigmce = 0;
 | 
						|
	activate.start = 0;
 | 
						|
	activate.stop = ifsd->valid_chunks - 1;
 | 
						|
 | 
						|
	timeout = jiffies + HZ / 2;
 | 
						|
	retries = MAX_IFS_RETRIES;
 | 
						|
 | 
						|
	while (activate.start <= activate.stop) {
 | 
						|
		if (time_after(jiffies, timeout)) {
 | 
						|
			status.error_code = IFS_SW_TIMEOUT;
 | 
						|
			break;
 | 
						|
		}
 | 
						|
 | 
						|
		msrvals[0] = activate.data;
 | 
						|
		stop_core_cpuslocked(cpu, doscan, msrvals);
 | 
						|
 | 
						|
		status.data = msrvals[1];
 | 
						|
 | 
						|
		trace_ifs_status(cpu, activate, status);
 | 
						|
 | 
						|
		/* Some cases can be retried, give up for others */
 | 
						|
		if (!can_restart(status))
 | 
						|
			break;
 | 
						|
 | 
						|
		if (status.chunk_num == activate.start) {
 | 
						|
			/* Check for forward progress */
 | 
						|
			if (--retries == 0) {
 | 
						|
				if (status.error_code == IFS_NO_ERROR)
 | 
						|
					status.error_code = IFS_SW_PARTIAL_COMPLETION;
 | 
						|
				break;
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			retries = MAX_IFS_RETRIES;
 | 
						|
			activate.start = status.chunk_num;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	/* Update status for this core */
 | 
						|
	ifsd->scan_details = status.data;
 | 
						|
 | 
						|
	if (status.control_error || status.signature_error) {
 | 
						|
		ifsd->status = SCAN_TEST_FAIL;
 | 
						|
		message_fail(dev, cpu, status);
 | 
						|
	} else if (status.error_code) {
 | 
						|
		ifsd->status = SCAN_NOT_TESTED;
 | 
						|
		message_not_tested(dev, cpu, status);
 | 
						|
	} else {
 | 
						|
		ifsd->status = SCAN_TEST_PASS;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
 * Initiate per core test. It wakes up work queue threads on the target cpu and
 | 
						|
 * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and
 | 
						|
 * wait for all sibling threads to finish the scan test.
 | 
						|
 */
 | 
						|
int do_core_test(int cpu, struct device *dev)
 | 
						|
{
 | 
						|
	int ret = 0;
 | 
						|
 | 
						|
	/* Prevent CPUs from being taken offline during the scan test */
 | 
						|
	cpus_read_lock();
 | 
						|
 | 
						|
	if (!cpu_online(cpu)) {
 | 
						|
		dev_info(dev, "cannot test on the offline cpu %d\n", cpu);
 | 
						|
		ret = -EINVAL;
 | 
						|
		goto out;
 | 
						|
	}
 | 
						|
 | 
						|
	ifs_test_core(cpu, dev);
 | 
						|
out:
 | 
						|
	cpus_read_unlock();
 | 
						|
	return ret;
 | 
						|
}
 |