scsi_fault_injection_test_tool
scsi_fault_injection_test_tool copied to clipboard
How to update to support kernel 4.1x?
I set up a VMware VM running the latest CentOS (version: 8.2, kernel: 4.18.0-193.19.1.el8_2.x86_64) to try out this tool.
Detailed system info: CentOS version: 8.2.2004 kernel version: 4.18.0-193.19.1.el8_2.x86_64 systemtap version: 4.2
The function scsi_next_command has been removed from the SCSI driver, so which function should be probed in its place?
#! /usr/bin/env stap
%{
#include <linux/types.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <linux/timer.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/mm_types.h>
#include <linux/mm.h>
#include <linux/fs.h>
%}
/*
 * Script-wide state.  SystemTap treats any variable that is not declared
 * `global` as local to the probe that uses it and resets it on every hit,
 * so everything that must survive between probe hits is declared here.
 */

/* Bookkeeping for the command currently selected for fault injection. */
global target_access
global target_scmd
global target_rq
global target_bio
global target_r1bio
/* -1 until the dispatch probe latches the first matching block (see probe begin). */
global target_block
global fix_write
global temp_failure

/*
 * Injection parameters.  Nothing in this script assigns them; presumably
 * they are supplied from outside (e.g. `stap -G name=value`) -- TODO confirm.
 */
global error_type
global access_type
global dev_major
global dev_minor_min
global dev_minor_max
global inode_lba_flag
global inode_lba_val
global timeout_flag

global retry_allowed
/* -1 until the dispatch probe latches the first matching minor (see probe begin). */
global target_minor

/*
 * Counters the scsi_dispatch_cmd probe compares and increments across
 * successive hits.  Without these declarations they would be probe-locals
 * that restart at 0 on every hit, so the retry limit and the one-shot
 * host-state change could never take effect.
 */
global entire_retries
global target_access_t
/*
 * Start-up hook: announce that the script is live and mark both the target
 * minor and the target block as "not selected yet" (-1).  The dispatch probe
 * fills them in when it sees the first matching command.
 */
probe begin
{
	printf("\nBEGIN\n")
	target_minor = -1
	target_block = -1
}
/*
 * set_sense_buf - forge the completion status and sense data of a SCSI command.
 *
 * cmd:      address of the struct scsi_cmnd to modify
 * result:   value stored in scmd->result
 * sensekey: value stored in sense byte 2
 * asc:      value stored in sense byte 12
 * ascq:     value stored in sense byte 13
 *
 * Always returns 0.  Does nothing when cmd is NULL or the command has no
 * sense buffer attached.  Writes kernel memory directly from embedded C.
 */
function set_sense_buf:long (cmd:long, result:long, sensekey:long, asc:long, ascq:long)
%{
	struct scsi_cmnd *scmd = (struct scsi_cmnd *)(long)STAP_ARG_cmd;

	/* The function is declared :long, so hand back a defined value. */
	STAP_RETVALUE = 0;

	if (scmd != NULL && scmd->sense_buffer != NULL) {
		scmd->result = (int)(long)STAP_ARG_result;
		scmd->sense_buffer[0] = 0x70; /* current, fixed format */
		scmd->sense_buffer[2] = (unsigned char)(long)STAP_ARG_sensekey;
		scmd->sense_buffer[7] = 0x13; /* length */
		scmd->sense_buffer[12] = (unsigned char)(long)STAP_ARG_asc;
		scmd->sense_buffer[13] = (unsigned char)(long)STAP_ARG_ascq;
	}
%}
/*
 * get_inode - return the inode number of the file that owns a page.
 *
 * page: address of a struct page (0 is tolerated)
 *
 * Returns mapping->host->i_ino when the page has a plain address_space
 * mapping with a host inode.  Returns 0 for a NULL page, a swap-cache page,
 * a slab page (CONFIG_SLUB builds), a page whose mapping pointer carries
 * tag bits, or a mapping without a host.
 */
function get_inode:long (page:long)
%{
	struct page *thispage = (struct page *)(long)STAP_ARG_page;
	struct address_space *mapping = NULL;

	if (thispage != NULL)
		mapping = thispage->mapping;

	if (mapping != NULL) {
		if (unlikely(PageSwapCache(thispage)))
			mapping = NULL;
#ifdef CONFIG_SLUB
		else if (unlikely(PageSlab(thispage)))
			mapping = NULL;
#endif
		/*
		 * The low bits of page->mapping are tag bits.  When any of them
		 * is set the pointer is not a usable struct address_space *, so
		 * mask the whole tag field rather than the anon bit alone.
		 */
		else if (unlikely((unsigned long)mapping & PAGE_MAPPING_FLAGS))
			mapping = NULL;
	}

	if ((mapping != NULL) && (mapping->host != NULL)) {
		STAP_RETVALUE = (unsigned long)(mapping->host->i_ino);
	} else {
		STAP_RETVALUE = 0;
	}
%}
/*
 * Fires on entry to scsi_decide_disposition().  When the command is the one
 * tracked in target_scmd, injection is armed, the transfer direction matches
 * access_type, and the request covers target_block on the selected disk, the
 * command's status and sense data are overwritten through set_sense_buf().
 * A command with direction 2 is left alone once fix_write has reached 2.
 */
probe kernel.function("scsi_decide_disposition@drivers/scsi/scsi_error.c")
{
	direction = $scmd->sc_data_direction

	injection_armed = (temp_failure == 1) || (error_type == 1)
	direction_wanted = (direction == access_type) || (access_type == 3) || ((direction == 2) && (access_type == 4))

	/* rq_disk is tested last so it is only read for the tracked command. */
	if (injection_armed && (target_scmd == $scmd) && direction_wanted && ($scmd->request->rq_disk != 0))
	{
		disk_major = $scmd->request->rq_disk->major
		disk_minor = $scmd->request->rq_disk->first_minor
		first_sector = $scmd->request->__sector
		sector_count = $scmd->sdb->length >> 9   /* byte length -> 512-byte sectors */

		on_target_disk = (disk_major == dev_major) && (disk_minor == target_minor)
		covers_target = (first_sector == target_block) || ((first_sector <= target_block) && (target_block < first_sector + sector_count))
		skip_fixed_write = (direction == 2) && (fix_write == 2)

		if (on_target_disk && covers_target && !skip_fixed_write)
		{
			printf("scsi_decide_disposition : major=%d minor=%d scmd=%d \n", disk_major, disk_minor, $scmd)
			/* create fake status and sense data */
			temp_failure++
			set_sense_buf($scmd, 0x02, 0x03, 0x11, 0x04)
		}
	}
}
//probe kernel.function("scsi_next_command@drivers/scsi/scsi_lib.c")
//{
// if((target_access != 0) && (target_scmd == $cmd))
// {
// printf("scsi_next_command : cmd = %d \n", $cmd)
// target_access = 0
// target_scmd = 0
// target_rq = 0
// restore_state = 0
// }
//}
/*
 * Fires on entry to scsi_dispatch_cmd().  Selects the command to corrupt and
 * arms the injection:
 *
 *  - A request matches when its first bio page belongs to inode
 *    inode_lba_val (inode_lba_flag == 1), when it covers sector
 *    inode_lba_val (inode_lba_flag == 0), or when it covers the already
 *    latched target_block.
 *  - For a match on the configured major/minor range with the wanted
 *    direction, the probe latches target_minor/target_block, records the
 *    command in target_scmd/target_rq, and zeroes the transfer-length field
 *    of the CDB.
 *  - With timeout_flag set it also overwrites the host's shost_state.
 *
 * The probe writes to $cmd fields, which requires guru mode.
 *
 * NOTE(review): entire_retries and target_access_t are used as if they
 * persist between hits; that only holds if they are declared `global`
 * somewhere in the script.  global_scmd and restore_state are only ever
 * written here.
 */
probe kernel.function("scsi_dispatch_cmd@drivers/scsi/scsi_lib.c")
{
	struct_bio = $cmd->request->bio
	block = $cmd->request->__sector
	req_len = $cmd->sdb->length

	/* major/minor stay 0 when the request has no disk attached. */
	if ($cmd->request->rq_disk != 0)
	{
		major = $cmd->request->rq_disk->major
		minor = $cmd->request->rq_disk->first_minor
	}

	/* The inode is only looked up while no target block has been latched. */
	if (target_block == -1)
	{
		if (struct_bio != 0)
		{
			page = $cmd->request->bio->bi_io_vec->bv_page
			if (page != 0)
			{
				inode = get_inode(page)
			}
		}
	}

	if (((inode_lba_flag ==1)&&(inode == inode_lba_val))
	    || ((inode_lba_flag ==0 ) && ((block <= inode_lba_val) && (inode_lba_val < block + (req_len >> 9))))
	    || (block == target_block)
	    || ((block <= target_block) && (target_block < block + (req_len >> 9))))
	{
		printf("\nSCSI_DISPATCH_CMD: command= %d \n", $cmd->cmnd[0])
		printf("SCSI_DISPATCH_CMD: major= %d minor= %d \n", major, minor)
		printf("SCSI_DISPATCH_CMD: flag(0:LBA, 1:inode)= %d \n", inode_lba_flag)
		printf("SCSI_DISPATCH_CMD: start sector= %d \n", $cmd->request->__sector)
		printf("SCSI_DISPATCH_CMD: req bufflen= %d \n", $cmd->sdb->length)
		printf("SCSI_DISPATCH_CMD: inode= %d \n", inode)
		printf("SCSI_DISPATCH_CMD: scmd = %d \n", $cmd)
		printf("SCSI_DISPATCH_CMD: [7]=%d [8]=%d \n", $cmd->cmnd[7],$cmd->cmnd[8])

		/* Until a minor is latched, accept any minor inside the configured range. */
		if ((target_minor== -1) && (major == dev_major) && ((dev_minor_min & 0xfff0) <= minor) && (minor <= (dev_minor_max & 0xfff0)))
		{
			tmp_minor = minor
		}

		if ((major == dev_major && ((minor == tmp_minor) || (minor == target_minor))) && (fix_write != 2))
		{
			/* inject errors on the designated device */
			printf("SCSI_DISPATCH_CMD: cmd-retries = %d entire-retry =%d \n", $cmd->retries, entire_retries)
			cmd_direction = $cmd->sc_data_direction

			if ((cmd_direction == 1) && (access_type == 4))
			{
				fix_write = 2
				printf("SCSI_DISPATCH_CMD: fix_write =%d \n", fix_write)
			}

			if ((temp_failure == 0) || (error_type == 1) ||((timeout_flag == 1) && (entire_retries <= retry_allowed)))
			{
				if ((cmd_direction == access_type) || ((cmd_direction == 2) && (access_type == 4)) || (access_type == 3))
				{
					if (target_minor == -1)
					{
						target_minor = tmp_minor
					}
					if (target_block == -1)
					{
						target_block = block
					}
					if (target_access == 0)
					{
						retry_allowed = $cmd->allowed
						target_access++
						target_scmd = $cmd
						target_rq = $cmd->request
					}
					temp_failure++

					/* Zero the transfer-length field of the CDB; its offset depends on the CDB size. */
					if (($cmd->cmnd[0] == 0x28) || ($cmd->cmnd[0] == 0x2a))
					{
						/* read_10 or write_10 */
						$cmd->cmnd[7]=0
						$cmd->cmnd[8]=0
					}else if (($cmd->cmnd[0] == 0x88) || ($cmd->cmnd[0] == 0x8a))
					{
						/* read_16 or write_16 (0x88 / 0x8a; 0x08 / 0x0a are the 6-byte opcodes) */
						$cmd->cmnd[10]=0
						$cmd->cmnd[11]=0
						$cmd->cmnd[12]=0
						$cmd->cmnd[13]=0
					}else if (($cmd->cmnd[0] == 0x08) || ($cmd->cmnd[0] == 0x0a))
					{
						/* read_6 or write_6 */
						/* NOTE(review): for 6-byte CDBs a length of 0 may mean 256 blocks -- confirm against SBC. */
						$cmd->cmnd[4]=0
					}

					if (target_scmd == $cmd)
					{
						entire_retries++
					}

					if ((target_access_t == 0) && (timeout_flag == 1))
					{
						target_access_t++
						global_scmd = $cmd
						restore_state = $cmd->device->host->shost_state
						$cmd->device->host->shost_state = 4
					}
				}
			}
			printf("\nSCSI_DISPATCH_CMD: cmd= %d, allowed = %d retries= %d \n", $cmd, $cmd->allowed, $cmd->retries)
			printf("SCSI_DISPATCH_CMD: scsi_cmnd= %d (host,channel,id,lun)= (%d, %d, %d, %d) \n", $cmd, $cmd->device->host->host_no, $cmd->device->channel, $cmd->device->id, $cmd->device->lun)
			printf("SCSI_DISPATCH_CMD: execname=%s, pexecname=%s\n", execname(), pexecname())
		}
	}
}
@CharmingYang0 I haven't tried to use this tool for several years or on any recent kernel, but what I would do if I were trying to use it is to research when this function was removed from the kernel, using strategies like https://stackoverflow.com/questions/12591247/find-when-line-was-deleted on the linux kernel git repo, then look at the commits which replaced it and try to understand how the function was replaced in other uses.
@dwalkes Got it. I will post the solution here once I figure it out.
@dwalkes I would like to confirm the following case with you.
STEPS
- Keep temporary_rerr.stp running in inode mode, associated with a text file that is not cached in memory
- Read the file via command head -n 5 file.txt
RESULTS The first 5 lines were displayed, even though an error was found in dmesg and 'scsi_decide_disposition : major=8 minor=xx scmd=xxx' was logged.
Now that the read error has been injected, the head command is supposed to return an error such as 'I/O error'. Any idea?
SCSI_DISPATCH_CMD: scmd = 0xffff9bceb4dbb8e8
SCSI_DISPATCH_CMD: command= 0x28
SCSI_DISPATCH_CMD: access_type= 2, transfer_direction= 2
SCSI_DISPATCH_CMD: major= 8 minor= 32
SCSI_DISPATCH_CMD: flag(0:LBA, 1:inode)= 1
SCSI_DISPATCH_CMD: start sector= 2240
SCSI_DISPATCH_CMD: req bufflen= 32768
SCSI_DISPATCH_CMD: inode= 132
SCSI_DISPATCH_CMD: [7]=0 [8]=64
SCSI_DISPATCH_CMD: cmd-retries = 0 entire-retry =0
SCSI_DISPATCH_CMD: scsi_cmnd= 0xffff9bceb4dbb8e8, allowed= 5 retries= 0
SCSI_DISPATCH_CMD: (host,channel,id,lun)= (0, 0, 2, 0)
scsi_decide_disposition : major=8 minor=32 scmd=0xffff9bceb4dbb8e8
scsi_end_request: scmd = 0xffff9bceb4dbb8e8
[root@VERIFY ~]# dmesg -T|grep error
[Fri Oct 16 18:07:20 2020] sd 0:0:2:0: [sdc] tag#78 Add. Sense: Unrecovered read error - auto reallocate failed
[Fri Oct 16 18:07:20 2020] blk_update_request: critical medium error, dev sdc, sector 2240 op 0x0:(READ) flags 0x80700 phys_seg 6 prio class 0
@CharmingYang0 my first guess is that something is happening in filesystem caching which means the head command is still succeeding. Do you get the expected content from file.txt in the output of the head command?
I'd suggest starting by verifying you can see the error using ddpt with iflag=sync to make sure you aren't dealing with filesystem caching. It looks like this will succeed based on the dmesg output.
Assuming this works, you could experiment with unmounting the filesystem between the error injection and the read step to force filesystem cache flush. There are probably other ways to accomplish this through open synchronized I/O as well.