sg_inq: update version descriptors to spc5r21; scripts/scsi-sg3_id: update rules; testing folder work

git-svn-id: https://svn.bingwo.ca/repos/sg3_utils/trunk@814 6180dd3e-e324-4e3e-922d-17de1ae2f315
diff --git a/testing/Makefile b/testing/Makefile
index 518eb5c..26868e9 100644
--- a/testing/Makefile
+++ b/testing/Makefile
@@ -5,7 +5,7 @@
 MANDIR=$(DESTDIR)/$(PREFIX)/man
 
 EXECS = sg_iovec_tst sg_sense_test sg_queue_tst bsg_queue_tst sg_chk_asc \
-	sg_tst_nvme sg_tst_ioctl sg_tst_bidi tst_sg_lib sgh_dd sgs_dd
+	sg_tst_nvme sg_tst_ioctl sg_tst_bidi tst_sg_lib sgs_dd
 	
 EXTRAS =
 
diff --git a/testing/Makefile.cplus b/testing/Makefile.cplus
index 96fcf5c..34c9483 100644
--- a/testing/Makefile.cplus
+++ b/testing/Makefile.cplus
@@ -9,7 +9,8 @@
 ## CC = clang++
 ## LD = clang++
 
-EXECS = sg_tst_excl sg_tst_excl2 sg_tst_excl3 sg_tst_context sg_tst_async
+EXECS = sg_tst_excl sg_tst_excl2 sg_tst_excl3 sg_tst_context sg_tst_async \
+	sgh_dd
 
 EXTRAS =
 
@@ -30,8 +31,10 @@
 LDFLAGS = -std=c++11 -pthread
 
 LIBFILESOLD = ../lib/sg_lib.o ../lib/sg_lib_data.o ../lib/sg_io_linux.o
-LIBFILESNEW = ../lib/sg_lib.o ../lib/sg_lib_data.o ../lib/sg_pt_linux.o ../lib/sg_pt_common.o \
-		../lib/sg_pt_linux_nvme.o ../lib/sg_io_linux.o ../lib/sg_cmds_basic.o
+LIBFILESNEW = ../lib/sg_pt_linux_nvme.o ../lib/sg_lib.o ../lib/sg_lib_data.o \
+                ../lib/sg_pt_linux.o ../lib/sg_io_linux.o \
+                ../lib/sg_pt_common.o  ../lib/sg_cmds_basic.o \
+                ../lib/sg_cmds_basic2.o
 
 all: $(EXECS)
 
@@ -60,6 +63,9 @@
 sg_tst_async: sg_tst_async.o $(LIBFILESNEW)
 	$(LD) -o $@ $(LDFLAGS) $^
 
+sgh_dd: sgh_dd.o $(LIBFILESNEW)
+	$(LD) -o $@ $(LDFLAGS) -pthread $^
+
 install: $(EXECS)
 	install -d $(INSTDIR)
 	for name in $^; \
diff --git a/testing/sg_tst_excl.cpp b/testing/sg_tst_excl.cpp
index d5c0bec..84f8389 100644
--- a/testing/sg_tst_excl.cpp
+++ b/testing/sg_tst_excl.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2018 Douglas Gilbert.
+ * Copyright (c) 2013-2019 Douglas Gilbert.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,10 +43,37 @@
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef HAVE_LINUX_SG_V4_HDR
+
+/* Kernel uapi headers contain __user decorations on user space pointers
+ * to indicate they are unsafe in the kernel space. However glibc takes
+ * all those __user decorations out from headers in /usr/include/linux .
+ * So to stop compile errors when directly importing include/uapi/scsi/sg.h
+ * undef __user before doing that include. */
+#define __user
+
+/* Want to block the original sg.h header from also being included. That
+ * causes lots of multiple definition errors. This will only work if this
+ * header is included _before_ the original sg.h header.  */
+#define _SCSI_GENERIC_H         /* original kernel header guard */
+#define _SCSI_SG_H              /* glibc header guard */
+
+#include "uapi_sg.h"    /* local copy of include/uapi/scsi/sg.h */
+
+#else
+#define __user
+#endif  /* end of: ifndef HAVE_LINUX_SG_V4_HDR */
+
 #include "sg_lib.h"
 #include "sg_io_linux.h"
+#include "sg_unaligned.h"
 
-static const char * version_str = "1.10 20181207";
+static const char * version_str = "1.11 20190121";
 static const char * util_name = "sg_tst_excl";
 
 /* This is a test program for checking O_EXCL on open() works. It uses
@@ -102,21 +129,25 @@
 static unsigned int odd_count;
 static unsigned int ebusy_count;
 static unsigned int eagain_count;
+static int sg_ifc_ver = 3;
 
 
 static void
 usage(void)
 {
-    printf("Usage: %s [-b] [-f] [-h] [-l <lba>] [-n <n_per_thr>] "
-           "[-t <num_thrs>]\n"
-           "                   [-V] [-w <wait_ms>] [-x] [-xx] "
-           "<sg_disk_device>\n", util_name);
+    printf("Usage: %s [-b] [-f] [-h] [-i <sg_ver>] [-l <lba>] "
+           "[-n <n_per_thr>]\n"
+           "                   [-t <num_thrs>] [-V] [-w <wait_ms>] "
+           "[-x] [-xx]\n"
+           "                   <sg_disk_device>\n", util_name);
     printf("  where\n");
     printf("    -b                block on open (def: O_NONBLOCK)\n");
     printf("    -f                force: any SCSI disk (def: only "
            "scsi_debug)\n");
     printf("                      WARNING: <lba> written to\n");
     printf("    -h                print this usage message then exit\n");
+    printf("    -i <sg_ver>       sg driver interface version (default: "
+           "3)\n");
     printf("    -l <lba>          logical block to increment (def: %u)\n",
            DEF_LBA);
     printf("    -n <n_per_thr>    number of loops per thread "
@@ -153,9 +184,9 @@
  * closes dev_name. If an error occurs returns -1 else returns 0 if
  * first int read from lba is even otherwise returns 1. */
 static int
-do_rd_inc_wr_twice(const char * dev_name, unsigned int lba, int block,
-                   int excl, int wait_ms, int id, unsigned int & ebusy,
-                   unsigned int & eagains)
+do_rd_inc_wr_twice_v3(const char * dev_name, unsigned int lba, int block,
+                      int excl, int wait_ms, int id, unsigned int & ebusy,
+                      unsigned int & eagains)
 {
     int k, sg_fd, ok, res;
     int odd = 0;
@@ -170,10 +201,8 @@
     char ebuff[EBUFF_SZ];
     int open_flags = O_RDWR;
 
-    r16CmdBlk[6] = w16CmdBlk[6] = (lba >> 24) & 0xff;
-    r16CmdBlk[7] = w16CmdBlk[7] = (lba >> 16) & 0xff;
-    r16CmdBlk[8] = w16CmdBlk[8] = (lba >> 8) & 0xff;
-    r16CmdBlk[9] = w16CmdBlk[9] = lba & 0xff;
+    sg_put_unaligned_be64(lba, r16CmdBlk + 2);
+    sg_put_unaligned_be64(lba, w16CmdBlk + 2);
     if (! block)
         open_flags |= O_NONBLOCK;
     if (excl)
@@ -190,8 +219,8 @@
             sleep(0);                   // process yield ??
     }
     if (sg_fd < 0) {
-        snprintf(ebuff, EBUFF_SZ,
-                 "do_rd_inc_wr_twice: error opening file: %s", dev_name);
+        snprintf(ebuff, EBUFF_SZ, "%s: error opening file: %s", __func__,
+		 dev_name);
         perror(ebuff);
         return -1;
     }
@@ -215,7 +244,7 @@
             {
                 lock_guard<mutex> lg(console_mutex);
 
-                perror("do_rd_inc_wr_twice: write(sg, READ_16)");
+                perror(" write(sg, READ_16)");
             }
             close(sg_fd);
             return -1;
@@ -225,7 +254,7 @@
             {
                 lock_guard<mutex> lg(console_mutex);
 
-                perror("do_rd_inc_wr_twice: write(sg, READ_16) 2");
+                perror(" write(sg, READ_16) 2");
             }
             close(sg_fd);
             return -1;
@@ -245,7 +274,7 @@
             {
                 lock_guard<mutex> lg(console_mutex);
 
-                perror("do_rd_inc_wr_twice: read(sg, READ_16)");
+                perror(" read(sg, READ_16)");
             }
             close(sg_fd);
             return -1;
@@ -287,7 +316,7 @@
                 {
                     lock_guard<mutex> lg(console_mutex);
 
-                    perror("do_rd_inc_wr_twice: read(sg, READ_16) 2");
+                    perror(" read(sg, READ_16) 2");
                 }
                 close(sg_fd);
                 return -1;
@@ -303,8 +332,8 @@
                 {
                     lock_guard<mutex> lg(console_mutex);
 
-                    fprintf(stderr, "Recovered error on READ_16, continuing "
-                            "2\n");
+                    fprintf(stderr, "%s: Recovered error on READ_16, "
+			    "continuing 2\n", __func__);
                 }
                 ok = 1;
                 break;
@@ -322,14 +351,12 @@
             return -1;
         }
 
-        u = (lb[0] << 24) + (lb[1] << 16) + (lb[2] << 8) + lb[3];
+        u = sg_get_unaligned_be32(lb);
+        // Assuming u starts test as even (probably 0), expect it to stay even
         if (0 == k)
             odd = (1 == (u % 2));
         ++u;
-        lb[0] = (u >> 24) & 0xff;
-        lb[1] = (u >> 16) & 0xff;
-        lb[2] = (u >> 8) & 0xff;
-        lb[3] = u & 0xff;
+        sg_put_unaligned_be32(u, lb);
 
         if (wait_ms > 0)       /* allow daylight for bad things ... */
             this_thread::sleep_for(milliseconds{wait_ms});
@@ -355,7 +382,7 @@
             {
                 lock_guard<mutex> lg(console_mutex);
 
-                perror("do_rd_inc_wr_twice: WRITE_16 SG_IO ioctl error");
+                perror(" WRITE_16 SG_IO ioctl error");
             }
             close(sg_fd);
             return -1;
@@ -370,7 +397,8 @@
             {
                 lock_guard<mutex> lg(console_mutex);
 
-                fprintf(stderr, "Recovered error on WRITE_16, continuing\n");
+                fprintf(stderr, "%s: Recovered error on WRITE_16, "
+			"continuing\n", __func__);
             }
             ok = 1;
             break;
@@ -391,6 +419,263 @@
     return odd;
 }
 
+/* sg v4 interface variant (struct sg_io_v4). Opens dev_name, retrying while
+ * open() gives EBUSY (pausing per wait_ms). Twice over: queues two READ(16)s
+ * of lba with ioctl(SG_IOSUBMIT), collects both with ioctl(SG_IORECEIVE),
+ * increments the first 4 bytes (big endian) and writes the block back with
+ * a WRITE(16) via ioctl(SG_IO). Closes dev_name. Returns -1 on error, else
+ * 1 if the first value read was odd, else 0. Retries bump ebusy/eagains. */
+static int
+do_rd_inc_wr_twice_v4(const char * dev_name, unsigned int lba, int block,
+                      int excl, int wait_ms, int id, unsigned int & ebusy,
+                      unsigned int & eagains)
+{
+    int k, sg_fd, ok, res;
+    int odd = 0;
+    unsigned int u = 0;
+    struct sg_io_v4 pt, pt2;
+    unsigned char r16CmdBlk [READ16_CMD_LEN] =
+                {0x88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
+    unsigned char w16CmdBlk [WRITE16_CMD_LEN] =
+                {0x8a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
+    unsigned char sense_buffer[64];
+    unsigned char lb[READ16_REPLY_LEN];
+    char ebuff[EBUFF_SZ];
+    int open_flags = O_RDWR;
+
+    sg_put_unaligned_be64(lba, r16CmdBlk + 2);  // LBA: cdb bytes 2 to 9
+    sg_put_unaligned_be64(lba, w16CmdBlk + 2);
+    if (! block)
+        open_flags |= O_NONBLOCK;
+    if (excl)
+        open_flags |= O_EXCL;
+
+    while (((sg_fd = open(dev_name, open_flags)) < 0) &&
+           (EBUSY == errno)) {
+        ++ebusy;
+        if (wait_ms > 0)
+            this_thread::sleep_for(milliseconds{wait_ms});
+        else if (0 == wait_ms)
+            this_thread::yield();
+        else if (-2 == wait_ms)
+            sleep(0);                   // process yield ??
+    }
+    if (sg_fd < 0) {
+        snprintf(ebuff, EBUFF_SZ, "%s: error opening file: %s", __func__,
+                 dev_name);
+        perror(ebuff);
+        return -1;
+    }
+
+    for (k = 0; k < 2; ++k) {
+        /* Prepare READ_16 command */
+        memset(&pt, 0, sizeof(pt));
+        pt.guard = 'Q';
+        pt.request_len = sizeof(r16CmdBlk);
+        pt.max_response_len = sizeof(sense_buffer);
+        // v4 header has no dxfer_direction; the din_* fields describe data-in
+        pt.din_xfer_len = READ16_REPLY_LEN;
+        pt.din_xferp = (uint64_t)(sg_uintptr_t)lb;
+        pt.request = (uint64_t)(sg_uintptr_t)r16CmdBlk;
+        pt.response = (uint64_t)(sg_uintptr_t)sense_buffer;
+        pt.timeout = 20000;     /* 20000 millisecs == 20 seconds */
+        pt.request_extra = id;  /* pack_id field */
+
+        // queue up two READ_16s to same LBA
+        if (ioctl(sg_fd, SG_IOSUBMIT, &pt) < 0) {
+            {
+                lock_guard<mutex> lg(console_mutex);
+
+                perror(" ioctl(SG_IOSUBMIT, READ_16)");
+            }
+            close(sg_fd);
+            return -1;
+        }
+        pt2 = pt;
+        if (ioctl(sg_fd, SG_IOSUBMIT, &pt2) < 0) {
+            {
+                lock_guard<mutex> lg(console_mutex);
+
+                perror(" ioctl(SG_IOSUBMIT, READ_16) 2");
+            }
+            close(sg_fd);
+            return -1;
+        }
+
+        while (((res = ioctl(sg_fd, SG_IORECEIVE, &pt)) < 0) &&
+               (EAGAIN == errno)) {
+            ++eagains;
+            if (wait_ms > 0)
+                this_thread::sleep_for(milliseconds{wait_ms});
+            else if (0 == wait_ms)
+                this_thread::yield();
+            else if (-2 == wait_ms)
+                sleep(0);                   // process yield ??
+        }
+        if (res < 0) {
+            {
+                lock_guard<mutex> lg(console_mutex);
+
+                perror(" ioctl(SG_IORECEIVE, READ_16)");
+            }
+            close(sg_fd);
+            return -1;
+        }
+        /* now for the error processing */
+        ok = 0;
+        switch (sg_err_category_new(pt.device_status, pt.transport_status,
+                pt.driver_status, sense_buffer, pt.response_len)) {
+        case SG_LIB_CAT_CLEAN:
+            ok = 1;
+            break;
+        case SG_LIB_CAT_RECOVERED:
+            {
+                lock_guard<mutex> lg(console_mutex);
+
+                fprintf(stderr, "Recovered error on READ_16, continuing\n");
+            }
+            ok = 1;
+            break;
+        default: /* won't bother decoding other categories */
+            {
+                lock_guard<mutex> lg(console_mutex);
+
+                sg_linux_sense_print("READ_16 command error",
+                                     pt.device_status, pt.transport_status,
+                                     pt.driver_status, sense_buffer,
+                                     pt.response_len, true);
+                // ok stays 0, so the failure exit further down is taken
+            }
+            break;
+        }
+        if (ok) {
+            while (((res = ioctl(sg_fd, SG_IORECEIVE, &pt2)) < 0) &&
+                   (EAGAIN == errno)) {
+                ++eagains;
+                if (wait_ms > 0)
+                    this_thread::sleep_for(milliseconds{wait_ms});
+                else if (0 == wait_ms)
+                    this_thread::yield();
+                else if (-2 == wait_ms)
+                    sleep(0);                   // process yield ??
+            }
+            if (res < 0) {
+                {
+                    lock_guard<mutex> lg(console_mutex);
+
+                    perror(" ioctl(SG_IORECEIVE, READ_16) 2");
+                }
+                close(sg_fd);
+                return -1;
+            }
+            pt = pt2;   // status checks below then see the 2nd response
+            /* now for the error processing */
+            ok = 0;
+            switch (sg_err_category_new(pt.device_status, pt.transport_status,
+                    pt.driver_status, sense_buffer, pt.response_len)) {
+            case SG_LIB_CAT_CLEAN:
+                ok = 1;
+                break;
+            case SG_LIB_CAT_RECOVERED:
+                {
+                    lock_guard<mutex> lg(console_mutex);
+
+                    fprintf(stderr, "%s: Recovered error on READ_16, "
+                            "continuing 2\n", __func__);
+                }
+                ok = 1;
+                break;
+            default: /* won't bother decoding other categories */
+                {
+                    lock_guard<mutex> lg(console_mutex);
+
+                    sg_linux_sense_print("READ_16 command error 2",
+                                         pt.device_status,
+                                         pt.transport_status,
+                                         pt.driver_status, sense_buffer,
+                                         pt.response_len, true);
+                    // ok stays 0, so the failure exit just below is taken
+                }
+                break;
+            }
+        }
+        if (! ok) {
+            close(sg_fd);
+            return -1;
+        }
+
+        u = sg_get_unaligned_be32(lb);
+        // Assuming u starts test as even (probably 0), expect it to stay even
+        if (0 == k)
+            odd = (1 == (u % 2));
+        ++u;
+        sg_put_unaligned_be32(u, lb);
+
+        if (wait_ms > 0)       /* allow daylight for bad things ... */
+            this_thread::sleep_for(milliseconds{wait_ms});
+        else if (0 == wait_ms)
+            this_thread::yield();
+        else if (-2 == wait_ms)
+            sleep(0);                   // process yield ??
+
+        /* Prepare WRITE_16 command */
+        memset(&pt, 0, sizeof(pt));
+        pt.guard = 'Q';
+        pt.request_len = sizeof(w16CmdBlk);
+        pt.max_response_len = sizeof(sense_buffer);
+        // v4 header has no dxfer_direction; the dout_* fields describe data-out
+        pt.dout_xfer_len = WRITE16_REPLY_LEN;
+        pt.dout_xferp = (uint64_t)(sg_uintptr_t)lb;
+        pt.request = (uint64_t)(sg_uintptr_t)w16CmdBlk;
+        pt.response = (uint64_t)(sg_uintptr_t)sense_buffer;
+        pt.timeout = 20000;     /* 20000 millisecs == 20 seconds */
+        pt.request_extra = id;  /* pack_id field */
+
+        if (ioctl(sg_fd, SG_IO, &pt) < 0) {
+            {
+                lock_guard<mutex> lg(console_mutex);
+
+                perror(" WRITE_16 SG_IO ioctl error");
+            }
+            close(sg_fd);
+            return -1;
+        }
+        /* now for the error processing */
+        ok = 0;
+        switch (sg_err_category_new(pt.device_status, pt.transport_status,
+                pt.driver_status, sense_buffer, pt.response_len)) {
+        case SG_LIB_CAT_CLEAN:
+            ok = 1;
+            break;
+        case SG_LIB_CAT_RECOVERED:
+            {
+                lock_guard<mutex> lg(console_mutex);
+
+                fprintf(stderr, "%s: Recovered error on WRITE_16, "
+                        "continuing\n", __func__);
+            }
+            ok = 1;
+            break;
+        default: /* won't bother decoding other categories */
+            {
+                lock_guard<mutex> lg(console_mutex);
+
+                sg_linux_sense_print("WRITE_16 command error",
+                                     pt.device_status, pt.transport_status,
+                                     pt.driver_status, sense_buffer,
+                                     pt.response_len, true);
+            }
+            break;
+        }
+        if (! ok) {
+            close(sg_fd);
+            return -1;
+        }
+    }
+    close(sg_fd);
+    return odd;
+}
+
 
 
 #define INQ_REPLY_LEN 96
@@ -500,8 +785,18 @@
              << block << endl;
     }
     for (k = 0; k < num; ++k) {
-        res = do_rd_inc_wr_twice(dev_name, lba, block, excl, wait_ms, k,
-                                 thr_ebusy_count, thr_eagain_count);
+	if (sg_ifc_ver == 3)
+            res = do_rd_inc_wr_twice_v3(dev_name, lba, block, excl, wait_ms,
+					k, thr_ebusy_count, thr_eagain_count);
+	else if (sg_ifc_ver == 4)
+            res = do_rd_inc_wr_twice_v4(dev_name, lba, block, excl, wait_ms,
+					k, thr_ebusy_count, thr_eagain_count);
+	else {
+	    lock_guard<mutex> lg(console_mutex);
+
+	    cerr << "sg_ifc_ver=" << sg_ifc_ver << " not supported" << endl;
+	    res = -1;
+	}
         if (res < 0)
             break;
         if (res)
@@ -548,6 +843,12 @@
         else if (0 == memcmp("-h", argv[k], 2)) {
             usage();
             return 0;
+        } else if (0 == memcmp("-i", argv[k], 2)) {
+            ++k;
+            if ((k < argc) && isdigit(*argv[k]))
+                sg_ifc_ver = atoi(argv[k]);
+            else
+                break;
         } else if (0 == memcmp("-l", argv[k], 2)) {
             ++k;
             if ((k < argc) && isdigit(*argv[k]))
diff --git a/testing/sg_tst_excl2.cpp b/testing/sg_tst_excl2.cpp
index 9a03906..491d7fc 100644
--- a/testing/sg_tst_excl2.cpp
+++ b/testing/sg_tst_excl2.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 Douglas Gilbert.
+ * Copyright (c) 2013-2019 Douglas Gilbert.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -45,8 +45,9 @@
 #include <sys/stat.h>
 #include "sg_lib.h"
 #include "sg_pt.h"
+#include "sg_unaligned.h"
 
-static const char * version_str = "1.08 20181207";
+static const char * version_str = "1.09 20190321";
 static const char * util_name = "sg_tst_excl2";
 
 /* This is a test program for checking O_EXCL on open() works. It uses
@@ -209,10 +210,8 @@
     char ebuff[EBUFF_SZ];
     int open_flags = O_RDWR;
 
-    r16CmdBlk[6] = w16CmdBlk[6] = (lba >> 24) & 0xff;
-    r16CmdBlk[7] = w16CmdBlk[7] = (lba >> 16) & 0xff;
-    r16CmdBlk[8] = w16CmdBlk[8] = (lba >> 8) & 0xff;
-    r16CmdBlk[9] = w16CmdBlk[9] = lba & 0xff;
+    sg_put_unaligned_be64(lba, r16CmdBlk + 2);
+    sg_put_unaligned_be64(lba, w16CmdBlk + 2);
     if (! block)
         open_flags |= O_NONBLOCK;
     if (excl)
@@ -267,15 +266,12 @@
             goto err;
         }
 
-        u = (lb[0] << 24) + (lb[1] << 16) + (lb[2] << 8) + lb[3];
+        u = sg_get_unaligned_be32(lb);
         // Assuming u starts test as even (probably 0), expect it to stay even
         if (0 == k)
             odd = (1 == (u % 2));
         ++u;
-        lb[0] = (u >> 24) & 0xff;
-        lb[1] = (u >> 16) & 0xff;
-        lb[2] = (u >> 8) & 0xff;
-        lb[3] = u & 0xff;
+	sg_put_unaligned_be32(u, lb);
 
         if (wait_ms > 0)       /* allow daylight for bad things ... */
             this_thread::sleep_for(milliseconds{wait_ms});
diff --git a/testing/sg_tst_excl3.cpp b/testing/sg_tst_excl3.cpp
index bd15389..6d32a4f 100644
--- a/testing/sg_tst_excl3.cpp
+++ b/testing/sg_tst_excl3.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2018 Douglas Gilbert.
+ * Copyright (c) 2013-2019 Douglas Gilbert.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,10 +43,12 @@
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+
 #include "sg_lib.h"
 #include "sg_pt.h"
+#include "sg_unaligned.h"
 
-static const char * version_str = "1.06 20181207";
+static const char * version_str = "1.07 20190321";
 static const char * util_name = "sg_tst_excl3";
 
 /* This is a test program for checking O_EXCL on open() works. It uses
@@ -54,7 +56,7 @@
  * to "break" O_EXCL. The strategy is to open a device O_EXCL|O_NONBLOCK
  * and do a double increment on a LB then close it from a single thread.
  * the remaining threads open that device O_NONBLOCK and do a read and
- * note of the number is odd. Assuming the count starts as an even
+ * note if the number is odd. Assuming the count starts as an even
  * (typically 0) then it should remain even. Odd instances
  * are counted and reported at the end of the program, after all threads
  * have completed.
@@ -215,10 +217,8 @@
     char ebuff[EBUFF_SZ];
     int open_flags = O_RDWR;
 
-    r16CmdBlk[6] = w16CmdBlk[6] = (lba >> 24) & 0xff;
-    r16CmdBlk[7] = w16CmdBlk[7] = (lba >> 16) & 0xff;
-    r16CmdBlk[8] = w16CmdBlk[8] = (lba >> 8) & 0xff;
-    r16CmdBlk[9] = w16CmdBlk[9] = lba & 0xff;
+    sg_put_unaligned_be64(lba, r16CmdBlk + 2);
+    sg_put_unaligned_be64(lba, w16CmdBlk + 2);
     if (! block)
         open_flags |= O_NONBLOCK;
     if (excl)
@@ -273,7 +273,7 @@
             goto err;
         }
 
-        u = (lb[0] << 24) + (lb[1] << 16) + (lb[2] << 8) + lb[3];
+	u = sg_get_unaligned_be32(lb);
         // Assuming u starts test as even (probably 0), expect it to stay even
         if (0 == k)
             odd = (1 == (u % 2));
@@ -288,10 +288,7 @@
         if (read_only)
             break;
         ++u;
-        lb[0] = (u >> 24) & 0xff;
-        lb[1] = (u >> 16) & 0xff;
-        lb[2] = (u >> 8) & 0xff;
-        lb[3] = u & 0xff;
+	sg_put_unaligned_be32(u, lb);
 
         /* Prepare WRITE_16 command */
         clear_scsi_pt_obj(ptp);
diff --git a/testing/sg_tst_ioctl.c b/testing/sg_tst_ioctl.c
index ea3dde9..5e6b0ef 100644
--- a/testing/sg_tst_ioctl.c
+++ b/testing/sg_tst_ioctl.c
@@ -15,6 +15,9 @@
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <ctype.h>
 #include <string.h>
 #include <errno.h>
 #include <sys/ioctl.h>
@@ -53,9 +56,9 @@
  * later of the Linux sg driver.  */
 
 
-static const char * version_str = "Version: 1.04  20190201";
+static const char * version_str = "Version: 1.06  20190323";
 
-#define INQ_REPLY_LEN 96
+#define INQ_REPLY_LEN 128
 #define INQ_CMD_LEN 6
 #define SDIAG_CMD_LEN 6
 #define SENSE_BUFFER_LEN 96
@@ -80,11 +83,14 @@
 static bool ioctl_only = false;
 static bool q_at_tail = false;
 static bool write_only = false;
+static bool mrq_immed = false;  /* if set, also sets mrq_iosubmit */
+static bool mrq_iosubmit = false;
 
 static int childs_pid = 0;
 static int q_len = DEF_Q_LEN;
 static int sleep_secs = 0;
 static int reserve_buff_sz = DEF_RESERVE_BUFF_SZ;
+static int num_mrqs = 0;
 static int verbose = 0;
 
 static const char * relative_cp = NULL;
@@ -93,13 +99,20 @@
 static void
 usage(void)
 {
-    printf("Usage: sg_tst_ioctl [-f] [-h] [-l=Q_LEN] [-o] [-r=SZ] [-s=SEC] "
-           "[-t]\n"
-           "                    [-v] [-V] [-w] <sg_device> [<sg_device2>]\n"
+    printf("Usage: sg_tst_ioctl [-f] [-h] [-l=Q_LEN] [-m=MRQS[,I|S]] [-o] "
+           "[-r=SZ]\n"
+           "                    [-s=SEC] [-t] [-v] [-V] [-w] <sg_device> "
+           "[<sg_device2>]\n"
            " where:\n"
            "      -f      fork and test share between processes\n"
            "      -h      help: print usage message then exit\n"
            "      -l=Q_LEN    queue length, between 1 and 511 (def: 16)\n"
+           "      -m=MRQS[,I|S]    test multi-req, MRQS number to do; if "
+           "the letter\n"
+           "                     'I' is appended after a comma, then do "
+           "IMMED mrq;\n"
+           "                     'S' is appended, then use "
+           "ioctl(SG_IOSUBMIT)\n"
            "      -o      ioctls only, then exit\n"
            "      -r=SZ     reserve buffer size in KB (def: 256 --> 256 "
            "KB)\n"
@@ -240,7 +253,6 @@
     seip->ctl_flags_rd_mask |= SG_CTL_FLAGM_UNSHARE;
     seip->ctl_flags_rd_mask |= SG_CTL_FLAGM_MASTER_FINI;
     seip->ctl_flags_rd_mask |= SG_CTL_FLAGM_MASTER_ERR;
-    seip->ctl_flags_rd_mask |= SG_CTL_FLAGM_CHECK_FOR_MORE;
     seip->ctl_flags |= SG_CTL_FLAGM_TIME_IN_NS;
 
     if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0) {
@@ -286,9 +298,6 @@
         if (SG_CTL_FLAGM_MASTER_ERR & seip->ctl_flags_rd_mask)
             printf("  %sMASTER_ERR: %s\n", cp,
                    (SG_CTL_FLAGM_MASTER_ERR & cflags) ? "true" : "false");
-        if (SG_CTL_FLAGM_CHECK_FOR_MORE & seip->ctl_flags_rd_mask)
-            printf("  %sCHECK_FOR_MORE: %s\n", cp,
-                   (SG_CTL_FLAGM_CHECK_FOR_MORE & cflags) ? "true" : "false");
     }
     if (SG_SEIM_MINOR_INDEX & seip->sei_rd_mask)
         printf("  %sminor_index: %u\n", cp, seip->minor_index);
@@ -437,14 +446,134 @@
     return 0;
 }
 
-#include <linux/fs.h>
-#include <linux/blktrace_api.h>
+/* Builds mrqs sg v4 requests, alternating SEND DIAGNOSTIC and INQUIRY, and
+ * issues them on sg_fd as one multiple requests (mrq) control object. When
+ * sg_fd2 is >= 0 it is first set up to share with sg_fd (the master).
+ * Returns 0 on success, otherwise an errno value. */
+static int
+do_mrqs(int sg_fd, int sg_fd2, int mrqs)
+{
+    bool both = (sg_fd2 >= 0);
+    int k, arr_v4_sz, good;
+    int res = 0;
+    struct sg_io_v4 * arr_v4;
+    struct sg_io_v4 * h4p;
+    struct sg_io_v4 mrq_h4;
+    struct sg_io_v4 * mrq_h4p = &mrq_h4;
+    uint8_t sense_buffer[SENSE_BUFFER_LEN];
+    uint8_t inq_cdb[INQ_CMD_LEN] =      /* Device Id VPD page */
+                                {0x12, 0x1, 0x83, 0, INQ_REPLY_LEN, 0};
+    uint8_t sdiag_cdb[SDIAG_CMD_LEN] =
+                                {0x1d, 0x10 /* PF */, 0, 0, 0, 0};
+    uint8_t inqBuff[INQ_REPLY_LEN];
+
+    if (both) {
+        struct sg_extended_info sei;
+        struct sg_extended_info * seip = &sei;
+
+        memset(seip, 0, sizeof(*seip));
+        seip->sei_wr_mask |= SG_SEIM_SHARE_FD;
+        seip->sei_rd_mask |= SG_SEIM_SHARE_FD;
+        seip->share_fd = sg_fd;         /* master */
+        if (ioctl(sg_fd2, SG_SET_GET_EXTENDED, seip) < 0) {
+            res = errno;
+            pr2serr("ioctl(sg_fd2, SG_SET_GET_EXTENDED) shared_fd, "
+                    "failed errno=%d %s\n", res, strerror(res));
+            return res;
+        }
+    }
+    memset(inqBuff, 0, sizeof(inqBuff));
+    memset(mrq_h4p, 0, sizeof(*mrq_h4p));
+    mrq_h4p->guard = 'Q';
+    mrq_h4p->flags = SGV4_FLAG_MULTIPLE_REQS;
+    if (mrq_immed)
+        mrq_h4p->flags |= SGV4_FLAG_IMMED;
+    arr_v4 = calloc(mrqs, sizeof(*arr_v4));
+    if (NULL == arr_v4) {
+        pr2serr("do_mrqs: calloc of %d request objects failed\n", mrqs);
+        return ENOMEM;          /* nothing to free yet, so bypass fini */
+    }
+    arr_v4_sz = mrqs * sizeof(*arr_v4);
+
+    for (k = 0; k < mrqs; ++k) {
+        h4p = arr_v4 + k;
+
+        h4p->guard = 'Q';
+        /* calloc() zeroed the rest, including ->protocol and ->subprotocol */
+        if (0 == (k % 2)) {
+            h4p->request_len = sizeof(sdiag_cdb);
+            h4p->request = (uint64_t)(uintptr_t)sdiag_cdb;
+            /* all din and dout fields are zero */
+        } else {
+            h4p->request_len = sizeof(inq_cdb);
+            h4p->request = (uint64_t)(uintptr_t)inq_cdb;
+            h4p->din_xfer_len = INQ_REPLY_LEN;
+            h4p->din_xferp = (uint64_t)(uintptr_t)inqBuff;
+            if (both)
+                h4p->flags |= SGV4_FLAG_DO_ON_OTHER;
+        }
+        h4p->response = (uint64_t)(uintptr_t)sense_buffer;
+        h4p->max_response_len = sizeof(sense_buffer);
+        h4p->timeout = 20000;     /* 20000 millisecs == 20 seconds */
+        h4p->request_extra = k + 3;      /* so pack_id doesn't start at 0 */
+        /* default is to queue at head (in SCSI mid level) */
+        if (q_at_tail)
+            h4p->flags |= SG_FLAG_Q_AT_TAIL;
+        else
+            h4p->flags |= SG_FLAG_Q_AT_HEAD;
+    }
+    mrq_h4p->din_xferp = (uint64_t)(uintptr_t)arr_v4;
+    mrq_h4p->din_xfer_len = arr_v4_sz;
+    mrq_h4p->dout_xferp = mrq_h4p->din_xferp;
+    mrq_h4p->dout_xfer_len = mrq_h4p->din_xfer_len;
+    if (ioctl(sg_fd, (mrq_iosubmit ? SG_IOSUBMIT : SG_IO), mrq_h4p) < 0) {
+        res = errno;
+        pr2serr("ioctl(SG_IO%s, mrq) failed, errno=%d %s\n",
+                (mrq_iosubmit ? "SUBMIT" : ""), res, strerror(res));
+        goto fini;
+    }
+    if (mrq_immed) {
+        mrq_h4p->flags = SGV4_FLAG_MULTIPLE_REQS;   /* zap SGV4_FLAG_IMMED */
+        if (ioctl(sg_fd, SG_IORECEIVE, mrq_h4p) < 0) {
+            res = errno;
+            pr2serr("ioctl(SG_IORECEIVE, mrq) failed, errno=%d %s\n",
+                    res, strerror(res));
+            goto fini;
+        }
+    }
+
+    for (k = 0, good = 0; k < mrqs; ++k) {
+        h4p = arr_v4 + k;
+        if (! (h4p->driver_status || h4p->transport_status ||
+               h4p->device_status)) {
+            if (h4p->info & SG_INFO_MRQ_FINI)
+                ++good;
+        }
+    }
+    if (good > 0) {
+        printf("Final INQUIRY response:\n");
+        hex2stdout(inqBuff, INQ_REPLY_LEN, 0);
+    }
+    printf("Good responses: %d, bad responses: %d\n", good, mrqs - good);
+    if (mrq_h4p->driver_status != 0)
+        printf("Master mrq object: driver_status=%d\n",
+               mrq_h4p->driver_status);
+    h4p = arr_v4 + mrqs - 1;
+    if (h4p->driver_status != 0)
+        printf("Last mrq object: driver_status=%d\n", h4p->driver_status);
+
+fini:
+    free(arr_v4);
+    return res;
+}
+
 
 int
 main(int argc, char * argv[])
 {
     bool done;
     int sg_fd, k, ok, ver_num, pack_id, num_waiting;
+    int res = 0;
     int sg_fd2 = -1;
     int sock = -1;
     uint8_t inq_cdb[INQ_CMD_LEN] =
@@ -479,6 +608,26 @@
                 file_name = 0;
                 break;
             }
+        } else if (0 == memcmp("-m=", argv[k], 3)) {
+            num_mrqs = sg_get_num(argv[k] + 3);
+            if (num_mrqs < 1) {
+                printf("Expect -m= to take a number greater than 0\n");
+                file_name = 0;
+                break;
+            }
+            if ((cp = strchr(argv[k] + 3, ','))) {
+                int sfx = toupper((unsigned char)cp[1]);
+
+                mrq_iosubmit = true;    /* either suffix uses SG_IOSUBMIT */
+                if ('I' == sfx)
+                    mrq_immed = true;
+                else if ('S' != sfx) {
+                    printf("-m= option expects 'I' or 'S' as a suffix, "
+                           "after comma\n");
+                    file_name = 0;
+                    break;
+                }
+            }
         } else if (0 == memcmp("-o", argv[k], 2))
             ioctl_only = true;
         else if (0 == memcmp("-r=", argv[k], 3)) {
@@ -562,6 +711,11 @@
                     second_fname, sg_fd2);
     }
 
+    if (num_mrqs > 0) {
+        res = do_mrqs(sg_fd, sg_fd2, num_mrqs);
+        goto out;
+    }
+
     if (do_fork) {
         int pid;
         int sv[2];
@@ -731,5 +885,5 @@
     close(sg_fd);
     if (sg_fd2 >= 0)
         close(sg_fd2);
-    return 0;
+    return res;
 }
diff --git a/testing/sgh_dd.c b/testing/sgh_dd.cpp
similarity index 90%
rename from testing/sgh_dd.c
rename to testing/sgh_dd.cpp
index 5ff9e71..0fdfb36 100644
--- a/testing/sgh_dd.c
+++ b/testing/sgh_dd.cpp
@@ -22,7 +22,7 @@
  * in this case) are transferred to or from the sg device in a single SCSI
  * command.
  *
- * This version is designed for the linux kernel 2.4, 2.6, 3 and 4 series.
+ * This version is designed for the linux kernel 2.4, 2.6, 3, 4 and 5 series.
  *
  * sgp_dd is a Posix threads specialization of the sg_dd utility. Both
  * sgp_dd and sg_dd only perform special tasks when one or both of the given
@@ -46,7 +46,6 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include <stdbool.h>
-#include <stdatomic.h>
 #include <string.h>
 #include <ctype.h>
 #include <errno.h>
@@ -67,6 +66,10 @@
 #include <linux/fs.h>           /* for BLKSSZGET and friends */
 #include <sys/mman.h>           /* for mmap() system call */
 
+#include <vector>
+#include <array>
+#include <atomic>       // C++ header replacing <stdatomic.h>
+
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
@@ -98,7 +101,9 @@
 #include "sg_pr2serr.h"
 
 
-static const char * version_str = "1.20 20190212";
+using namespace std;
+
+static const char * version_str = "1.21 20190324";
 
 #ifdef __GNUC__
 #ifndef  __clang__
@@ -107,7 +112,7 @@
 #endif
 
 /* <<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>   xxxxxxxxxx   beware next line */
-#define SGH_DD_READ_COMPLET_AFTER 1
+// #define SGH_DD_READ_COMPLET_AFTER 1
 
 #define DEF_BLOCK_SIZE 512
 #define DEF_BLOCKS_PER_TRANSFER 128
@@ -125,7 +130,8 @@
 #define SGP_READ10 0x28
 #define SGP_WRITE10 0x2a
 #define DEF_NUM_THREADS 4
-#define MAX_NUM_THREADS SG_MAX_QUEUE
+#define MAX_NUM_THREADS 1024 /* was SG_MAX_QUEUE with v3 driver */
+#define DEF_NUM_MRQS 0
 
 #ifndef RAW_MAJOR
 #define RAW_MAJOR 255   /*unlikely value */
@@ -143,6 +149,8 @@
 
 #define EBUFF_SZ 768
 
+#define PROC_SCSI_SG_VERSION "/proc/scsi/sg/version"
+
 struct flags_t {
     bool append;
     bool coe;
@@ -160,6 +168,7 @@
     bool swait;
     bool v3;
     bool v4;
+    bool v4_given;
 };
 
 typedef struct global_collection
@@ -177,6 +186,7 @@
     int in_partial;                   /*  | */
     bool in_stop;                     /*  | */
     pthread_mutex_t in_mutex;         /* -/ */
+    int nmrqs;                        /* Number of multi-reqs for sg v4 */
     int outfd;
     int64_t seek;
     int out_type;
@@ -212,10 +222,12 @@
 {       /* one instance per worker thread */
     bool wr;
     bool has_share;
+    bool both_sg;
     bool swait; /* interleave READ WRITE async copy segment: READ submit,
                  * WRITE submit, READ receive, WRITE receive */
     int id;
     int infd;
+    int nmrqs;
     int outfd;
     int out2fd;
     int outregfd;
@@ -249,30 +261,39 @@
     pthread_t a_pthr;
 } Thread_info;
 
-static atomic_int mono_pack_id = 0;
-static atomic_long pos_index = 0;
+// typedef vector< pair<int, struct sg_io_v4> > mrq_arr_t;
+typedef array<uint8_t, 32> cmd_at;
+typedef pair< vector<struct sg_io_v4>, vector<cmd_at> >   mrq_arr_t;
+
+static atomic<int> mono_pack_id(0);
+static atomic<long int> pos_index(0);
 
 static sigset_t signal_set;
 static pthread_t sig_listen_thread_id;
 
 static const char * proc_allow_dio = "/proc/scsi/sg/allow_dio";
 
-static void sg_in_rd_cmd(Gbl_coll * clp, Rq_elem * rep);
-static void sg_out_wr_cmd(Gbl_coll * clp, Rq_elem * rep, bool is_wr2);
+static void sg_in_rd_cmd(Gbl_coll * clp, Rq_elem * rep, mrq_arr_t & def_arr);
+static void sg_out_wr_cmd(Gbl_coll * clp, Rq_elem * rep, mrq_arr_t & def_arr,
+                          bool is_wr2);
 static bool normal_in_rd(Gbl_coll * clp, Rq_elem * rep, int blocks);
 static void normal_out_wr(Gbl_coll * clp, Rq_elem * rep, int blocks);
-static int sg_start_io(Rq_elem * rep, bool is_wr2);
+static int sg_start_io(Rq_elem * rep, mrq_arr_t & def_arr, bool is_wr2);
 static int sg_finish_io(bool wr, Rq_elem * rep, bool is_wr2);
 static int sg_in_open(Gbl_coll *clp, const char *inf, uint8_t **mmpp,
                       int *mmap_len);
 static int sg_out_open(Gbl_coll *clp, const char *outf, uint8_t **mmpp,
                        int *mmap_len);
-static void sg_in_out_interleave(Gbl_coll *clp, Rq_elem * rep);
+static void sg_in_out_interleave(Gbl_coll *clp, Rq_elem * rep,
+                                 mrq_arr_t & def_arr);
+static int sgh_do_def(Rq_elem * rep, mrq_arr_t & def_arr);
 
 #define STRERR_BUFF_LEN 128
 
 static pthread_mutex_t strerr_mut = PTHREAD_MUTEX_INITIALIZER;
 
+static bool have_sg_version = false;
+static int sg_version = 0;
 static bool shutting_down = false;
 static bool do_sync = false;
 static bool do_time = true;
@@ -347,7 +368,6 @@
     pthread_mutex_unlock(&strerr_mut);
 }
 
-
 static void
 lk_chk_n_print4(const char * leadin, struct sg_io_v4 * h4p, bool raw_sinfo)
 {
@@ -359,6 +379,57 @@
 }
 
 static void
+hex2stderr_lk(const uint8_t * b_str, int len, int no_ascii)
+{       /* hex2stderr() call made while holding strerr_mut */
+    pthread_mutex_lock(&strerr_mut);
+    hex2stderr(b_str, len, no_ascii);   /* arguments passed through as is */
+    pthread_mutex_unlock(&strerr_mut);
+}
+
+static void
+v4hdr_out_lk(const char * leadin, const sg_io_v4 * h4p)
+{       /* prints main fields of *h4p to stderr while holding strerr_mut */
+    pthread_mutex_lock(&strerr_mut);
+    if (leadin)         /* title line is optional, NULL skips it */
+        pr2serr("%s\n", leadin);
+    if (('Q' != h4p->guard) || (0 != h4p->protocol) || (0 != h4p->subprotocol))
+        pr2serr("  <<<sg_io_v4 _NOT_ properly set>>>\n");
+    pr2serr("  pointers: cdb=%s  sense=%s  din=%s  dout=%s\n",
+            (h4p->request ? "y" : "NULL"), (h4p->response ? "y" : "NULL"),
+            (h4p->din_xferp ? "y" : "NULL"),
+            (h4p->dout_xferp ? "y" : "NULL"));
+    pr2serr("  lengths: cdb=%u  sense=%u  din=%u  dout=%u\n",
+            h4p->request_len, h4p->max_response_len, h4p->din_xfer_len,
+            h4p->dout_xfer_len);
+    /* response_len is __u32 (%u); din_resid and dout_resid are __s32 (%d) */
+    pr2serr("  flags=0x%x  request_extra=0x%x   OUT--> response_len=%u\n",
+            h4p->flags, h4p->request_extra, h4p->response_len);
+    pr2serr("  driver_status=0x%x transport_status=0x%x device_status=0x%x\n",
+            h4p->driver_status, h4p->transport_status, h4p->device_status);
+    pr2serr("  info=0x%x  din_resid=%d  dout_resid=%d  spare_out=%u\n",
+            h4p->info, h4p->din_resid, h4p->dout_resid, h4p->spare_out);
+    pthread_mutex_unlock(&strerr_mut);
+}
+
+
+static void
+fetch_sg_version(void)
+{       /* (re)loads sg_version and have_sg_version from the proc file */
+    char line[96];
+    FILE * vfp = fopen(PROC_SCSI_SG_VERSION, "r");
+
+    /* defaults that stand unless the first line yields a non-zero number */
+    sg_version = 0;
+    have_sg_version = false;
+    if (NULL == vfp)
+        return;
+    if (fgets(line, sizeof(line) - 1, vfp) &&
+        (1 == sscanf(line, "%d", &sg_version)))
+        have_sg_version = (0 != sg_version);
+    fclose(vfp);
+}
+
+static void
 calc_duration_throughput(int contin)
 {
     struct timeval end_tm, res_tm;
@@ -530,10 +601,10 @@
     pr2serr("               [ae=AEN] [bpt=BPT] [cdbsz=6|10|12|16] [coe=0|1] "
             "[deb=VERB]\n"
             "               [dio=0|1] [elemsz_kb=ESK] [fua=0|1|2|3] "
-            "[of2=OFILE2]\n"
-            "               [ofreg=OFREG] [sync=0|1] [thr=THR] [time=0|1] "
-            "[verbose=VERB]\n"
-            "               [--dry-run] [--verbose]\n\n"
+            "[mrq=NRQS]\n"
+            "               [of2=OFILE2] [ofreg=OFREG] [sync=0|1] [thr=THR] "
+            "[time=0|1]\n"
+            "               [verbose=VERB] [--dry-run] [--verbose]\n\n"
             "  where the main options (shown in first group above) are:\n"
             "    bs          must be device logical block size (default "
             "512)\n"
@@ -563,8 +634,8 @@
             "is Linux specific and uses the v4 sg driver\n'share' capability "
             "if available. Use '-hh' or '-hhh' for more information.\n"
 #ifdef SGH_DD_READ_COMPLET_AFTER
-	    "\nIn this version oflag=swait does read completion _after_ "
-	    "write completion\n"
+            "\nIn this version oflag=swait does read completion _after_ "
+            "write completion\n"
 #endif
            );
     return;
@@ -577,6 +648,7 @@
             "are:\n"
             "    ae          abort every n commands (def: 0 --> don't abort "
             "any)\n"
+            "                [requires commands with > 1 ms duration]\n"
             "    bpt         is blocks_per_transfer (default is 128)\n"
             "    cdbsz       size of SCSI READ or WRITE cdb (default is 10)\n"
             "    coe         continue on error, 0->exit (def), "
@@ -589,6 +661,8 @@
             "    fua         force unit access: 0->don't(def), 1->OFILE, "
             "2->IFILE,\n"
             "                3->OFILE+IFILE\n"
+            "    mrq         even number of cmds placed in each sg call "
+            "(def: 0)\n"
             "    ofreg       OFREG is regular file or pipe to send what is "
             "read from\n"
             "                IFILE in the first half of each shared element\n"
@@ -633,9 +707,9 @@
             "is finished;\n"
             "                [oflag only] and IFILE and OFILE must be sg "
             "devices\n"
-            "    v3          use v3 sg interface which is the default (also "
-            "see v4)\n"
-            "    v4          use v4 sg interface (def: v3 unless other side "
+            "    v3          use v3 sg interface (def: v3 unless sg driver "
+            "is v4)\n"
+            "    v4          use v4 sg interface (def: v3 unless sg driver "
             "is v4)\n"
             "\n"
             "Copies IFILE to OFILE (and to OFILE2 if given). If IFILE and "
@@ -816,6 +890,7 @@
     bool own_outfd = false;
     bool own_out2fd = false;
     bool share_and_ofreg;
+    mrq_arr_t def_arr;  /* MRQ deferred array (vector) */
 
     tip = (Thread_info *)v_tip;
     clp = tip->gcp;
@@ -825,7 +900,7 @@
     /* Following clp members are constant during lifetime of thread */
     rep->id = tip->id;
     if (vb > 0)
-        pr2serr_lk("Starting worker thread %d\n", rep->id);
+        pr2serr_lk("%d <-- Starting worker thread\n", rep->id);
     if (! clp->in_flags.mmap) {
         rep->buffp = sg_memalign(sz, 0 /* page align */, &rep->alloc_bp,
                                  false);
@@ -843,9 +918,14 @@
     rep->cdbsz_out = clp->cdbsz_out;
     rep->in_flags = clp->in_flags;
     rep->out_flags = clp->out_flags;
+    rep->nmrqs = clp->nmrqs;
     rep->aen = clp->aen;
     rep->rep_count = 0;
 
+    if ((FT_SG == clp->in_type) && (FT_SG == clp->out_type) &&
+        (rep->infd != rep->outfd))
+        rep->both_sg = true;
+
     if (rep->in_flags.same_fds || rep->out_flags.same_fds) {
         /* we are sharing a single pair of fd_s across all threads */
         if (rep->out_flags.swait && (! swait_reported)) {
@@ -924,16 +1004,18 @@
     /* vvvvvvvvvvvvvv  Main segment copy loop  vvvvvvvvvvvvvvvvvvvvvvv */
     while (1) {
         rep->wr = false;
-        my_index = atomic_fetch_add(&pos_index, clp->bpt);
+        my_index = atomic_fetch_add(&pos_index, (long int)clp->bpt);
         /* Start of READ half of a segment */
         status = pthread_mutex_lock(&clp->in_mutex);
         if (0 != status) err_exit(status, "lock in_mutex");
+#if 0
         if (clp->in_stop || (clp->in_count <= 0)) {
             /* no more to do, exit loop then thread */
             status = pthread_mutex_unlock(&clp->in_mutex);
             if (0 != status) err_exit(status, "unlock in_mutex");
             break;
         }
+#endif
         if (dd_count >= 0) {
             if (my_index >= dd_count) {
                 status = pthread_mutex_unlock(&clp->in_mutex);
@@ -956,9 +1038,9 @@
         pthread_cleanup_push(cleanup_in, (void *)clp);
         if (FT_SG == clp->in_type) {
             if (rep->swait)
-                sg_in_out_interleave(clp, rep);
+                sg_in_out_interleave(clp, rep, def_arr);
             else
-                sg_in_rd_cmd(clp, rep); /* unlocks in_mutex mid operation */
+                sg_in_rd_cmd(clp, rep, def_arr); /* unlocks in_mutex mid op */
         } else {
             stop_after_write = normal_in_rd(clp, rep, blocks);
             status = pthread_mutex_unlock(&clp->in_mutex);
@@ -988,6 +1070,7 @@
         }
 
 skip_force_out_sequence:
+#if 0
         if (clp->out_stop || (clp->out_count <= 0)) {
             if (! clp->out_stop)
                 clp->out_stop = true;
@@ -995,6 +1078,7 @@
             if (0 != status) err_exit(status, "unlock out_mutex");
             break;
         }
+#endif
         if (stop_after_write)
             clp->out_stop = true;
 
@@ -1002,6 +1086,12 @@
         clp->out_count -= blocks;
 
         if (0 == rep->num_blks) {
+            if ((rep->nmrqs > 0) && (def_arr.first.size() > 0)) {
+                if (rep->debug)
+                    pr2serr_lk("thread=%d: tail-end, to_do=%u\n", rep->id,
+                               (uint32_t)def_arr.first.size());
+                sgh_do_def(rep, def_arr);
+            }
             clp->out_stop = true;
             stop_after_write = true;
             status = pthread_mutex_unlock(&clp->out_mutex);
@@ -1027,7 +1117,7 @@
                 status = pthread_mutex_unlock(&clp->out_mutex);
                 if (0 != status) err_exit(status, "unlock out_mutex");
             } else
-                sg_out_wr_cmd(clp, rep, false); /* releases out_mutex */
+                sg_out_wr_cmd(clp, rep, def_arr, false); /* release out_mtx */
         } else if (FT_DEV_NULL == clp->out_type) {
             /* skip actual write operation */
             clp->out_rem_count -= blocks;
@@ -1047,7 +1137,8 @@
             pthread_cleanup_push(cleanup_out, (void *)clp);
             status = pthread_mutex_lock(&clp->out2_mutex);
             if (0 != status) err_exit(status, "lock out2_mutex");
-            sg_out_wr_cmd(clp, rep, true); /* releases out2_mutex mid oper */
+            /* releases out2_mutex mid operation */
+            sg_out_wr_cmd(clp, rep, def_arr, true);
 
             pthread_cleanup_pop(0);
         }
@@ -1055,7 +1146,15 @@
         pthread_cond_broadcast(&clp->out_sync_cv);
         if (stop_after_write)
             break;
-    } /* end of while loop which copies segments */
+    }	/* ^^^^^^^^^^ end of main while loop which copies segments ^^^^^^ */
+#if 0
+    if ((rep->nmrqs > 0) && (def_arr.first.size() > 0)) {
+        if (rep->debug)
+            pr2serr_lk("thread=%d: tail-end, to_do=%u\n", rep->id,
+                       (uint32_t)def_arr.first.size());
+        sgh_do_def(rep, def_arr);
+    }
+#endif
 
     status = pthread_mutex_lock(&clp->in_mutex);
     if (0 != status) err_exit(status, "lock in_mutex");
@@ -1083,6 +1182,9 @@
     if (own_out2fd && (rep->out2fd >= 0))
         close(rep->out2fd);
     pthread_cond_broadcast(&clp->out_sync_cv);
+    if (rep->num_blks > 0)
+        pr2serr("%d <-- thread exiting with rep->num_blks=%d\n", rep->id,
+                rep->num_blks);
     return stop_after_write ? NULL : clp;
 }
 
@@ -1254,13 +1356,13 @@
 
 /* Enters this function holding in_mutex */
 static void
-sg_in_rd_cmd(Gbl_coll * clp, Rq_elem * rep)
+sg_in_rd_cmd(Gbl_coll * clp, Rq_elem * rep, mrq_arr_t & def_arr)
 {
     int res;
     int status;
 
     while (1) {
-        res = sg_start_io(rep, false);
+        res = sg_start_io(rep, def_arr, false);
         if (1 == res)
             err_exit(ENOMEM, "sg starting in command");
         else if (res < 0) {
@@ -1374,7 +1476,7 @@
 
 /* Enters this function holding out_mutex */
 static void
-sg_out_wr_cmd(Gbl_coll * clp, Rq_elem * rep, bool is_wr2)
+sg_out_wr_cmd(Gbl_coll * clp, Rq_elem * rep, mrq_arr_t & def_arr, bool is_wr2)
 {
     int res;
     int status;
@@ -1384,7 +1486,7 @@
         sg_wr_swap_share(rep, rep->out2fd, true);
 
     while (1) {
-        res = sg_start_io(rep, is_wr2);
+        res = sg_start_io(rep, def_arr, is_wr2);
         if (1 == res)
             err_exit(ENOMEM, "sg starting out command");
         else if (res < 0) {
@@ -1453,7 +1555,76 @@
 
 /* Returns 0 on success, 1 if ENOMEM error else -1 for other errors. */
 static int
-sg_start_io(Rq_elem * rep, bool is_wr2)
+sgh_do_def(Rq_elem * rep, mrq_arr_t & def_arr)
+{       /* one SG_IO ioctl for all of def_arr, which is then emptied */
+    int n, k, res, fd;
+    struct sg_io_v4 * a_v4p;
+    struct sg_io_v4 ctl_v4;     /* controlling object given to the ioctl */
+
+    memset(&ctl_v4, 0, sizeof(ctl_v4));
+    ctl_v4.guard = 'Q';
+    a_v4p = def_arr.first.data();
+    n = def_arr.first.size();
+    for (k = 0; k < n; ++k) {   /* attach each header to its cdb copy */
+        struct sg_io_v4 * h4p = a_v4p + k;
+        uint8_t *cmdp = &def_arr.second[k].front();
+
+        h4p->request = (uint64_t)cmdp;  /* cdb held in def_arr.second[k] */
+        if (rep->debug > 3) {
+            pr2serr_lk("def_arr[%d]:\n", k);
+            hex2stderr_lk((const uint8_t *)(a_v4p + k), sizeof(*a_v4p), 1);
+        }
+    }
+    if (rep->both_sg)
+        fd = rep->infd;         /* assume share to rep->outfd */
+    else if (rep->infd >= 0)
+        fd = rep->infd;
+    else
+        fd = rep->outfd;        /* only used when infd is negative */
+    res = 0;
+    ctl_v4.flags = SGV4_FLAG_MULTIPLE_REQS | SGV4_FLAG_STOP_IF;
+    ctl_v4.din_xferp = (uint64_t)a_v4p;
+    ctl_v4.din_xfer_len = n * sizeof(*a_v4p);
+    ctl_v4.dout_xferp = (uint64_t)a_v4p;        /* same array as din_xferp */
+    ctl_v4.dout_xfer_len = n * sizeof(*a_v4p);
+    if (rep->debug > 2) {
+        pr2serr_lk("%s: Controlling object _before_ ioctl(SG_IO):\n",
+                   __func__);
+        if (rep->debug > 3)
+            hex2stderr_lk((const uint8_t *)&ctl_v4, sizeof(ctl_v4), 1);
+        v4hdr_out_lk("Controlling object before:", &ctl_v4);
+    }
+    res = ioctl(fd, SG_IO, &ctl_v4); // MULTIPLE_REQS | STOP_IF
+    if (res < 0) {      /* report, drop the queued requests and give up */
+        pr2serr_lk("%s: ioctl(SG_IO, MULTIPLE_REQS)-->%d, errno=%d: %s\n",
+                   __func__, res, errno, strerror(errno));
+        def_arr.first.clear();
+        def_arr.second.clear();
+        return -1;      /* same value for every errno, ENOMEM included */
+    }
+    if (rep->debug > 2) {
+        pr2serr_lk("%s: Controlling object output by ioctl(SG_IO):\n",
+                   __func__);
+        if (rep->debug > 3)
+            hex2stderr_lk((const uint8_t *)&ctl_v4, sizeof(ctl_v4), 1);
+        v4hdr_out_lk("Controlling object after:", &ctl_v4);
+        if (rep->debug > 3) {
+            for (k = 0; k < n; ++k) {
+                pr2serr_lk("AFTER: def_arr[%d]:\n", k);
+                v4hdr_out_lk(NULL, (a_v4p + k));
+                // hex2stderr_lk((const uint8_t *)(a_v4p + k), sizeof(*a_v4p),
+                                  // 1);
+            }
+        }
+    }
+    def_arr.first.clear();      /* both vectors restart empty */
+    def_arr.second.clear();
+    return res;         /* non-negative value that ioctl() returned */
+}
+
+/* Returns 0 on success, 1 if ENOMEM error else -1 for other errors. */
+static int
+sg_start_io(Rq_elem * rep, mrq_arr_t & def_arr, bool is_wr2)
 {
     bool wr = rep->wr;
     bool fua = wr ? rep->out_flags.fua : rep->in_flags.fua;
@@ -1560,6 +1731,19 @@
     h4p->usr_ptr = (uint64_t)rep;
     h4p->request_extra = rep->rq_id;    /* this is the pack_id */
     h4p->flags = flags;
+    if (rep->nmrqs > 0) {
+        if (rep->both_sg && (rep->outfd == fd))
+            h4p->flags |= SGV4_FLAG_DO_ON_OTHER;
+        cmd_at cmd_obj;
+        uint8_t * cmdp = &(cmd_obj[0]);
+        memcpy(cmdp, rep->cmd, cdbsz);
+        def_arr.first.push_back(*h4p);
+        def_arr.second.push_back(cmd_obj);
+        res = 0;
+        if ((int)def_arr.first.size() >= rep->nmrqs)
+            res = sgh_do_def(rep, def_arr);
+        return res;
+    }
     while (((res = ioctl(fd, SG_IOSUBMIT, h4p)) < 0) &&
            ((EINTR == errno) || (EAGAIN == errno)))
         sched_yield();  /* another thread may be able to progress */
@@ -1587,7 +1771,7 @@
                 if (res < 0)
                     pr2serr_lk("%s: ioctl(SG_IOABORT) failed: %s [%d]\n",
                                __func__, safe_strerror(errno), errno);
-                else if (rep->debug > 3)
+                else if (rep->debug > 1)
                     pr2serr_lk("%s: sending ioctl(SG_IOABORT) on rq_id=%d\n",
                                __func__, rep->rq_id);
             }   /* else got response, too late for timeout, so skip */
@@ -1676,6 +1860,8 @@
     return 0;
 
 do_v4:
+    if (rep->nmrqs > 0)
+        return 0;
     h4p = &rep->io_hdr4;
     while (((res = ioctl(fd, SG_IORECEIVE, h4p)) < 0) &&
            ((EINTR == errno) || (EAGAIN == errno)))
@@ -1710,10 +1896,9 @@
                      rep->rq_id, blk);
             lk_chk_n_print4(ebuff, h4p, false);
             if ((rep->debug > 4) && h4p->info)
-                pr2serr_lk(" info=0x%x sg_info_check=%d another_waiting=%d "
-                           "direct=%d detaching=%d aborted=%d\n", h4p->info,
+                pr2serr_lk(" info=0x%x sg_info_check=%d direct=%d "
+                           "detaching=%d aborted=%d\n", h4p->info,
                            !!(h4p->info & SG_INFO_CHECK),
-                           !!(h4p->info & SG_INFO_ANOTHER_WAITING),
                            !!(h4p->info & SG_INFO_DIRECT_IO),
                            !!(h4p->info & SG_INFO_DEVICE_DETACHING),
                            !!(h4p->info & SG_INFO_ABORTED));
@@ -1733,10 +1918,9 @@
         pr2serr_lk("%s: tid,rq_id=%d,%d: completed %s\n", __func__, rep->id,
                    rep->rq_id, cp);
         if ((rep->debug > 4) && h4p->info)
-            pr2serr_lk(" info=0x%x sg_info_check=%d another_waiting=%d "
-                       "direct=%d detaching=%d aborted=%d\n", h4p->info,
+            pr2serr_lk(" info=0x%x sg_info_check=%d direct=%d "
+                       "detaching=%d aborted=%d\n", h4p->info,
                        !!(h4p->info & SG_INFO_CHECK),
-                       !!(h4p->info & SG_INFO_ANOTHER_WAITING),
                        !!(h4p->info & SG_INFO_DIRECT_IO),
                        !!(h4p->info & SG_INFO_DEVICE_DETACHING),
                        !!(h4p->info & SG_INFO_ABORTED));
@@ -1746,14 +1930,14 @@
 
 /* Enter holding in_mutex, exits holding nothing */
 static void
-sg_in_out_interleave(Gbl_coll *clp, Rq_elem * rep)
+sg_in_out_interleave(Gbl_coll *clp, Rq_elem * rep, mrq_arr_t & def_arr)
 {
     int res, pid_read, pid_write;
     int status;
 
     while (1) {
         /* start READ */
-        res = sg_start_io(rep, false);
+        res = sg_start_io(rep, def_arr, false);
         pid_read = rep->rq_id;
         if (1 == res)
             err_exit(ENOMEM, "sg interleave starting in command");
@@ -1768,7 +1952,7 @@
 
         /* start WRITE */
         rep->wr = true;
-        res = sg_start_io(rep, false);
+        res = sg_start_io(rep, def_arr, false);
         pid_write = rep->rq_id;
         if (1 == res)
             err_exit(ENOMEM, "sg interleave starting out command");
@@ -1786,7 +1970,7 @@
 
 #ifdef SGH_DD_READ_COMPLET_AFTER
 #warning "SGH_DD_READ_COMPLET_AFTER is set (testing)"
-	goto write_complet;
+        goto write_complet;
 read_complet:
 #endif
 
@@ -1847,7 +2031,7 @@
 
 
 #ifdef SGH_DD_READ_COMPLET_AFTER
-	return;
+        return;
 
 write_complet:
 #endif
@@ -1892,7 +2076,7 @@
             if (0 != status) err_exit(status, "unlock out_mutex");
 
 #ifdef SGH_DD_READ_COMPLET_AFTER
-	goto read_complet;
+        goto read_complet;
 #endif
             return;
         default:
@@ -2011,9 +2195,10 @@
             fp->swait = true;
         else if (0 == strcmp(cp, "v3"))
             fp->v3 = true;
-        else if (0 == strcmp(cp, "v4"))
+        else if (0 == strcmp(cp, "v4")) {
             fp->v4 = true;
-        else {
+            fp->v4_given = true;
+        } else {
             pr2serr("unrecognised flag: %s\n", cp);
             return false;
         }
@@ -2144,10 +2329,16 @@
     clp->out2_type = FT_DEV_NULL;
     clp->cdbsz_in = DEF_SCSI_CDBSZ;
     clp->cdbsz_out = DEF_SCSI_CDBSZ;
+    clp->nmrqs = DEF_NUM_MRQS;
     inf[0] = '\0';
     outf[0] = '\0';
     out2f[0] = '\0';
     outregf[0] = '\0';
+    fetch_sg_version();
+    if (sg_version > 40000) {
+        clp->in_flags.v4 = true;
+        clp->out_flags.v4 = true;
+    }
 
     for (k = 1; k < argc; k++) {
         if (argv[k]) {
@@ -2232,6 +2423,13 @@
                 pr2serr("%sbad argument to 'iflag='\n", my_name);
                 return SG_LIB_SYNTAX_ERROR;
             }
+        } else if (0 == strcmp(key, "mrq")) {
+            clp->nmrqs = sg_get_num(buf);
+            if ((-1 == clp->nmrqs) || (1 == (clp->nmrqs % 2))) {
+                pr2serr("%sbad argument to 'mrq=', want even number or "
+                        "zero\n", my_name);
+                return SG_LIB_SYNTAX_ERROR;
+            }
         } else if (0 == strcmp(key, "obs")) {
             obs = sg_get_num(buf);
             if (-1 == obs) {
@@ -2451,7 +2649,7 @@
             }
         }
         clp->infp = inf;
-        if ((clp->in_flags.v3 || clp->in_flags.v4) &&
+        if ((clp->in_flags.v3 || clp->in_flags.v4_given) &&
             (FT_SG != clp->in_type)) {
             clp->in_flags.v3 = false;
             clp->in_flags.v4 = false;
@@ -2518,7 +2716,7 @@
             }
         }
         clp->outfp = outf;
-        if ((clp->out_flags.v3 || clp->out_flags.v4) &&
+        if ((clp->out_flags.v3 || clp->out_flags.v4_given) &&
             (FT_SG != clp->out_type)) {
             clp->out_flags.v3 = false;
             clp->out_flags.v4 = false;
@@ -2588,16 +2786,16 @@
         clp->out2fp = out2f;
     }
     if ((FT_SG == clp->in_type ) && (FT_SG == clp->out_type)) {
-        if (clp->in_flags.v4 && (! clp->out_flags.v3)) {
-            if (! clp->out_flags.v4) {
+        if (clp->in_flags.v4_given && (! clp->out_flags.v3)) {
+            if (! clp->out_flags.v4_given) {
                 clp->out_flags.v4 = true;
                 if (clp->debug)
                     pr2serr("Changing OFILE from v3 to v4, use oflag=v3 to "
                             "force v3\n");
             }
         }
-        if (clp->out_flags.v4 && (! clp->in_flags.v3)) {
-            if (! clp->in_flags.v4) {
+        if (clp->out_flags.v4_given && (! clp->in_flags.v3)) {
+            if (! clp->in_flags.v4_given) {
                 clp->in_flags.v4 = true;
                 if (clp->debug)
                     pr2serr("Changing IFILE from v3 to v4, use iflag=v3 to "
@@ -2809,7 +3007,8 @@
             status = pthread_join(tip->a_pthr, &vp);
             if (0 != status) err_exit(status, "pthread_join");
             if (clp->debug > 0)
-                pr2serr_lk("Worker thread k=%d terminated\n", k);
+                pr2serr_lk("%d <-- Worker thread terminated, vp=%s\n", k,
+                           ((vp == clp) ? "clp" : "NULL (or !clp)"));
         }
     }   /* started worker threads and here after they have all exited */
 
diff --git a/testing/sgs_dd.c b/testing/sgs_dd.c
index 6642ac1..624c41b 100644
--- a/testing/sgs_dd.c
+++ b/testing/sgs_dd.c
@@ -1,7 +1,7 @@
 /*
  * Test code for the extensions to the Linux OS SCSI generic ("sg")
  * device driver.
- * Copyright (C) 1999-2018 D. Gilbert and P. Allworth
+ * Copyright (C) 1999-2019 D. Gilbert and P. Allworth
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -78,7 +78,7 @@
 #include "sg_unaligned.h"
 
 
-static const char * version_str = "4.03 20190105";
+static const char * version_str = "4.04 20190324";
 static const char * my_name = "sgs_dd";
 
 #define DEF_BLOCK_SIZE 512
@@ -89,9 +89,9 @@
 #define SENSE_BUFF_LEN 32       /* Arbitrary, could be larger */
 #define DEF_TIMEOUT 40000       /* 40,000 millisecs == 40 seconds */
 #define S_RW_LEN 10             /* Use SCSI READ(10) and WRITE(10) */
-#define SGQ_MAX_RD_AHEAD 4
-#define SGQ_MAX_WR_AHEAD 4
-#define SGQ_NUM_ELEMS (SGQ_MAX_RD_AHEAD+ SGQ_MAX_WR_AHEAD + 1)
+#define SGQ_MAX_RD_AHEAD 32
+#define SGQ_MAX_WR_AHEAD 32
+#define SGQ_NUM_ELEMS (SGQ_MAX_RD_AHEAD + SGQ_MAX_WR_AHEAD + 1)
 
 #define SGQ_FREE 0
 #define SGQ_IO_STARTED 1
diff --git a/testing/uapi_sg.h b/testing/uapi_sg.h
index f109690..635bb58 100644
--- a/testing/uapi_sg.h
+++ b/testing/uapi_sg.h
@@ -14,7 +14,7 @@
  * Later extensions (versions 2, 3 and 4) to driver:
  *   Copyright (C) 1998 - 2018 Douglas Gilbert
  *
- * Version 4.0.06 (20190210)
+ * Version 4.0.07 (20190320)
  *  This version is for Linux 2.6, 3, 4 and 5 series kernels.
  *
  * Documentation
@@ -90,8 +90,7 @@
 /* following flag values can be OR-ed together in v3::flags or v4::flags */
 #define SG_FLAG_DIRECT_IO 1	/* default is indirect IO */
 /* SG_FLAG_UNUSED_LUN_INHIBIT is ignored in sg v4 driver */
-#define SG_FLAG_UNUSED_LUN_INHIBIT 2	/* default is overwrite lun in SCSI */
-				/* command block (when <= SCSI_2) */
+#define SG_FLAG_UNUSED_LUN_INHIBIT 2  /* ignored, was LUN overwrite in cdb */
 #define SG_FLAG_MMAP_IO 4	/* request memory mapped IO */
 /* no transfer of kernel buffers to/from user space; used for sharing */
 #define SG_FLAG_NO_DXFER 0x10000
@@ -110,12 +109,15 @@
 #define SGV4_FLAG_YIELD_TAG 0x8  /* sg_io_v4::request_tag set after SG_IOS */
 #define SGV4_FLAG_Q_AT_TAIL SG_FLAG_Q_AT_TAIL
 #define SGV4_FLAG_Q_AT_HEAD SG_FLAG_Q_AT_HEAD
-/* Flag values 0x100 and 0x200 not currently used */
+#define SGV4_FLAG_COMPLETE_B4  0x100
+#define SGV4_FLAG_SIG_ON_OTHER  0x200
 #define SGV4_FLAG_IMMED 0x400 /* for polling with SG_IOR, ignored in SG_IOS */
-/* Flag value 0x800 not currently used */
+#define SGV4_FLAG_STOP_IF 0x800    /* Stops sync mrq if error or warning */
 #define SGV4_FLAG_DEV_SCOPE 0x1000 /* permit SG_IOABORT to have wider scope */
 #define SGV4_FLAG_SHARE 0x2000	/* share IO buffer; needs SG_SEIM_SHARE_FD */
+#define SGV4_FLAG_DO_ON_OTHER 0x4000 /* available on either of shared pair */
 #define SGV4_FLAG_NO_DXFER SG_FLAG_NO_DXFER	/* needed for sharing */
+#define SGV4_FLAG_MULTIPLE_REQS 0x20000	/* n sg_io_v4s in data-in */
 
 /* Output (potentially OR-ed together) in v3::info or v4::info field */
 #define SG_INFO_OK_MASK 0x1
@@ -127,8 +129,8 @@
 #define SG_INFO_DIRECT_IO 0x2	/* direct IO requested and performed */
 #define SG_INFO_MIXED_IO 0x4	/* not used, always 0 */
 #define SG_INFO_DEVICE_DETACHING 0x8	/* completed successfully but ... */
-#define SG_INFO_ANOTHER_WAITING 0x10	/* needs SG_CTL_FLAGM_CHECK_FOR_MORE */
-#define SG_INFO_ABORTED 0x20	/* this command has been aborted */
+#define SG_INFO_ABORTED 0x10	/* this command has been aborted */
+#define SG_INFO_MRQ_FINI 0x20	/* marks multi-reqs that have finished */
 
 /*
  * Pointer to object of this structure filled by ioctl(SG_GET_SCSI_ID). Last
@@ -196,8 +198,7 @@
 /* rd> 1: master finished 0: not; wr> 1: finish share post master */
 #define SG_CTL_FLAGM_MASTER_FINI 0x100	/* wr> 0: setup for repeat slave req */
 #define SG_CTL_FLAGM_MASTER_ERR	0x200	/* rd: sharing, master got error */
-#define SG_CTL_FLAGM_CHECK_FOR_MORE 0x400 /* additional ready to read? */
-#define SG_CTL_FLAGM_ALL_BITS	0x7ff	/* should be OR of previous items */
+#define SG_CTL_FLAGM_ALL_BITS	0x3ff	/* should be OR of previous items */
 
 /* Write one of the following values to sg_extended_info::read_value, get... */
 #define SG_SEIRV_INT_MASK	0x0	/* get SG_SEIM_ALL_BITS */
@@ -207,6 +208,7 @@
 #define SG_SEIRV_DEV_FL_RQS	0x4	/* sum(fl rqs) on all of dev's fds */
 #define SG_SEIRV_TRC_SZ		0x5	/* current size of trace buffer */
 #define SG_SEIRV_TRC_MAX_SZ	0x6	/* maximum size of trace buffer */
+#define SG_SEIRV_SUBMITTED	0x7	/* number of mrqs submitted+unread */
 
 /*
  * A pointer to the following structure is passed as the third argument to