rename: sgs_dd-->sgh_dd; retrieve the original sgs_dd from archive, for testing SIGIO and RT signals

git-svn-id: https://svn.bingwo.ca/repos/sg3_utils/trunk@802 6180dd3e-e324-4e3e-922d-17de1ae2f315
diff --git a/ChangeLog b/ChangeLog
index 258b295..96f785c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,7 +2,7 @@
 some description at the top of its ".c" file. All utilities in the main
 directory have their own "man" pages. There is also a sg3_utils man page.
 
-Changelog for sg3_utils-1.45 [20181222] [svn: r801]
+Changelog for sg3_utils-1.45 [20181224] [svn: r802]
   - sg_ses: bug: --page= being overridden when --control
     and --data= also given; fix
   - sg_opcodes: expand MLU (18-102r0)
@@ -13,10 +13,11 @@
   - sg_scan (win32): expand limits for big arrays
   - rescan-scsi-bus: widen LUN 0 only scanning
   - testing/sg_tst_async: fix free_list issue
-  - testing/sg_tst_ioctl: for sg 3.9 driver
-  - testing/sgs_dd: for share in sg 3.9.02 driver
+  - testing/sg_tst_ioctl: for sg 4.0 driver
+  - testing/sgs_dd: for share in sg 4.0 driver
   - testing/sgh_dd: rename of sgs_dd to avoid ...
-  - testing/sgs_dd: from archive, for testing SIGPOLL
+  - testing/sgs_dd: back from archive, for testing
+    SIGPOLL (SIGIO) and realtime (RT) signals
   - sg_io_linux (sg_lib): add sg_linux_sense_print()
   - sg_pt_linux: uses sg v4 interface if sg driver
     >= 4.0.0 . Force sg v3 always by building with
diff --git a/debian/changelog b/debian/changelog
index 242f640..b70a85d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -2,7 +2,7 @@
 
   * New upstream version
 
- -- Douglas Gilbert <[email protected]>  Wed, 12 Dec 2018 22:00:00 -0500
+ -- Douglas Gilbert <[email protected]>  Sun, 23 Dec 2018 10:00:00 -0500
 
 sg3-utils (1.44-0.1) unstable; urgency=low
 
diff --git a/sg3_utils.spec b/sg3_utils.spec
index 7f11f8c..fa53967 100644
--- a/sg3_utils.spec
+++ b/sg3_utils.spec
@@ -84,7 +84,7 @@
 %{_libdir}/*.la
 
 %changelog
-* Wed Dec 12 2018 - dgilbert at interlog dot com
+* Sun Dec 23 2018 - dgilbert at interlog dot com
 - track t10 changes
   * sg3_utils-1.45
 
diff --git a/testing/sg_tst_ioctl.c b/testing/sg_tst_ioctl.c
index d0d7d15..180db0a 100644
--- a/testing/sg_tst_ioctl.c
+++ b/testing/sg_tst_ioctl.c
@@ -429,7 +429,7 @@
         else
             printf("%s: sock_fd_read() returned: %d, fd_ma=%d\n", __func__,
                    res, fd_ma);
-	/* yes it works! */
+        /* yes it works! */
     }
     return 0;
 }
diff --git a/testing/sgh_dd.c b/testing/sgh_dd.c
index 33994a4..f84bac5 100644
--- a/testing/sgh_dd.c
+++ b/testing/sgh_dd.c
@@ -91,7 +91,7 @@
 #include "sg_pr2serr.h"
 
 
-static const char * version_str = "1.08 20181221";
+static const char * version_str = "1.09 20181224";
 
 #ifdef __GNUC__
 #pragma GCC diagnostic ignored "-Wclobbered"
@@ -132,7 +132,6 @@
 #define EBUFF_SZ 768
 
 struct flags_t {
-    bool fds2;          /* flag is called '2fds' */
     bool append;
     bool coe;
     bool defres;        /* without this res_sz==bs*bpt */
@@ -145,6 +144,7 @@
     bool mmap;
     bool noshare;
     bool noxfer;
+    bool same_fds;
     bool v3;
     bool v4;
 };
@@ -179,15 +179,20 @@
     pthread_mutex_t out_mutex;        /*  | */
     pthread_cond_t out_sync_cv;       /*  | hold writes until "in order" */
     pthread_cond_t hold_1st_cv;       /* -/ wait for 1st thread to do 1 seg */
+    pthread_mutex_t out2_mutex;
     int bs;
     int bpt;
+    int outregfd;
+    int outreg_type;
     int dio_incomplete_count;   /* -\ */
     int sum_of_resids;          /* -/ */
     int debug;          /* both -v and deb=VERB bump this field */
     int dry_run;
     bool ofile_given;
+    bool ofile2_given;
     const char * infp;
     const char * outfp;
+    const char * out2fp;
 } Gbl_coll;
 
 typedef struct request_element
@@ -198,6 +203,7 @@
     int infd;
     int outfd;
     int out2fd;
+    int outregfd;
     int64_t blk;
     int num_blks;
     uint8_t * buffp;
@@ -230,11 +236,11 @@
 static const char * proc_allow_dio = "/proc/scsi/sg/allow_dio";
 
 static void sg_in_rd_cmd(Gbl_coll * clp, Rq_elem * rep);
-static void sg_out_wr_cmd(Gbl_coll * clp, Rq_elem * rep);
+static void sg_out_wr_cmd(Gbl_coll * clp, Rq_elem * rep, bool is_wr2);
 static bool normal_in_rd(Gbl_coll * clp, Rq_elem * rep, int blocks);
 static void normal_out_wr(Gbl_coll * clp, Rq_elem * rep, int blocks);
-static int sg_start_io(Rq_elem * rep);
-static int sg_finish_io(bool wr, Rq_elem * rep);
+static int sg_start_io(Rq_elem * rep, bool is_wr2);
+static int sg_finish_io(bool wr, Rq_elem * rep, bool is_wr2);
 static int sg_in_open(Gbl_coll *clp, const char *inf, uint8_t **mmpp,
                       int *mmap_len);
 static int sg_out_open(Gbl_coll *clp, const char *outf, uint8_t **mmpp,
@@ -486,34 +492,38 @@
     pr2serr("               [bpt=BPT] [cdbsz=6|10|12|16] [coe=0|1] "
             "[deb=VERB] [dio=0|1]\n"
             "               [elemsz_kb=ESK] [fua=0|1|2|3] [of2=OFILE2] "
-            "[sync=0|1]\n"
-            "               [thr=THR] [time=0|1] [verbose=VERB] [--dry-run] "
-            "[--verbose]\n\n"
+            "[ofreg=OFREG]\n"
+            "               [sync=0|1] [thr=THR] [time=0|1] [verbose=VERB] "
+            "[--dry-run]\n"
+            "                [--verbose]\n\n"
             "  where the main options (shown in first group above) are:\n"
             "    bs          must be device logical block size (default "
             "512)\n"
             "    count       number of blocks to copy (def: device size)\n"
             "    if          file or device to read from (def: stdin)\n"
-            "    iflag       comma separated list from: [2fds,coe,defres,dio,"
+            "    iflag       comma separated list from: [coe,defres,dio,"
             "direct,dpo,\n"
-            "                dsync,excl,fua,mmap,noshare,noxfer,null,v3,v4]\n"
-            "    of          file or device to write to (def: /dev/null which "
-            "is different\n"
-            "                from dd that defaults to stdout). If 'of=.' "
-            "assumes /dev/null\n"
-            "    oflag       comma separated list from: [2fds,append,coe,dio,"
+            "                dsync,excl,fua,mmap,noshare,noxfer,null,"
+            "same_fds,v3,v4]\n"
+            "    of          file or device to write to (def: /dev/null "
+            "N.B. different\n"
+            "                from dd it defaults to stdout). If 'of=.' "
+            "uses /dev/null\n"
+            "    of2         second file or device to write to (def: "
+            "/dev/null)\n"
+            "    oflag       comma separated list from: [append,coe,dio,"
             "direct,dpo,\n"
-            "                dsync,excl,fua,mmap,noshare,noxfer,null,v3,v4]\n"
+            "                dsync,excl,fua,mmap,noshare,noxfer,null,"
+            "same_fds,v3,v4]\n"
             "    seek        block position to start writing to OFILE\n"
             "    skip        block position to start reading from IFILE\n"
             "    --help|-h      output this usage message then exit\n"
             "    --version|-V   output version string then exit\n\n"
-            "Copy from IFILE to OFILE, similar to dd command. This utility "
-            "is specialized\nfor SCSI devices and uses multiple POSIX "
-            "threads. It expects one or both\nIFILE and OFILE to be sg "
-            "devices. It is Linux specific and uses the v4\nsg driver "
-            "'share' capbility if available (hence the 'sgs' part of its "
-            "name).\nUse '-hh' or '-hhh' for more information.\n"
+            "Copy IFILE to OFILE, similar to dd command. This utility is "
+            "specialized for\nSCSI devices and uses multiple POSIX threads. "
+            "It expects one or both IFILE\nand OFILE to be sg devices. It "
+            "is Linux specific and uses the v4 sg driver\n'share' capability "
+            "if available. Use '-hh' or '-hhh' for more information.\n"
            );
     return;
 page2:
@@ -531,13 +541,13 @@
             "of debug\n"
             "    dio         is direct IO, 1->attempt, 0->indirect IO (def)\n"
             "    elemsz_kb    scatter gather list element size in kilobytes "
-            "(def: 32 [KB])\n"
+            "(def: 32[KB])\n"
             "    fua         force unit access: 0->don't(def), 1->OFILE, "
             "2->IFILE,\n"
             "                3->OFILE+IFILE\n"
-            "    of2         OFILE2 is regular file or pipe to output what is "
-            "read from the\n"
-            "                disk in the first half of each shared element\n"
+            "    ofreg       OFREG is regular file or pipe to send what is "
+            "read from\n"
+            "                IFILE in the first half of each shared element\n"
             "    sync        0->no sync(def), 1->SYNCHRONIZE CACHE on OFILE "
             "after copy\n"
             "    thr         is number of threads, must be > 0, default 4, "
@@ -553,9 +563,6 @@
 page3:
     pr2serr("Syntax:  sgh_dd [operands] [options]\n\n"
             "  where: iflag=' and 'oflag=' arguments are listed below:\n"
-            "    2fds        only 2 file descriptors (1 each for IFILE and "
-            "OFILE) are\n"
-            "                used. Default: a pair of fd_s for each thread\n"
             "    append      append output to OFILE (assumes OFILE is "
             "regular file)\n"
             "    coe         continue of error (reading, fills with zeros)\n"
@@ -574,16 +581,21 @@
             "    noshare     if IFILE and OFILE are sg devices, don't set "
             "up sharing\n"
             "                (def: do)\n"
-            "    v3          use v3 sg interface which is the default (aslo "
+            "    same_fds    each thread use the same IFILE and OFILE(2) "
+            "file\n"
+            "                descriptors (def: each threads has own file "
+            "desciptors)\n"
+            "    v3          use v3 sg interface which is the default (also "
             "see v4)\n"
             "    v4          use v4 sg interface (def: v3 unless other side "
             "us v4)\n"
             "\n"
-            "If both are sg devices 'shared' mode is selected unless "
-            "'noshare' is given\nto 'iflag=' or 'oflag='. If 'of2=OFLAG2' "
-            "is given, the read-side is output\nto OFLAG2. Without 'of2=' "
-            "and with IFILE and OFILE sg devices the copy is\nvia a single "
-            "in-kernel buffer.\n"
+            "Copies IFILE to OFILE (and to OFILE2 if given). If IFILE and "
+            "OFILE are sg\ndevices 'shared' mode is selected unless "
+            "'noshare' is given to 'iflag=' or\n'oflag='. of2=OFILE2 uses "
+            "'oflag=FLAGS'. When sharing, the data stays in a\nsingle "
+            "in-kernel buffer which is copied (or mmap-ed) to the user "
+            "space\nif the 'ofreg=OFREG' is given.\n"
            );
 }
 
@@ -695,7 +707,7 @@
 }
 
 static bool
-sg_share_prepare(int slave_writer_fd, int master_reader_fd, int id, bool vb_b)
+sg_share_prepare(int slave_wr_fd, int master_rd_fd, int id, bool vb_b)
 {
     struct sg_extended_info sei;
     struct sg_extended_info * seip;
@@ -704,17 +716,16 @@
     memset(seip, 0, sizeof(*seip));
     seip->valid_wr_mask |= SG_SEIM_SHARE_FD;
     seip->valid_rd_mask |= SG_SEIM_SHARE_FD;
-    seip->share_fd = master_reader_fd;
-    if (ioctl(slave_writer_fd, SG_SET_GET_EXTENDED, seip) < 0) {
+    seip->share_fd = master_rd_fd;
+    if (ioctl(slave_wr_fd, SG_SET_GET_EXTENDED, seip) < 0) {
         pr2serr_lk("tid=%d: ioctl(EXTENDED(shared_fd=%d), failed "
-                   "errno=%d %s\n", id, master_reader_fd, errno,
+                   "errno=%d %s\n", id, master_rd_fd, errno,
                    strerror(errno));
         return false;
     }
     if (vb_b)
         pr2serr_lk("%s: tid=%d: ioctl(EXTENDED(shared_fd)) ok, master_fd=%d, "
-                   "slave_fd=%d\n", __func__, id, master_reader_fd,
-                   slave_writer_fd);
+                   "slave_fd=%d\n", __func__, id, master_rd_fd, slave_wr_fd);
     return true;
 }
 
@@ -754,8 +765,9 @@
     volatile bool stop_after_write = false;
     bool own_infd = false;
     bool own_outfd = false;
+    bool own_out2fd = false;
     bool is_first = true;
-    bool share_and_of2;
+    bool share_and_ofreg;
 
     tip = (Thread_info *)v_tip;
     clp = tip->gcp;
@@ -765,6 +777,8 @@
     memset(rep, 0, sizeof(Rq_elem));
     /* Following clp members are constant during lifetime of thread */
     rep->id = tip->id;
+    if (vb > 0)
+        pr2serr_lk("Starting worker thread %d\n", rep->id);
     if (! clp->in_flags.mmap) {
         rep->buffp = sg_memalign(sz, 0 /* page align */, &rep->alloc_bp,
                                  false);
@@ -776,12 +790,25 @@
     rep->infd = clp->infd;
     rep->outfd = clp->outfd;
     rep->out2fd = clp->out2fd;
+    rep->outregfd = clp->outregfd;
     rep->debug = clp->debug;
     rep->cdbsz_in = clp->cdbsz_in;
     rep->cdbsz_out = clp->cdbsz_out;
     rep->in_flags = clp->in_flags;
     rep->out_flags = clp->out_flags;
-    if (rep->in_flags.fds2 || rep->out_flags.fds2)
+
+    if (rep->id > 0) {
+        /* threads other than tid=0 wait for a broadcast on hold_1st_cv so
+         * tid=0 can complete one READ/WRITE cycle (segment); makes infant
+         * mortality errors easier to analyze and fix. */
+        pthread_cleanup_push(cleanup_out, (void *)clp);
+        status = pthread_cond_wait(&clp->hold_1st_cv, &clp->out_mutex);
+        if (0 != status) err_exit(status, "cond hold_1st_cv");
+        pthread_cleanup_pop(0);
+        status = pthread_mutex_unlock(&clp->out_mutex);
+        if (0 != status) err_exit(status, "unlock out_mutex");
+    }
+    if (rep->in_flags.same_fds || rep->out_flags.same_fds)
         ;       /* we are sharing a single pair of fd_s across all threads */
     else {
         int fd;
@@ -808,12 +835,28 @@
             if (vb > 2)
                 pr2serr_lk("thread=%d: opened local sg OFILE\n", rep->id);
         }
+        if ((FT_SG == clp->out2_type) && clp->out2fp) {
+            fd = sg_out_open(clp, clp->out2fp,
+                            (rep->out_flags.mmap ? &rep->buffp : NULL),
+                            (rep->out_flags.mmap ? &rep->mmap_len : NULL));
+            if (fd < 0)
+                goto fini;
+            rep->out2fd = fd;
+            own_out2fd = true;
+            if (vb > 2)
+                pr2serr_lk("thread=%d: opened local sg OFILE2\n", rep->id);
+        }
     }
     if (vb > 2) {
-        if (! own_infd)
-            pr2serr_lk("thread=%d: using global sg IFILE\n", rep->id);
-        if (! own_outfd)
-            pr2serr_lk("thread=%d: using global sg OFILE\n", rep->id);
+        if ((FT_SG == clp->in_type) && (! own_infd))
+            pr2serr_lk("thread=%d: using global sg IFILE, fd=%d\n", rep->id,
+                       rep->infd);
+        if ((FT_SG == clp-> out_type) && (! own_outfd))
+            pr2serr_lk("thread=%d: using global sg OFILE, fd=%d\n", rep->id,
+                       rep->outfd);
+        if ((FT_SG == clp->out2_type) && (! own_out2fd))
+            pr2serr_lk("thread=%d: using global sg OFILE2, fd=%d\n", rep->id,
+                       rep->out2fd);
     }
     if (rep->in_flags.noshare || rep->out_flags.noshare) {
         if (vb)
@@ -821,11 +864,11 @@
                        rep->id);
     } else if ((FT_SG == clp->in_type) && (FT_SG == clp->out_type))
         rep->has_share = sg_share_prepare(rep->outfd, rep->infd, rep->id,
-                                          rep->debug);
+                                          rep->debug > 9);
     if (vb > 0)
-        pr2serr_lk("starting thread %d, has_share=%s\n", rep->id,
+        pr2serr_lk("tid=%d, has_share=%s\n", rep->id,
                    (rep->has_share ? "true" : "false"));
-    share_and_of2 = (rep->has_share && (rep->out2fd >= 0));
+    share_and_ofreg = (rep->has_share && (rep->outregfd >= 0));
 
     /* vvvvvvvvvvvvvv  Main segment copy loop  vvvvvvvvvvvvvvvvvvvvvvv */
     while (1) {
@@ -860,11 +903,11 @@
         status = pthread_mutex_lock(&clp->out_mutex);
         if (0 != status) err_exit(status, "lock out_mutex");
 
-        /* Make sure the OFILE (+ OFILE2) are in same sequence as IFILE */
-        if ((rep->out2fd < 0) && (FT_SG == clp->in_type) &&
+        /* Make sure the OFILE (+ OFREG) are in same sequence as IFILE */
+        if ((rep->outregfd < 0) && (FT_SG == clp->in_type) &&
             (FT_SG == clp->out_type))
             goto skip_force_out_sequence;
-        if (share_and_of2 || (FT_DEV_NULL != clp->out_type)) {
+        if (share_and_ofreg || (FT_DEV_NULL != clp->out_type)) {
             while ((! clp->out_stop) &&
                    ((rep->blk + seek_skip) != clp->out_blk)) {
                 /* if write would be out of sequence then wait */
@@ -898,28 +941,41 @@
         }
 
         pthread_cleanup_push(cleanup_out, (void *)clp);
-        if (share_and_of2) {
-            res = write(rep->out2fd, rep->buffp, rep->bs * rep->num_blks);
+        if (rep->outregfd >= 0) {
+            res = write(rep->outregfd, rep->buffp, rep->bs * rep->num_blks);
             err = errno;
             if (res < 0)
-                pr2serr_lk("%s: tid=%d: write(out2fd) failed: %s\n", __func__,
-                           rep->id, strerror(err));
+                pr2serr_lk("%s: tid=%d: write(outregfd) failed: %s\n",
+                           __func__, rep->id, strerror(err));
+            else if (rep->debug > 9)
+                pr2serr_lk("%s: tid=%d: write(outregfd), fd=%d, num_blks=%d"
+                           "\n", __func__, rep->id, rep->outregfd,
+                           rep->num_blks);
         }
+        /* Output to OFILE */
         if (FT_SG == clp->out_type)
-            sg_out_wr_cmd(clp, rep); /* releases out_mutex mid oper */
+            sg_out_wr_cmd(clp, rep, false); /* releases out_mutex mid oper */
         else if (FT_DEV_NULL == clp->out_type) {
             /* skip actual write operation */
             clp->out_rem_count -= blocks;
             status = pthread_mutex_unlock(&clp->out_mutex);
             if (0 != status) err_exit(status, "unlock out_mutex");
-        }
-        else {
+        } else {
             normal_out_wr(clp, rep, blocks);
             status = pthread_mutex_unlock(&clp->out_mutex);
             if (0 != status) err_exit(status, "unlock out_mutex");
         }
         pthread_cleanup_pop(0);
 
+        /* Output to OFILE2 if sg device */
+        if ((clp->out2fd >= 0) && (FT_SG == clp->out2_type)) {
+            pthread_cleanup_push(cleanup_out, (void *)clp);
+            status = pthread_mutex_lock(&clp->out2_mutex);
+            if (0 != status) err_exit(status, "lock out2_mutex");
+            sg_out_wr_cmd(clp, rep, true); /* releases out2_mutex mid oper */
+
+            pthread_cleanup_pop(0);
+        }
         // if ((! rep->has_share) && (FT_DEV_NULL != clp->out_type))
             pthread_cond_broadcast(&clp->out_sync_cv);
         if ((0 == rep->id) && is_first) {
@@ -952,6 +1008,8 @@
         close(rep->infd);
     if (own_outfd)
         close(rep->outfd);
+    if (own_out2fd)
+        close(rep->out2fd);
     pthread_cond_broadcast(&clp->out_sync_cv);
     if ((0 == rep->id) && is_first) {
         is_first = false;   /* allow rest of threads to start */
@@ -970,7 +1028,7 @@
     /* enters holding in_mutex */
     while (((res = read(clp->infd, rep->buffp, blocks * clp->bs)) < 0) &&
            ((EINTR == errno) || (EAGAIN == errno)))
-        ;
+        sched_yield();  /* another thread may be able to progress */
     if (res < 0) {
         if (clp->in_flags.coe) {
             memset(rep->buffp, 0, rep->num_blks * rep->bs);
@@ -1016,7 +1074,7 @@
     /* enters holding out_mutex */
     while (((res = write(clp->outfd, rep->buffp, rep->num_blks * clp->bs))
             < 0) && ((EINTR == errno) || (EAGAIN == errno)))
-        ;
+        sched_yield();  /* another thread may be able to progress */
     if (res < 0) {
         if (clp->out_flags.coe) {
             pr2serr_lk("tid=%d: >> ignored error for out blk=%" PRId64
@@ -1122,7 +1180,7 @@
     int status;
 
     while (1) {
-        res = sg_start_io(rep);
+        res = sg_start_io(rep, false);
         if (1 == res)
             err_exit(ENOMEM, "sg starting in command");
         else if (res < 0) {
@@ -1137,7 +1195,7 @@
         status = pthread_mutex_unlock(&clp->in_mutex);
         if (0 != status) err_exit(status, "unlock in_mutex");
 
-        res = sg_finish_io(rep->wr, rep);
+        res = sg_finish_io(rep->wr, rep, false);
         switch (res) {
         case SG_LIB_CAT_ABORTED_COMMAND:
         case SG_LIB_CAT_UNIT_ATTENTION:
@@ -1188,37 +1246,87 @@
     }           /* end of while (1) loop */
 }
 
+/* Issues SG_SET_GET_EXTENDED (SG_SEIM_CHG_SHARE_FD, share_fd=to_fd) on the
+ * master (READ side) fd, yielding and retrying while it fails with EBUSY.
+ * When 'before' is true the MASTER_FINI control flag is also cleared.
+ * Returns true on success; any other errno is reported and yields false
+ * (previously such failures were silently treated as success). */
+static bool
+sg_wr_swap_share(Rq_elem * rep, int to_fd, bool before)
+{
+    bool not_first = false;
+    int err;
+    int master_fd = rep->infd;  /* in (READ) side is master */
+    struct sg_extended_info sei;
+    struct sg_extended_info * seip = &sei;
+
+    memset(seip, 0, sizeof(*seip));
+    seip->valid_wr_mask |= SG_SEIM_CHG_SHARE_FD;
+    seip->valid_rd_mask |= SG_SEIM_CHG_SHARE_FD;
+    seip->share_fd = to_fd;
+    if (before) {
+        /* clear MASTER_FINI bit to put master in SG_RQ_SHR_SWAP state */
+        seip->valid_wr_mask |= SG_SEIM_CTL_FLAGS;
+        seip->valid_rd_mask |= SG_SEIM_CTL_FLAGS;
+        seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_MASTER_FINI;
+        seip->ctl_flags &= ~SG_CTL_FLAGM_MASTER_FINI; /* already 0 */
+    }
+    while (ioctl(master_fd, SG_SET_GET_EXTENDED, seip) < 0) {
+        err = errno;    /* save before any call that may clobber it */
+        if (EBUSY != err) {
+            pr2serr_lk("tid=%d: ioctl(EXTENDED(change_shared_fd=%d), failed "
+                       "errno=%d %s\n", rep->id, to_fd, err, strerror(err));
+            return false;
+        }
+        if ((! not_first) && (rep->debug > 9))
+            pr2serr_lk("tid=%d: ioctl(EXTENDED(change_shared_fd=%d), "
+                       "failed errno=%d %s\n", rep->id, to_fd, err,
+                       strerror(err));
+        not_first = true;
+        sched_yield();  /* another thread may be able to progress */
+    }
+    if (rep->debug > 15)
+        pr2serr_lk("%s: tid=%d: ioctl(EXTENDED(change_shared_fd)) ok, "
+                   "master_fd=%d, to_slave_fd=%d\n", __func__, rep->id,
+                   master_fd, to_fd);
+    return true;
+}
+
 /* Enters this function holding out_mutex */
 static void
-sg_out_wr_cmd(Gbl_coll * clp, Rq_elem * rep)
+sg_out_wr_cmd(Gbl_coll * clp, Rq_elem * rep, bool is_wr2)
 {
     int res;
     int status;
+    pthread_mutex_t * mutexp = is_wr2 ? &clp->out2_mutex : &clp->out_mutex;
+
+    if (rep->has_share && is_wr2)
+        sg_wr_swap_share(rep, rep->out2fd, true);
 
     while (1) {
-        res = sg_start_io(rep);
+        res = sg_start_io(rep, is_wr2);
         if (1 == res)
             err_exit(ENOMEM, "sg starting out command");
         else if (res < 0) {
             pr2serr_lk("%soutputting from sg failed, blk=%" PRId64 "\n",
                        my_name, rep->blk);
-            status = pthread_mutex_unlock(&clp->out_mutex);
+            status = pthread_mutex_unlock(mutexp);
             if (0 != status) err_exit(status, "unlock out_mutex");
             guarded_stop_both(clp);
-            return;
+            goto fini;
         }
         /* Now release in mutex to let other reads run in parallel */
-        status = pthread_mutex_unlock(&clp->out_mutex);
+        status = pthread_mutex_unlock(mutexp);
         if (0 != status) err_exit(status, "unlock out_mutex");
 
-        res = sg_finish_io(rep->wr, rep);
+        res = sg_finish_io(rep->wr, rep, is_wr2);
         switch (res) {
         case SG_LIB_CAT_ABORTED_COMMAND:
         case SG_LIB_CAT_UNIT_ATTENTION:
             /* try again with same addr, count info */
             /* now re-acquire out mutex for balance */
             /* N.B. This re-write could now be out of write sequence */
-            status = pthread_mutex_lock(&clp->out_mutex);
+            status = pthread_mutex_lock(mutexp);
             if (0 != status) err_exit(status, "lock out_mutex");
             break;      /* loops around */
         case SG_LIB_CAT_MEDIUM_HARD:
@@ -1227,7 +1335,7 @@
                 if (exit_status <= 0)
                     exit_status = res;
                 guarded_stop_both(clp);
-                return;
+                goto fini;
             } else
                 pr2serr_lk(">> ignored error for out blk=%" PRId64 " for %d "
                            "bytes\n", rep->blk, rep->num_blks * rep->bs);
@@ -1238,29 +1346,34 @@
 #endif
 #endif
         case 0:
-            status = pthread_mutex_lock(&clp->out_mutex);
-            if (0 != status) err_exit(status, "lock out_mutex");
-            if (rep->dio_incomplete_count || rep->resid) {
-                clp->dio_incomplete_count += rep->dio_incomplete_count;
-                clp->sum_of_resids += rep->resid;
+            if (! is_wr2) {
+                status = pthread_mutex_lock(mutexp);
+                if (0 != status) err_exit(status, "lock out_mutex");
+                if (rep->dio_incomplete_count || rep->resid) {
+                    clp->dio_incomplete_count += rep->dio_incomplete_count;
+                    clp->sum_of_resids += rep->resid;
+                }
+                clp->out_rem_count -= rep->num_blks;
+                status = pthread_mutex_unlock(mutexp);
+                if (0 != status) err_exit(status, "unlock out_mutex");
             }
-            clp->out_rem_count -= rep->num_blks;
-            status = pthread_mutex_unlock(&clp->out_mutex);
-            if (0 != status) err_exit(status, "unlock out_mutex");
-            return;
+            goto fini;
         default:
             pr2serr_lk("error finishing sg out command (%d)\n", res);
             if (exit_status <= 0)
                 exit_status = res;
             guarded_stop_both(clp);
-            return;
+            goto fini;
         }
     }           /* end of while (1) loop */
+fini:
+    if (rep->has_share && is_wr2)
+        sg_wr_swap_share(rep, rep->outfd, false);
 }
 
 /* Returns 0 on success, 1 if ENOMEM error else -1 for other errors. */
 static int
-sg_start_io(Rq_elem * rep)
+sg_start_io(Rq_elem * rep, bool is_wr2)
 {
     bool wr = rep->wr;
     bool fua = wr ? rep->out_flags.fua : rep->in_flags.fua;
@@ -1271,20 +1384,28 @@
     bool v4 = wr ? rep->out_flags.v4 : rep->in_flags.v4;
     int cdbsz = wr ? rep->cdbsz_out : rep->cdbsz_in;
     int flags = 0;
-    int res, err;
+    int res, err, fd;
     struct sg_io_hdr * hp = &rep->io_hdr;
     struct sg_io_v4 * h4p = &rep->io_hdr4;
     const char * cp = "";
     const char * c2p = "";
     const char * c3p = "";
+    const char * crwp;
 
+    if (wr) {
+        fd = is_wr2 ? rep->out2fd : rep->outfd;
+        crwp = is_wr2 ? "writing2" : "writing";
+    } else {
+        fd = rep->infd;
+        crwp = "reading";
+    }
     if (sg_build_scsi_cdb(rep->cmd, cdbsz, rep->num_blks, rep->blk,
                           wr, fua, dpo)) {
         pr2serr_lk("%sbad cdb build, start_blk=%" PRId64 ", blocks=%d\n",
                    my_name, rep->blk, rep->num_blks);
         return -1;
     }
-    if (mmap && (rep->out2fd >= 0)) {
+    if (mmap && (rep->outregfd >= 0)) {
         flags |= SG_FLAG_MMAP_IO;
         c3p = " mmap";
     }
@@ -1296,7 +1417,7 @@
         flags |= SGV4_FLAG_SHARE;
         if (wr)
             flags |= SGV4_FLAG_NO_DXFER;
-        else if (rep->out2fd < 0)
+        else if (rep->outregfd < 0)
             flags |= SGV4_FLAG_NO_DXFER;
         if (flags & SGV4_FLAG_NO_DXFER)
             c2p = " and FLAG_NO_DXFER";
@@ -1306,8 +1427,8 @@
         cp = (wr ? " slave not sharing" : " master not sharing");
     if (rep->debug > 3) {
         pr2serr_lk("%s tid=%d: SCSI %s%s%s%s, blk=%" PRId64 " num_blks=%d\n",
-                   __func__, rep->id, (rep->wr ? "WRITE" : "READ"), cp,
-                   c2p, c3p, rep->blk, rep->num_blks);
+                   __func__, rep->id, crwp, cp, c2p, c3p, rep->blk,
+                   rep->num_blks);
         lk_print_command(rep->cmd);
     }
     if (v4)
@@ -1327,10 +1448,9 @@
     hp->pack_id = (int)rep->blk;
     hp->flags = flags;
 
-    while (((res = write(rep->wr ? rep->outfd : rep->infd, hp,
-                         sizeof(struct sg_io_hdr))) < 0) &&
+    while (((res = write(fd, hp, sizeof(struct sg_io_hdr))) < 0) &&
            ((EINTR == errno) || (EAGAIN == errno)))
-        ;
+        sched_yield();  /* another thread may be able to progress */
     err = errno;
     if (res < 0) {
         if (ENOMEM == err)
@@ -1358,10 +1478,9 @@
     h4p->usr_ptr = (uint64_t)rep;
     h4p->request_extra = rep->blk;/* N.B. blk --> pack_id --> request_extra */
     h4p->flags = flags;
-    while (((res = ioctl(rep->wr ? rep->outfd : rep->infd, SG_IOSUBMIT,
-                         h4p)) < 0) && ((EINTR == errno) ||
-                                         (EAGAIN == errno)))
-        ;
+    while (((res = ioctl(fd, SG_IOSUBMIT, h4p)) < 0) &&
+           ((EINTR == errno) || (EAGAIN == errno)))
+        sched_yield();  /* another thread may be able to progress */
     err = errno;
     if (res < 0) {
         if (ENOMEM == err)
@@ -1377,17 +1496,25 @@
    -> try again, SG_LIB_CAT_NOT_READY, SG_LIB_CAT_MEDIUM_HARD,
    -1 other errors */
 static int
-sg_finish_io(bool wr, Rq_elem * rep)
+sg_finish_io(bool wr, Rq_elem * rep, bool is_wr2)
 {
     bool v4 = wr ? rep->out_flags.v4 : rep->in_flags.v4;
-    int res;
+    int res, fd;
     struct sg_io_hdr io_hdr;
     struct sg_io_hdr * hp;
     struct sg_io_v4 * h4p;
+    const char *cp;
 #if 0
     static int testing = 0;     /* thread dubious! */
 #endif
 
+    if (wr) {
+        fd = is_wr2 ? rep->out2fd : rep->outfd;
+        cp = is_wr2 ? "writing2" : "writing";
+    } else {
+        fd = rep->infd;
+        cp = "reading";
+    }
     if (v4)
         goto do_v4;
     memset(&io_hdr, 0 , sizeof(struct sg_io_hdr));
@@ -1396,10 +1523,9 @@
     io_hdr.dxfer_direction = wr ? SG_DXFER_TO_DEV : SG_DXFER_FROM_DEV;
     io_hdr.pack_id = (int)rep->blk;
 
-    while (((res = read(wr ? rep->outfd : rep->infd, &io_hdr,
-                        sizeof(struct sg_io_hdr))) < 0) &&
+    while (((res = read(fd, &io_hdr, sizeof(struct sg_io_hdr))) < 0) &&
            ((EINTR == errno) || (EAGAIN == errno)))
-        ;
+        sched_yield();  /* another thread may be able to progress */
     if (res < 0) {
         perror("finishing io on sg device, error");
         return -1;
@@ -1414,21 +1540,19 @@
     case SG_LIB_CAT_CLEAN:
         break;
     case SG_LIB_CAT_RECOVERED:
-        lk_chk_n_print3((wr ? "writing continuing":
-                                   "reading continuing"), hp, false);
+        lk_chk_n_print3(cp, hp, false);
         break;
     case SG_LIB_CAT_ABORTED_COMMAND:
     case SG_LIB_CAT_UNIT_ATTENTION:
         if (rep->debug > 3)
-            lk_chk_n_print3((wr ? "writing": "reading"), hp, false);
+            lk_chk_n_print3(cp, hp, false);
         return res;
     case SG_LIB_CAT_NOT_READY:
     default:
         {
             char ebuff[EBUFF_SZ];
 
-            snprintf(ebuff, EBUFF_SZ, "%s blk=%" PRId64,
-                     wr ? "writing": "reading", rep->blk);
+            snprintf(ebuff, EBUFF_SZ, "%s blk=%" PRId64, cp, rep->blk);
             lk_chk_n_print3(ebuff, hp, false);
             return res;
         }
@@ -1443,16 +1567,14 @@
         rep->dio_incomplete_count = 0;
     rep->resid = hp->resid;
     if (rep->debug > 3)
-        pr2serr_lk("%s: tid=%d: completed %s\n", __func__, rep->id,
-                   wr ? "WRITE" : "READ");
+        pr2serr_lk("%s: tid=%d: completed %s\n", __func__, rep->id, cp);
     return 0;
 
 do_v4:
     h4p = &rep->io_hdr4;
-    while (((res = ioctl(wr ? rep->outfd : rep->infd, SG_IORECEIVE,
-                         h4p)) < 0) && ((EINTR == errno) ||
-                                        (EAGAIN == errno)))
-        ;
+    while (((res = ioctl(fd, SG_IORECEIVE, h4p)) < 0) &&
+           ((EINTR == errno) || (EAGAIN == errno)))
+        sched_yield();  /* another thread may be able to progress */
     if (res < 0) {
         perror("finishing io on sg device, error");
         return -1;
@@ -1467,21 +1589,19 @@
     case SG_LIB_CAT_CLEAN:
         break;
     case SG_LIB_CAT_RECOVERED:
-        lk_chk_n_print4((wr ? "writing continuing":
-                              "reading continuing"), h4p, false);
+        lk_chk_n_print4(cp, h4p, false);
         break;
     case SG_LIB_CAT_ABORTED_COMMAND:
     case SG_LIB_CAT_UNIT_ATTENTION:
         if (rep->debug > 3)
-            lk_chk_n_print4((wr ? "writing": "reading"), h4p, false);
+            lk_chk_n_print4(cp, h4p, false);
         return res;
     case SG_LIB_CAT_NOT_READY:
     default:
         {
             char ebuff[EBUFF_SZ];
 
-            snprintf(ebuff, EBUFF_SZ, "%s blk=%" PRId64,
-                     wr ? "writing": "reading", rep->blk);
+            snprintf(ebuff, EBUFF_SZ, "%s blk=%" PRId64, cp, rep->blk);
             lk_chk_n_print4(ebuff, h4p, false);
             return res;
         }
@@ -1496,8 +1616,7 @@
         rep->dio_incomplete_count = 0;
     rep->resid = h4p->din_resid;
     if (rep->debug > 3) {
-        pr2serr_lk("%s: tid=%d: completed %s\n", __func__, rep->id,
-                   wr ? "WRITE" : "READ");
+        pr2serr_lk("%s: tid=%d: completed %s\n", __func__, rep->id, cp);
         if (rep->debug > 4)
             pr2serr_lk(" info=0x%x sg_info_check=%d another_waiting=%d "
                        "direct=%d detaching=%d\n", h4p->info,
@@ -1583,9 +1702,7 @@
         np = strchr(cp, ',');
         if (np)
             *np++ = '\0';
-        if (0 == strcmp(cp, "2fds"))
-            fp->fds2 = true;
-        else if (0 == strcmp(cp, "append"))
+        if (0 == strcmp(cp, "append"))
             fp->append = true;
         else if (0 == strcmp(cp, "coe"))
             fp->coe = true;
@@ -1611,6 +1728,8 @@
             fp->noxfer = true;
         else if (0 == strcmp(cp, "null"))
             ;
+        else if (0 == strcmp(cp, "same_fds"))
+            fp->same_fds = true;
         else if (0 == strcmp(cp, "v3"))
             fp->v3 = true;
         else if (0 == strcmp(cp, "v4"))
@@ -1657,7 +1776,7 @@
         snprintf(ebuff, EBUFF_SZ, "%s: could not open %s for sg reading",
                  __func__, inf);
         perror(ebuff);
-        return -sg_convert_errno(err);;
+        return -sg_convert_errno(err);
     }
     n = sg_prepare_resbuf(fd, clp->bs, clp->bpt, clp->in_flags.defres,
                          clp->elem_sz,  mmpp);
@@ -1706,18 +1825,19 @@
 {
     bool verbose_given = false;
     bool version_given = false;
+    bool bpt_given = false;
+    bool cdbsz_given = false;
     int64_t skip = 0;
     int64_t seek = 0;
     int ibs = 0;
     int obs = 0;
-    int bpt_given = 0;
-    int cdbsz_given = 0;
     char str[STR_SZ];
     char * key;
     char * buf;
     char inf[INOUTF_SZ];
     char outf[INOUTF_SZ];
     char out2f[INOUTF_SZ];
+    char outregf[INOUTF_SZ];
     int res, k, err, keylen;
     int64_t in_num_sect = 0;
     int64_t out_num_sect = 0;
@@ -1741,11 +1861,13 @@
     clp->in_type = FT_OTHER;
     /* change dd's default: if of=OFILE not given, assume /dev/null */
     clp->out_type = FT_DEV_NULL;
+    clp->out2_type = FT_DEV_NULL;
     clp->cdbsz_in = DEF_SCSI_CDBSZ;
     clp->cdbsz_out = DEF_SCSI_CDBSZ;
     inf[0] = '\0';
     outf[0] = '\0';
     out2f[0] = '\0';
+    outregf[0] = '\0';
 
     for (k = 1; k < argc; k++) {
         if (argv[k]) {
@@ -1765,7 +1887,7 @@
                 pr2serr("%sbad argument to 'bpt='\n", my_name);
                 return SG_LIB_SYNTAX_ERROR;
             }
-            bpt_given = 1;
+            bpt_given = true;
         } else if (0 == strcmp(key,"bs")) {
             clp->bs = sg_get_num(buf);
             if (-1 == clp->bs) {
@@ -1775,7 +1897,7 @@
         } else if (0 == strcmp(key,"cdbsz")) {
             clp->cdbsz_in = sg_get_num(buf);
             clp->cdbsz_out = clp->cdbsz_in;
-            cdbsz_given = 1;
+            cdbsz_given = true;
         } else if (0 == strcmp(key,"coe")) {
             clp->in_flags.coe = !! sg_get_num(buf);
             clp->out_flags.coe = clp->in_flags.coe;
@@ -1835,6 +1957,12 @@
                 return SG_LIB_CONTRADICT;
             } else
                 strncpy(out2f, buf, INOUTF_SZ - 1);
+        } else if (strcmp(key,"ofreg") == 0) {
+            if ('\0' != outregf[0]) {
+                pr2serr("Second OFREG argument??\n");
+                return SG_LIB_CONTRADICT;
+            } else
+                strncpy(outregf, buf, INOUTF_SZ - 1);
         } else if (strcmp(key,"of") == 0) {
             if ('\0' != outf[0]) {
                 pr2serr("Second 'of=' argument??\n");
@@ -1961,14 +2089,14 @@
         return SG_LIB_SYNTAX_ERROR;
     }
     if ((clp->in_flags.mmap || clp->out_flags.mmap) &&
-        (clp->in_flags.fds2 || clp->in_flags.fds2)) {
-        pr2serr("can't have both 'mmap' and '2fds' flags\n");
+        (clp->in_flags.same_fds || clp->in_flags.same_fds)) {
+        pr2serr("can't have both 'mmap' and 'same_fds' flags\n");
         return SG_LIB_SYNTAX_ERROR;
     }
     /* defaulting transfer size to 128*2048 for CD/DVDs is too large
        for the block layer in lk 2.6 and results in an EIO on the
        SG_IO ioctl. So reduce it in that case. */
-    if ((clp->bs >= 2048) && (0 == bpt_given))
+    if ((clp->bs >= 2048) && (! bpt_given))
         clp->bpt = DEF_BLOCKS_PER_2048TRANSFER;
     if ((num_threads < 1) || (num_threads > MAX_NUM_THREADS)) {
         pr2serr("too few or too many threads requested\n");
@@ -2103,6 +2231,67 @@
                     "device\n", my_name);
         }
     }
+
+    if (out2f[0])
+        clp->ofile2_given = true;
+    if (out2f[0] && ('-' != out2f[0])) {
+        clp->out2_type = dd_filetype(out2f);
+
+        if (FT_ST == clp->out2_type) {
+            pr2serr("%sunable to use scsi tape device %s\n", my_name, out2f);
+            return SG_LIB_FILE_ERROR;
+        }
+        else if (FT_SG == clp->out2_type) {
+            clp->out2fd = sg_out_open(clp, out2f, NULL, NULL);
+            if (clp->out2fd < 0)
+                return -clp->out2fd;
+        }
+        else if (FT_DEV_NULL == clp->out2_type)
+            clp->out2fd = -1; /* don't bother opening */
+        else {
+            if (FT_RAW != clp->out2_type) {
+                flags = O_WRONLY | O_CREAT;
+                if (clp->out_flags.direct)
+                    flags |= O_DIRECT;
+                if (clp->out_flags.excl)
+                    flags |= O_EXCL;
+                if (clp->out_flags.dsync)
+                    flags |= O_SYNC;
+                if (clp->out_flags.append)
+                    flags |= O_APPEND;
+
+                if ((clp->out2fd = open(out2f, flags, 0666)) < 0) {
+                    err = errno;
+                    snprintf(ebuff, EBUFF_SZ, "%scould not open %s for "
+                             "writing", my_name, out2f);
+                    perror(ebuff);
+                    return sg_convert_errno(err);
+                }
+            }
+            else {      /* raw output file */
+                if ((clp->out2fd = open(out2f, O_WRONLY)) < 0) {
+                    err = errno;
+                    snprintf(ebuff, EBUFF_SZ, "%scould not open %s for raw "
+                             "writing", my_name, out2f);
+                    perror(ebuff);
+                    return sg_convert_errno(err);
+                }
+            }
+            if (seek > 0) {
+                off64_t offset = seek;
+
+                offset *= clp->bs;       /* could exceed 32 bits here! */
+                if (lseek64(clp->out2fd, offset, SEEK_SET) < 0) {
+                    err = errno;
+                    snprintf(ebuff, EBUFF_SZ, "%scouldn't seek to required "
+                             "position on %s", my_name, out2f);
+                    perror(ebuff);
+                    return sg_convert_errno(err);
+                }
+            }
+        }
+        clp->out2fp = out2f;
+    }
     if ((FT_SG == clp->in_type ) && (FT_SG == clp->out_type)) {
         if (clp->in_flags.v4 && (! clp->out_flags.v3)) {
             if (! clp->out_flags.v4) {
@@ -2121,16 +2310,29 @@
             }
         }
     }
-    if (out2f[0]) {
-        clp->out2_type = dd_filetype(out2f);
-        if ((clp->out2fd = open(out2f, O_WRONLY | O_CREAT, 0666)) < 0) {
+    if (outregf[0]) {
+        int ftyp = dd_filetype(outregf);
+
+        clp->outreg_type = ftyp;
+        if (! ((FT_OTHER == ftyp) || (FT_ERROR == ftyp) ||
+               (FT_DEV_NULL == ftyp))) {
+            pr2serr("File: %s can only be regular file or pipe (or "
+                    "/dev/null)\n", outregf);
+            return SG_LIB_SYNTAX_ERROR;
+        }
+        if ((clp->outregfd = open(outregf, O_WRONLY | O_CREAT, 0666)) < 0) {
             err = errno;
-            snprintf(ebuff, EBUFF_SZ, "could not open %s for writing", out2f);
+            snprintf(ebuff, EBUFF_SZ, "could not open %s for writing",
+                     outregf);
             perror(ebuff);
             return sg_convert_errno(err);
         }
+        if (clp->debug > 1)
+            pr2serr("ofreg=%s opened okay, fd=%d\n", outregf, clp->outregfd);
+        if (FT_ERROR == ftyp)
+            clp->outreg_type = FT_OTHER;        /* regular file created */
     } else
-        clp->out2fd = -1;
+        clp->outregfd = -1;
 
     if ((STDIN_FILENO == clp->infd) && (STDOUT_FILENO == clp->outfd)) {
         pr2serr("Won't default both IFILE to stdin _and_ OFILE to stdout\n");
@@ -2247,6 +2449,8 @@
     if (0 != status) err_exit(status, "init in_mutex");
     status = pthread_mutex_init(&clp->out_mutex, NULL);
     if (0 != status) err_exit(status, "init out_mutex");
+    status = pthread_mutex_init(&clp->out2_mutex, NULL);
+    if (0 != status) err_exit(status, "init out2_mutex");
     status = pthread_cond_init(&clp->out_sync_cv, NULL);
     if (0 != status) err_exit(status, "init out_sync_cv");
 
@@ -2284,17 +2488,6 @@
         status = pthread_create(&tip->a_pthr, NULL, read_write_thread,
                                 (void *)tip);
         if (0 != status) err_exit(status, "pthread_create");
-        if (clp->debug > 1)
-            pr2serr_lk("Starting worker thread k=0 alone\n");
-
-        /* wait for a broadcast on hold_1st_cv, implies tid=0 thread has
-         * completed one READ/WRITE cycle (segment). */
-        pthread_cleanup_push(cleanup_out, (void *)clp);
-        status = pthread_cond_wait(&clp->hold_1st_cv, &clp->out_mutex);
-        if (0 != status) err_exit(status, "cond hold_1st_cv");
-        pthread_cleanup_pop(0);
-        status = pthread_mutex_unlock(&clp->out_mutex);
-        if (0 != status) err_exit(status, "unlock out_mutex");
 
         /* now start the rest of the threads */
         for (k = 1; k < num_threads; ++k) {
@@ -2304,8 +2497,6 @@
             status = pthread_create(&tip->a_pthr, NULL, read_write_thread,
                                     (void *)tip);
             if (0 != status) err_exit(status, "pthread_create");
-            if (clp->debug > 1)
-                pr2serr_lk("Starting worker thread k=%d\n", k);
         }
 
         /* now wait for worker threads to finish */
@@ -2333,6 +2524,17 @@
             if (0 != res)
                 pr2serr_lk("Unable to synchronize cache\n");
         }
+        if (FT_SG == clp->out2_type) {
+            pr2serr_lk(">> Synchronizing cache on %s\n", out2f);
+            res = sg_ll_sync_cache_10(clp->out2fd, 0, 0, 0, 0, 0, false, 0);
+            if (SG_LIB_CAT_UNIT_ATTENTION == res) {
+                pr2serr_lk("Unit attention(out2), continuing\n");
+                res = sg_ll_sync_cache_10(clp->out2fd, 0, 0, 0, 0, 0, false,
+                                          0);
+            }
+            if (0 != res)
+                pr2serr_lk("Unable to synchronize cache (of2)\n");
+        }
     }
 
     shutting_down = true;
@@ -2349,6 +2551,8 @@
         close(clp->outfd);
     if ((STDOUT_FILENO != clp->out2fd) && (FT_DEV_NULL != clp->out2_type))
         close(clp->out2fd);
+    if ((STDOUT_FILENO != clp->outregfd) && (FT_DEV_NULL != clp->outreg_type))
+        close(clp->outregfd);
     res = exit_status;
     if ((0 != clp->out_count) && (0 == clp->dry_run)) {
         pr2serr(">>>> Some error occurred, remaining blocks=%" PRId64 "\n",
diff --git a/testing/sgs_dd.c b/testing/sgs_dd.c
new file mode 100644
index 0000000..2d4cbe1
--- /dev/null
+++ b/testing/sgs_dd.c
@@ -0,0 +1,986 @@
+/* We need F_SETSIG, (signal redirect), so following define */
+#define _GNU_SOURCE 1
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <poll.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "sg_lib.h"
+#include "sg_linux_inc.h"
+#include "sg_io_linux.h"
+
+/* Test code for the extensions to the Linux OS SCSI generic ("sg")
+ * device driver.
+ * Copyright (C) 1999-2018 D. Gilbert and P. Allworth
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is a specialization of the Unix "dd" command in which
+ * one or both of the given files is a scsi generic device. A block size
+ * ('bs') is assumed to be 512 if not given. This program complains if
+ * 'ibs' or 'obs' are given with some other value than 'bs'.
+ * If 'if' is not given or 'if=-' then stdin is assumed. If 'of' is
+ * not given or 'of=-' then stdout is assumed. The multipliers "c, b, k, m"
+ * are recognized on numeric arguments.
+ *
+ * A non-standard argument "bpt" (blocks per transfer) is added to control
+ * the maximum number of blocks in each transfer. The default bpt value is
+ * (64 * 1024 / bs) or 1 if the first expression is 0. That is an
+ * integer division (rounds toward 0). For example if "bs=512" and "bpt=32"
+ * are given then a maximum of 32 blocks (16KB in this case) are transferred
+ * to or from the sg device in a single SCSI command.
+ *
+ * BEWARE: If the 'of' file is a 'sg' device (eg a disk) then it _will_
+ * be written to, potentially destroying its previous contents.
+ *
+ * This version should compile with Linux sg drivers with version numbers
+ * >= 30000 . This version also allows SIGPOLL or a RT signal to be
+ * chosen. SIGIO is a synonym for SIGPOLL; SIGIO seems to be deprecated.
+ */
+
+
+static const char * version_str = "4.01 20181223";
+static const char * my_name = "sgs_dd";
+
+#define DEF_BLOCK_SIZE 512
+#define DEF_BPT_TIMES_BS_SZ (64 * 1024) /* 64 KB */
+
+/* #define SG_DEBUG  1          comment out if not needed */
+
+#define SENSE_BUFF_LEN 32       /* Arbitrary, could be larger */
+#define DEF_TIMEOUT 40000       /* 40,000 millisecs == 40 seconds */
+#define S_RW_LEN 10             /* Use SCSI READ(10) and WRITE(10) */
+#define SGQ_MAX_RD_AHEAD 4
+#define SGQ_MAX_WR_AHEAD 4
+#define SGQ_NUM_ELEMS (SGQ_MAX_RD_AHEAD+ SGQ_MAX_WR_AHEAD + 1)
+
+#define SGQ_FREE 0
+#define SGQ_IO_STARTED 1
+#define SGQ_IO_FINISHED 2
+#define SGQ_IO_ERR 3
+#define SGQ_IO_WAIT 4
+
+#define SGQ_CAN_DO_NOTHING 0    /* only temporarily in use */
+#define SGQ_CAN_READ 1
+#define SGQ_CAN_WRITE 2
+#define SGQ_TIMEOUT 4
+
+
+#define STR_SZ 1024
+#define INOUTF_SZ 512
+#define EBUFF_SZ 512
+
+
+typedef struct request_element
+{   /* one slot of the Rq_coll elem[] ring; carries a single transfer */
+    struct request_element * nextp;     /* next slot; last wraps to first */
+    bool stop_after_wr; /* set by start_read() after a short read */
+    bool wr;            /* false: slot holds a read; true: a write */
+    int state;          /* one of the SGQ_FREE .. SGQ_IO_WAIT values */
+    int blk;            /* starting block, placed in cdb and pack_id */
+    int num_blks;       /* number of blocks in this transfer */
+    uint8_t * buffp;    /* data buffer, malloc-ed in init_elems() */
+    sg_io_hdr_t io_hdr; /* sg v3 header used to submit and receive */
+    uint8_t cmd[S_RW_LEN];      /* cdb built by sg_start_io() */
+    uint8_t sb[SENSE_BUFF_LEN]; /* sense buffer handed to the driver */
+    int result;         /* NOTE(review): no use visible in this chunk */
+} Rq_elem;
+
+typedef struct request_collection
+{   /* state of the whole copy: both files plus the ring of requests */
+    bool in_is_sg;      /* input goes through sg_start_io() when true */
+    bool out_is_sg;     /* output goes through sg_start_io() when true */
+    bool dio;           /* request SG_FLAG_DIRECT_IO on each sg command */
+    bool use_rt_sig;    /* NOTE(review): presumably rt_sig=1; confirm */
+    int infd;
+    int in_blk;                 /* block at which next read will start */
+    int in_count;               /* blocks still to be handed to reads */
+    int in_done_count;          /* decremented as in blocks complete */
+    int in_partial;             /* short reads ending mid block */
+    int outfd;
+    int lowest_seek;
+    int out_blk;                /* block at which next write will start */
+    int out_count;              /* blocks still to be handed to writes */
+    int out_done_count;         /* decremented as out blocks complete */
+    int out_partial;            /* short writes ending mid block */
+    int bs;                     /* block size in bytes */
+    int bpt;                    /* blocks per transfer */
+    int dio_incomplete;         /* dios done as indirect IO */
+    int sum_of_resids;          /* running total of io_hdr.resid */
+    int debug;
+    sigset_t blocked_sigs;      /* set waited on by sigtimedwait() */
+    int sigs_waiting;           /* sg commands started, signal not yet seen */
+    int sigs_rt_received;       /* count of (SIGRTMIN + 1) signals seen */
+    int sigs_io_received;       /* count of SIGPOLL signals seen */
+    Rq_elem * rd_posp;          /* slot the next start_read() will use */
+    Rq_elem * wr_posp;          /* start_write() scans forward from here */
+    Rq_elem elem[SGQ_NUM_ELEMS];        /* storage for the ring */
+} Rq_coll;
+
+
+static void
+usage(void)
+{   /* synopsis and a short description, written to stdout in one go */
+    fputs("Usage: "
+          "sgs_dd  [if=<ifile>] [skip=<n>] [of=<ofile>] [seek=<n>]\n"
+          "               [bs=<num>] [bpt=<num>] [count=<n>]"
+          " [deb=<n>] [dio=0|1]\n"
+          "               [rt_sig=0|1] [--version]\n"
+          "where:\n"
+          "  bpt      blocks_per_transfer (default: 65536/bs (or 128 for "
+          "bs=512))\n"
+          "  bs       not just any block size, the logical block size of "
+          "device\n"
+          "  dio      direct IO, 1->attempt, 0->indirect IO (def)\n"
+          "  rt_sig   0->use SIGIO (def); 1->use RT sig (SIGRTMIN + 1)\n"
+          "  deb      debug: 0->no debug (def); > 0 -> more debug\n"
+          "  <other operands>     as per dd command\n\n"
+          "dd clone for testing Linux sg driver SIGPOLL and friends. Either "
+          "'if' or 'of'\nmust be a scsi generic device. If 'of' not given "
+          "then /dev/null assumed.\n", stdout);
+}
+
+/* READ CAPACITY(10): 0 -> success, -1 -> failure, 2 -> try again */
+static int
+read_capacity(int sg_fd, int * num_sect, int * sect_sz)
+{   /* outputs: *num_sect (block count), *sect_sz (bytes per block) */
+    int res;
+    uint8_t rcCmdBlk [10] = {0x25, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    uint8_t rcBuff[64];
+    uint8_t sense_b[64];
+    sg_io_hdr_t io_hdr;
+
+    memset(&io_hdr, 0, sizeof(sg_io_hdr_t));
+    io_hdr.interface_id = 'S';
+    io_hdr.cmd_len = sizeof(rcCmdBlk);
+    io_hdr.mx_sb_len = sizeof(sense_b);
+    io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+    io_hdr.dxfer_len = sizeof(rcBuff);
+    io_hdr.dxferp = rcBuff;
+    io_hdr.cmdp = rcCmdBlk;
+    io_hdr.sbp = sense_b;
+    io_hdr.timeout = DEF_TIMEOUT;
+    if (ioctl(sg_fd, SG_IO, &io_hdr) < 0) {
+        perror("read_capacity (SG_IO) error");
+        return -1;
+    }
+    res = sg_err_category3(&io_hdr);
+    if (SG_LIB_CAT_UNIT_ATTENTION == res)
+        return 2; /* probably have another go ... */
+    else if (SG_LIB_CAT_CLEAN != res) {
+        sg_chk_n_print3("read capacity", &io_hdr, true);
+        return -1;
+    }
+    /* big endian fields; widen before shifting so bit 31 is well defined */
+    *num_sect = (int)(1 + (((uint32_t)rcBuff[0] << 24) | (rcBuff[1] << 16) |
+                (rcBuff[2] << 8) | rcBuff[3]));
+    *sect_sz = (int)(((uint32_t)rcBuff[4] << 24) | (rcBuff[5] << 16) |
+               (rcBuff[6] << 8) | rcBuff[7]);
+#ifdef SG_DEBUG
+    fprintf(stderr, "number of sectors=%d, sector size=%d\n", *num_sect,
+            *sect_sz);
+#endif
+    return 0;
+}
+
+/* Builds a READ(10) or WRITE(10) for rep and hands it to the sg driver.
+ * -ve -> unrecoverable error, 0 -> successful, 1 -> recoverable (ENOMEM) */
+static int
+sg_start_io(Rq_coll * clp, Rq_elem * rep)
+{
+    const bool is_write = rep->wr;
+    const int sg_fd = is_write ? clp->outfd : clp->infd;
+    sg_io_hdr_t * hp = &rep->io_hdr;
+    uint8_t * cdb = rep->cmd;
+    int res, k;
+
+    memset(cdb, 0, sizeof(rep->cmd));
+    cdb[0] = is_write ? 0x2a : 0x28;
+    for (k = 0; k < 4; ++k)     /* 32 bit block address, big endian */
+        cdb[2 + k] = (uint8_t)((rep->blk >> (24 - (8 * k))) & 0xFF);
+    cdb[7] = (uint8_t)((rep->num_blks >> 8) & 0xff);
+    cdb[8] = (uint8_t)(rep->num_blks & 0xff);
+    memset(hp, 0, sizeof(sg_io_hdr_t));
+    hp->interface_id = 'S';
+    hp->pack_id = rep->blk;
+    hp->usr_ptr = rep;          /* lets sg_finish_io() find this element */
+    hp->timeout = DEF_TIMEOUT;
+    hp->cmdp = cdb;
+    hp->cmd_len = sizeof(rep->cmd);
+    hp->sbp = rep->sb;
+    hp->mx_sb_len = sizeof(rep->sb);
+    hp->dxferp = rep->buffp;
+    hp->dxfer_len = clp->bs * rep->num_blks;
+    hp->dxfer_direction = is_write ? SG_DXFER_TO_DEV : SG_DXFER_FROM_DEV;
+    hp->flags |= clp->dio ? SG_FLAG_DIRECT_IO : 0;
+#ifdef SG_DEBUG
+    fprintf(stderr, "%s: SCSI %s, blk=%d num_blks=%d\n", __func__,
+           is_write ? "WRITE" : "READ", rep->blk, rep->num_blks);
+    sg_print_command(hp->cmdp);
+    fprintf(stderr, "dir=%d, len=%d, dxfrp=%p, cmd_len=%d\n",
+            hp->dxfer_direction, hp->dxfer_len, hp->dxferp, hp->cmd_len);
+#endif
+    do {        /* write() of the header submits it; retried on EINTR */
+        res = write(sg_fd, hp, sizeof(sg_io_hdr_t));
+    } while ((res < 0) && (EINTR == errno));
+    if (res >= 0) {
+        rep->state = SGQ_IO_STARTED;
+        clp->sigs_waiting++;
+        return 0;
+    }
+    if (ENOMEM == errno)
+        return 1;
+    if ((EDOM == errno) || (EAGAIN == errno)) {
+        rep->state = SGQ_IO_WAIT;   /* busy so wait */
+        return 0;
+    }
+    fprintf(stderr, "%s: write(): %s [%d]\n", __func__, strerror(errno),
+            errno);
+    rep->state = SGQ_IO_ERR;
+    return res;
+}
+
+/* -1 -> unrecoverable error, 0 -> successful, 1 -> try again */
+static int
+sg_finish_io(Rq_coll * clp, bool wr, Rq_elem ** repp)
+{   /* reads one completed sg_io_hdr; *repp gets its element once known */
+    int res;
+    sg_io_hdr_t io_hdr;
+    sg_io_hdr_t * hp;
+    Rq_elem * rep;
+
+    memset(&io_hdr, 0 , sizeof(sg_io_hdr_t));
+    while (((res = read(wr ? clp->outfd : clp->infd, &io_hdr,
+                        sizeof(sg_io_hdr_t))) < 0) && (EINTR == errno))
+        ;
+    rep = (Rq_elem *)io_hdr.usr_ptr;    /* NULL unless read() filled it */
+    if (res < 0) {
+        fprintf(stderr, "%s: read(): %s [%d]\n", __func__, strerror(errno),
+                errno);
+        if (rep)
+            rep->state = SGQ_IO_ERR;
+        return -1;
+    }
+    if (! (rep && (SGQ_IO_STARTED == rep->state))) {
+        fprintf(stderr, "%s: bad usr_ptr\n", __func__);
+        if (rep)
+            rep->state = SGQ_IO_ERR;
+        return -1;
+    }
+    memcpy(&rep->io_hdr, &io_hdr, sizeof(sg_io_hdr_t));
+    hp = &rep->io_hdr;
+    if (repp)
+        *repp = rep;
+
+    switch (sg_err_category3(hp)) {
+        case SG_LIB_CAT_CLEAN:
+            break;
+        case SG_LIB_CAT_RECOVERED:
+            fprintf(stderr, "Recovered error on block=%d, num=%d\n",
+                   rep->blk, rep->num_blks);
+            break;
+        case SG_LIB_CAT_UNIT_ATTENTION:
+            return 1;   /* state is left as SGQ_IO_STARTED */
+        default:
+            sg_chk_n_print3(rep->wr ? "writing": "reading", hp, true);
+            rep->state = SGQ_IO_ERR;
+            return -1;
+    }
+    if (clp->dio &&
+        ((hp->info & SG_INFO_DIRECT_IO_MASK) != SG_INFO_DIRECT_IO))
+        ++clp->dio_incomplete; /* count dios done as indirect IO */
+    clp->sum_of_resids += hp->resid;
+    rep->state = SGQ_IO_FINISHED;
+#ifdef SG_DEBUG
+    fprintf(stderr, "%s: %s  ", __func__, wr ? "writing" : "reading");
+    fprintf(stderr, "    SGQ_IO_FINISHED elem idx=%td\n", rep - clp->elem);
+#endif
+    return 0;
+}
+
+/* Sets up fd for signal driven sg IO. Returns 0 if ok, 1 on failure. */
+static int
+sz_reserve(int fd, int bs, int bpt, bool rt_sig)
+{
+    int res, t, flags;
+
+    res = ioctl(fd, SG_GET_VERSION_NUM, &t);
+    if ((res < 0) || (t < 30000)) {
+        fprintf(stderr, "sgs_dd: sg driver prior to 3.x.y\n");
+        return 1;
+    }
+    t = bs * bpt;
+    res = ioctl(fd, SG_SET_RESERVED_SIZE, &t);
+    if (res < 0)        /* reported but not treated as fatal */
+        perror("sgs_dd: SG_SET_RESERVED_SIZE error");
+    if (-1 == fcntl(fd, F_SETOWN, getpid())) {
+        perror("fcntl(F_SETOWN)");
+        return 1;
+    }
+    flags = fcntl(fd, F_GETFL, 0);      /* a failure here is caught below */
+    if ((-1 == flags) || (-1 == fcntl(fd, F_SETFL, flags | O_ASYNC))) {
+        perror("fcntl(F_SETFL)");
+        return 1;
+    }
+    if (rt_sig) {       /* displaces SIGIO/SIGPOLL with SIGRTMIN + 1 */
+        if (-1 == fcntl(fd, F_SETSIG, SIGRTMIN + 1))
+            perror("fcntl(F_SETSIG)");
+    }
+    return 0;
+}
+
+static void
+init_elems(Rq_coll * clp)
+{
+    int idx;
+    Rq_elem * ep;
+    /* each element links to its successor; the last wraps to the first */
+    for (idx = 0; idx < SGQ_NUM_ELEMS; ++idx) {
+        ep = &clp->elem[idx];
+        ep->nextp = &clp->elem[(idx + 1) % SGQ_NUM_ELEMS];
+        ep->state = SGQ_FREE;
+        ep->buffp = malloc(clp->bpt * clp->bs);
+        if (NULL == ep->buffp)  /* only reported, not otherwise handled */
+            fprintf(stderr, "out of memory creating user buffers\n");
+    }
+    /* ring starts empty: read and write positions both on element 0 */
+    clp->wr_posp = &clp->elem[0];
+    clp->rd_posp = clp->wr_posp;
+}
+
+static int
+start_read(Rq_coll * clp)
+{   /* starts (sg) or performs (other) next read; blocks or -ve returned */
+    int blocks = (clp->in_count > clp->bpt) ? clp->bpt : clp->in_count;
+    Rq_elem * rep = clp->rd_posp;
+    int buf_sz, res;
+    char ebuff[EBUFF_SZ];
+
+#ifdef SG_DEBUG
+    fprintf(stderr, "%s: elem idx=%td\n", __func__, rep - clp->elem);
+#endif
+    rep->wr = false;
+    rep->blk = clp->in_blk;
+    rep->num_blks = blocks;
+    clp->in_blk += blocks;
+    clp->in_count -= blocks;
+    if (clp->in_is_sg) {
+        res = sg_start_io(clp, rep);
+        if (1 == res) {     /* ENOMEM, find what's available+try that */
+            if ((res = ioctl(clp->infd, SG_GET_RESERVED_SIZE, &buf_sz)) < 0) {
+                perror("RESERVED_SIZE ioctls failed");
+                return res;
+            }
+            clp->bpt = (buf_sz + clp->bs - 1) / clp->bs;
+            fprintf(stderr, "Reducing blocks per transfer to %d\n", clp->bpt);
+            if (clp->bpt < 1)
+                return -ENOMEM;
+            res = sg_start_io(clp, rep); /* NOTE(review): same num_blks */
+            if (1 == res)
+                res = -ENOMEM;
+        }
+        if (res < 0) {      /* also catches a failed ENOMEM retry */
+            fprintf(stderr, "sgs_dd inputting from sg failed, blk=%d\n",
+                    rep->blk);
+            rep->state = SGQ_IO_ERR;
+            return res;
+        }
+    }
+    else {
+        rep->state = SGQ_IO_STARTED;
+        while (((res = read(clp->infd, rep->buffp, blocks * clp->bs)) < 0) &&
+               (EINTR == errno))
+            ;
+        if (res < 0) {
+            snprintf(ebuff, EBUFF_SZ, "sgs_dd: reading, in_blk=%d ", rep->blk);
+            perror(ebuff);
+            rep->state = SGQ_IO_ERR;
+            return res;
+        }
+        if (res < blocks * clp->bs) {   /* short read: last transfer */
+            int o_blocks = blocks;
+            rep->stop_after_wr = true;
+            blocks = res / clp->bs;
+            if ((res % clp->bs) > 0) {
+                blocks++;
+                clp->in_partial++;
+            }
+            /* Reverse out + re-apply blocks on clp */
+            clp->in_blk -= o_blocks;
+            clp->in_count += o_blocks;
+            rep->num_blks = blocks;
+            clp->in_blk += blocks;
+            clp->in_count -= blocks;
+        }
+        clp->in_done_count -= blocks;
+        rep->state = SGQ_IO_FINISHED;
+    }
+    clp->rd_posp = rep->nextp;
+    return blocks;
+}
+
+static int
+start_write(Rq_coll * clp)
+{   /* writes out the first finished read found; blocks or -ve returned */
+    Rq_elem * rep = clp->wr_posp;
+    int res, blocks;
+    char ebuff[EBUFF_SZ];
+
+    while ((0 != rep->wr) || (SGQ_IO_FINISHED != rep->state)) {
+        rep = rep->nextp;
+        if (rep == clp->rd_posp)
+            return -1;  /* reached read position: nothing to write */
+    }
+#ifdef SG_DEBUG
+    fprintf(stderr, "%s: elem idx=%td\n", __func__, rep - clp->elem);
+#endif
+    rep->wr = true;
+    blocks = rep->num_blks;
+    rep->blk = clp->out_blk;
+    clp->out_blk += blocks;
+    clp->out_count -= blocks;
+    if (clp->out_is_sg) {
+        res = sg_start_io(clp, rep);
+        if (1 == res)      /* ENOMEM, give up */
+            return -ENOMEM;
+        else if (res < 0) {
+            fprintf(stderr, "sgs_dd output to sg failed, blk=%d\n", rep->blk);
+            rep->state = SGQ_IO_ERR;
+            return res;
+        }
+    }
+    else {
+        rep->state = SGQ_IO_STARTED;
+        while (((res = write(clp->outfd, rep->buffp,
+                     rep->num_blks * clp->bs)) < 0) && (EINTR == errno))
+            ;
+        if (res < 0) {
+            snprintf(ebuff, EBUFF_SZ, "sgs_dd: output, out_blk=%d ", rep->blk);
+            perror(ebuff);
+            rep->state = SGQ_IO_ERR;
+            return res;
+        }
+        if (res < blocks * clp->bs) {   /* short write */
+            blocks = res / clp->bs;
+            if ((res % clp->bs) > 0) {
+                blocks++;
+                clp->out_partial++;
+            }
+            rep->num_blks = blocks;
+        }
+        rep->state = SGQ_IO_FINISHED;
+    }
+    return blocks;
+}
+
+/* Returns 0 if SIGIO/SIGPOLL or (SIGRTMIN + 1) received, else returns negated
+ * errno value; -EAGAIN for timeout. */
+static int
+do_sigwait(Rq_coll * clp, bool inc1_clear0)
+{   /* inc1_clear0: true -> count one signal; false -> zero sigs_waiting */
+    siginfo_t info;
+    struct timespec ts;
+
+    if (clp->debug > 9)
+        fprintf(stderr, "%s: inc1_clear0=%d\n", __func__, (int)inc1_clear0);
+    ts.tv_sec = 60;         /* 60 second timeout */
+    ts.tv_nsec = 0;
+    while (sigtimedwait(&clp->blocked_sigs, &info, &ts) < 0) {
+        if (EINTR != errno) {
+            int err = errno;
+
+            fprintf(stderr, "%s: sigtimedwait(): %s [%d]\n", __func__,
+                    strerror(err), err);        /* EAGAIN is timeout */
+            return -err;        /* EAGAIN is timeout error */
+        }
+    }
+    if ((SIGRTMIN + 1) == info.si_signo) {
+        if (inc1_clear0) {
+            clp->sigs_waiting--;
+            clp->sigs_rt_received++;
+        } else
+            clp->sigs_waiting = 0;
+    } else if (SIGPOLL == info.si_signo) {
+        if (inc1_clear0) {
+            clp->sigs_waiting--;
+            clp->sigs_io_received++;
+        } else
+            clp->sigs_waiting = 0;
+    } else {
+        fprintf(stderr, "%s: sigtimedwait() returned si_signo=%d\n",
+                __func__, info.si_signo);
+        return -EINVAL;
+    }
+    return 0;
+}
+
+/* Returns 1 if fd is readable, 0 if not, negated errno value on error. */
+static int
+do_poll_for_in(Rq_coll * clp, int fd)
+{
+    struct pollfd pfd;
+    int sw_res, saved_errno;
+
+    if (0 != clp->sigs_waiting) {       /* a signal is still expected */
+        sw_res = do_sigwait(clp, true);
+        if (sw_res < 0)
+            return sw_res;
+    }
+    pfd.fd = fd;
+    pfd.events = POLLIN | POLLOUT;
+    pfd.revents = 0;
+    if (poll(&pfd, 1, 0) < 0) {         /* zero timeout */
+        saved_errno = errno;
+        fprintf(stderr, "%s: poll(): %s [%d]\n", __func__,
+                strerror(saved_errno), saved_errno);
+        return -saved_errno;
+    }
+    return (pfd.revents & POLLIN) ? 1 : 0;
+}
+
+static int      /* returns a SGQ_CAN_* value, or < 0 (main() then stops) */
+can_read_write(Rq_coll * clp)
+{
+    Rq_elem * rep = NULL;
+    bool writeable = false;     /* finished read seen before any blocker */
+    int res = 0;
+    int reading = 0;            /* reads in SGQ_IO_STARTED state */
+    int writing = 0;            /* writes in SGQ_IO_STARTED state */
+    int rd_waiting = 0;         /* reads in SGQ_IO_WAIT state */
+    int wr_waiting = 0;         /* writes in SGQ_IO_WAIT state */
+    int sg_finished = 0;        /* sg completions that needed no restart */
+
+    /* reap completed sg writes; restart any that sg_finish_io() flags (1) */
+    if (clp->out_is_sg) {
+        while ((res = do_poll_for_in(clp, clp->outfd))) {
+            if (res < 0)
+                return res;
+            res = sg_finish_io(clp, 1, &rep);
+            if (res < 0)
+                return res;
+            else if (1 == res) {
+                res = sg_start_io(clp, rep);
+                if (0 != res)
+                    return -1;  /* give up if any problems with retry */
+            }
+            else
+                sg_finished++;
+        }
+        while ((rep = clp->wr_posp) && (SGQ_IO_FINISHED == rep->state) &&
+               rep->wr && (rep != clp->rd_posp)) {
+            rep->state = SGQ_FREE;
+            clp->out_done_count -= rep->num_blks;
+            clp->wr_posp = rep->nextp;
+            if (rep->stop_after_wr)
+                return -1;
+        }
+    }
+    else if ((rep = clp->wr_posp) && rep->wr &&
+             (SGQ_IO_FINISHED == rep->state)) {
+        rep->state = SGQ_FREE;
+        clp->out_done_count -= rep->num_blks;
+        clp->wr_posp = rep->nextp;
+        if (rep->stop_after_wr)
+            return -1;
+    }
+
+    /* reap completed sg reads; restart any that sg_finish_io() flags (1) */
+    if (clp->in_is_sg) {
+        while ((res = do_poll_for_in(clp, clp->infd))) {
+            if (res < 0)
+                return res;
+            res = sg_finish_io(clp, 0, &rep);
+            if (res < 0)
+                return res;
+            if (1 == res) {
+                res = sg_start_io(clp, rep);
+                if (0 != res)
+                    return -1;  /* give up if any problems with retry */
+            }
+            else {
+                sg_finished++;
+                clp->in_done_count -= rep->num_blks;
+            }
+        }
+    }
+
+    for (rep = clp->wr_posp, res = 1;   /* res==1: nothing blocking yet */
+         rep != clp->rd_posp; rep = rep->nextp) {
+        if (SGQ_IO_STARTED == rep->state) {
+            if (rep->wr)
+                ++writing;
+            else {
+                res = 0;
+                ++reading;
+            }
+        }
+        else if ((! rep->wr) && (SGQ_IO_FINISHED == rep->state)) {
+            if (res)
+                writeable = true;
+        }
+        else if (SGQ_IO_WAIT == rep->state) {
+            res = 0;
+            if (rep->wr)
+                ++wr_waiting;
+            else
+                ++rd_waiting;
+        }
+        else
+            res = 0;
+    }
+    if (clp->debug) {
+        if ((clp->debug >= 9) || wr_waiting || rd_waiting)
+            fprintf(stderr, "%d/%d (nwb/nrb): read=%d/%d (do/wt) "
+                    "write=%d/%d (do/wt) writeable=%d sg_fin=%d\n",
+                    clp->out_blk, clp->in_blk, reading, rd_waiting,
+                    writing, wr_waiting, (int)writeable, sg_finished);
+        fflush(stdout);         /* NOTE(review): text above went to stderr */
+    }
+    if (writeable && (writing < SGQ_MAX_WR_AHEAD) && (clp->out_count > 0))
+        return SGQ_CAN_WRITE;
+    if ((reading < SGQ_MAX_RD_AHEAD) && (clp->in_count > 0) &&
+        (0 == rd_waiting) && (clp->rd_posp->nextp != clp->wr_posp))
+        return SGQ_CAN_READ;
+
+    if (clp->out_done_count <= 0)
+        return SGQ_CAN_DO_NOTHING;
+
+    /* usleep(10000); */      /* hang about for 10 milliseconds */
+    if (clp->sigs_waiting) {
+        res = do_sigwait(clp, false);
+        if (res < 0)
+            return res;
+    }
+    /* Walk the ring from rd_posp->nextp; start the first SGQ_IO_WAIT found */
+    for (rep = clp->rd_posp->nextp; rep != clp->rd_posp; rep = rep->nextp) {
+        if (SGQ_IO_WAIT == rep->state) {
+            res = sg_start_io(clp, rep);
+            if (res < 0)
+                return res;
+            if (res > 0)
+                return -1;
+            break;              /* at most one restart per call */
+        }
+    }
+    return SGQ_CAN_DO_NOTHING;
+}
+
+/* dd-like copy driven by key=value operands; returns 0 on success, else 1. */
+int
+main(int argc, char * argv[])
+{
+    bool bs_given = false;
+    int skip = 0;
+    int seek = 0;
+    int ibs = 0;
+    int obs = 0;
+    int count = -1;             /* negative: derive from device capacity */
+    char str[STR_SZ];
+    char * key;
+    char * buf;
+    char inf[INOUTF_SZ];
+    char outf[INOUTF_SZ];
+    int res, k;
+    int in_num_sect = 0;
+    int out_num_sect = 0;
+    int in_sect_sz, out_sect_sz, crw;
+    char ebuff[EBUFF_SZ];
+    Rq_coll rcoll;
+    Rq_coll * clp = &rcoll;
+
+    memset(clp, 0, sizeof(*clp));
+    clp->bpt = 0;
+    inf[0] = '\0';
+    outf[0] = '\0';
+    if (argc < 2) {
+        usage();
+        return 1;
+    }
+
+    for(k = 1; k < argc; k++) {
+        if (argv[k]) {
+            strncpy(str, argv[k], STR_SZ);
+            str[STR_SZ - 1] = '\0';
+        }
+        else
+            continue;
+        for(key = str, buf = key; *buf && *buf != '=';)
+            buf++;
+        if (*buf)
+            *buf++ = '\0';
+        if (strcmp(key,"if") == 0)      /* snprintf() always NUL-terminates */
+            snprintf(inf, INOUTF_SZ, "%s", buf);
+        else if (strcmp(key,"of") == 0)
+            snprintf(outf, INOUTF_SZ, "%s", buf);
+        else if (0 == strcmp(key,"ibs"))
+            ibs = sg_get_num(buf);
+        else if (0 == strcmp(key,"obs"))
+            obs = sg_get_num(buf);
+        else if (0 == strcmp(key,"bs"))
+            clp->bs = sg_get_num(buf);
+        else if (0 == strcmp(key,"bpt"))
+            clp->bpt = sg_get_num(buf);
+        else if (0 == strcmp(key,"skip"))
+            skip = sg_get_num(buf);
+        else if (0 == strcmp(key,"seek"))
+            seek = sg_get_num(buf);
+        else if (0 == strcmp(key,"count"))
+            count = sg_get_num(buf);
+        else if (0 == strcmp(key,"dio"))
+            clp->dio = !!sg_get_num(buf);
+        else if (0 == strcmp(key,"rt_sig"))
+            clp->use_rt_sig = !!sg_get_num(buf);
+        else if (0 == strcmp(key,"deb"))
+            clp->debug = sg_get_num(buf);
+        else if ((0 == strcmp(key,"-V")) || (0 == strcmp(key,"--version"))) {
+            fprintf(stderr, "%s: version: %s\n", my_name, version_str);
+            return 0;
+        } else {
+            fprintf(stderr, "Unrecognized argument '%s'\n", key);
+            usage();
+            return 1;
+        }
+    }
+    if (clp->bs <= 0) {
+        clp->bs = DEF_BLOCK_SIZE;
+    } else
+        bs_given = true;
+
+    if ((ibs && (ibs != clp->bs)) || (obs && (obs != clp->bs))) {
+        fprintf(stderr, "If 'ibs' or 'obs' given must be same as 'bs'\n");
+        usage();
+        return 1;
+    }
+    if (clp->bpt <= 0) {
+        clp->bpt = (DEF_BPT_TIMES_BS_SZ / clp->bs);
+        if (0 == clp->bpt)
+            clp->bpt = 1;
+        if (! bs_given)
+            fprintf(stderr, "Assume blocks size bs=%d [bytes] and blocks "
+                    "per transfer bpt=%d\n", clp->bs, clp->bpt);
+    } else if (! bs_given)
+        fprintf(stderr, "Assume 'bs' (block size) of %d bytes\n", clp->bs);
+
+    if ((skip < 0) || (seek < 0)) {
+        fprintf(stderr, "skip and seek cannot be negative\n");
+        return 1;
+    }
+#ifdef SG_DEBUG
+    fprintf(stderr, "sgs_dd: if=%s skip=%d of=%s seek=%d count=%d\n",
+           inf, skip, outf, seek, count);
+#endif
+    /* Need to block signals before SIGPOLL is enabled in sz_reserve() */
+    sigemptyset(&clp->blocked_sigs);
+    if (clp->use_rt_sig)
+        sigaddset(&clp->blocked_sigs, SIGRTMIN + 1);
+    sigaddset(&clp->blocked_sigs, SIGINT);
+    sigaddset(&clp->blocked_sigs, SIGPOLL);
+    sigprocmask(SIG_BLOCK, &clp->blocked_sigs, 0);
+
+    clp->infd = STDIN_FILENO;
+    clp->outfd = STDOUT_FILENO;
+    if (inf[0] && ('-' != inf[0])) {
+        if ((clp->infd = open(inf, O_RDONLY)) < 0) {
+            snprintf(ebuff, EBUFF_SZ, "sgs_dd: could not open %s for reading",
+                     inf);
+            perror(ebuff);
+            return 1;
+        }
+        if (ioctl(clp->infd, SG_GET_TIMEOUT, 0) < 0) {
+            clp->in_is_sg = false;
+            if (skip > 0) {
+                off_t offset = skip;
+
+                offset *= clp->bs;       /* could overflow here! */
+                if (lseek(clp->infd, offset, SEEK_SET) < 0) {
+                    snprintf(ebuff, EBUFF_SZ,
+                "sgs_dd: couldn't skip to required position on %s", inf);
+                    perror(ebuff);
+                    return 1;
+                }
+            }
+        }
+        else { /* looks like sg device so close then re-open it RW */
+            close(clp->infd);
+            if ((clp->infd = open(inf, O_RDWR | O_NONBLOCK)) < 0) {
+                fprintf(stderr, "If %s is a sg device, need read+write "
+                        "permissions, even to read it!\n", inf);
+                return 1;
+            }
+            clp->in_is_sg = true;
+            if (sz_reserve(clp->infd, clp->bs, clp->bpt, clp->use_rt_sig))
+                return 1;
+        }
+    }
+    if (outf[0] && ('-' != outf[0])) {
+        if ((clp->outfd = open(outf, O_RDWR | O_NONBLOCK)) >= 0) {
+            if (ioctl(clp->outfd, SG_GET_TIMEOUT, 0) < 0) {
+                /* not a sg device so close it; re-opened O_WRONLY below */
+                close(clp->outfd);
+            }
+            else {
+                clp->out_is_sg = true;
+                if (sz_reserve(clp->outfd, clp->bs, clp->bpt,
+                               clp->use_rt_sig))
+                    return 1;
+            }
+        }
+        if (! clp->out_is_sg) {
+            if ((clp->outfd = open(outf, O_WRONLY | O_CREAT, 0666)) < 0) {
+                snprintf(ebuff, EBUFF_SZ,
+                         "sgs_dd: could not open %s for writing", outf);
+                perror(ebuff);
+                return 1;
+            }
+            else if (seek > 0) {
+                off_t offset = seek;
+
+                offset *= clp->bs;       /* could overflow here! */
+                if (lseek(clp->outfd, offset, SEEK_SET) < 0) {
+                    snprintf(ebuff, EBUFF_SZ,
+                "sgs_dd: couldn't seek to required position on %s", outf);
+                    perror(ebuff);
+                    return 1;
+                }
+            }
+        }
+    } else if ('\0' == outf[0]) {
+        if (STDIN_FILENO == clp->infd) {
+            fprintf(stderr, "Can't have both 'if' as stdin _and_ 'of' as "
+                    "/dev/null\n");
+            return 1;
+        }
+        clp->outfd = open("/dev/null", O_RDWR);
+        if (clp->outfd < 0) {
+            perror("sgs_dd: could not open /dev/null");
+            return 1;
+        }
+        clp->out_is_sg = false;
+        /* ignore any seek */
+    } else {    /* must be '-' for stdout */
+        if (STDIN_FILENO == clp->infd) {
+            fprintf(stderr, "Can't have both 'if' as stdin _and_ 'of' as "
+                    "stdout\n");
+            return 1;
+        }
+    }
+    if (0 == count)
+        return 0;
+    else if (count < 0) {
+        if (clp->in_is_sg) {
+            res = read_capacity(clp->infd, &in_num_sect, &in_sect_sz);
+            if (2 == res) {
+                fprintf(stderr, "Unit attention, media changed(in), try "
+                        "again\n");
+                res = read_capacity(clp->infd, &in_num_sect, &in_sect_sz);
+            }
+            if (0 != res) {
+                fprintf(stderr, "Unable to read capacity on %s\n", inf);
+                in_num_sect = -1;
+            }
+            else {
+                if (in_num_sect > skip)
+                    in_num_sect -= skip;
+            }
+        }
+        if (clp->out_is_sg) {
+            res = read_capacity(clp->outfd, &out_num_sect, &out_sect_sz);
+            if (2 == res) {
+                fprintf(stderr, "Unit attention, media changed(out), try "
+                        "again\n");
+                res = read_capacity(clp->outfd, &out_num_sect, &out_sect_sz);
+            }
+            if (0 != res) {
+                fprintf(stderr, "Unable to read capacity on %s\n", outf);
+                out_num_sect = -1;
+            }
+            else {
+                if (out_num_sect > seek)
+                    out_num_sect -= seek;
+            }
+        }
+#ifdef SG_DEBUG
+        fprintf(stderr, "Start of loop, count=%d, in_num_sect=%d, "
+                "out_num_sect=%d\n", count, in_num_sect, out_num_sect);
+#endif
+        if (in_num_sect > 0) {
+            if (out_num_sect > 0)
+                count = (in_num_sect > out_num_sect) ? out_num_sect :
+                                                       in_num_sect;
+            else
+                count = in_num_sect;
+        }
+        else
+            count = out_num_sect;
+    }
+
+#ifdef SG_DEBUG
+    fprintf(stderr, "Start of loop, count=%d, bpt=%d\n", count, clp->bpt);
+#endif
+
+    clp->in_count = count;
+    clp->in_done_count = count;
+    clp->in_blk = skip;
+    clp->out_count = count;
+    clp->out_done_count = count;
+    clp->out_blk = seek;
+    init_elems(clp);
+
+/* vvvvvvvvvvvvvvvvv  Main Loop  vvvvvvvvvvvvvvvvvvvvvvvv */
+    while (clp->out_done_count > 0) {
+        crw = can_read_write(clp);
+        if (crw < 0)
+            break;
+        if (SGQ_CAN_READ & crw) {
+            res = start_read(clp);
+            if (res <= 0) {
+                fprintf(stderr, "start_read: res=%d\n", res);
+                break;
+            }
+        }
+        if (SGQ_CAN_WRITE & crw) {
+            res = start_write(clp);
+            if (res <= 0) {
+                fprintf(stderr, "start_write: res=%d\n", res);
+                break;
+            }
+        }
+    }
+
+    if (STDIN_FILENO != clp->infd)
+        close(clp->infd);
+    if (STDOUT_FILENO != clp->outfd)
+        close(clp->outfd);
+    if (0 != clp->out_count) {
+        fprintf(stderr, "Some error occurred, remaining blocks=%d\n",
+                clp->out_count);
+        return 1;
+    }
+    fprintf(stderr, "%d+%d records in\n", count - clp->in_done_count,
+           clp->in_partial);
+    fprintf(stderr, "%d+%d records out\n", count - clp->out_done_count,
+           clp->out_partial);
+    if (clp->dio_incomplete)
+        fprintf(stderr, ">> Direct IO requested but incomplete %d times\n",
+               clp->dio_incomplete);
+    if (clp->sum_of_resids)
+        fprintf(stderr, ">> Non-zero sum of residual counts=%d\n",
+               clp->sum_of_resids);
+    if (clp->debug > 0)
+        fprintf(stderr, "SIGIO/SIGPOLL signals received: %d, RT sigs: %d\n",
+               clp->sigs_io_received, clp->sigs_rt_received);
+    return 0;
+}
diff --git a/testing/uapi_sg.h b/testing/uapi_sg.h
index 4d3ccba..51cc31d 100644
--- a/testing/uapi_sg.h
+++ b/testing/uapi_sg.h
@@ -10,10 +10,11 @@
  *
  * Original driver (sg.h):
  *   Copyright (C) 1992 Lawrence Foard
- * Version 2 and 3 extensions to driver:
+ *
+ * Later extensions (versions 2, 3 and 4) to driver:
  *   Copyright (C) 1998 - 2018 Douglas Gilbert
  *
- *   Version: 4.0.02 (20181216)
+ * Version 4.0.02 (20181223)
  *  This version is for Linux 2.6, 3 and 4 series kernels.
  *
  * Documentation
@@ -30,7 +31,7 @@
 #include <linux/types.h>
 #include <linux/major.h>
 
-/* bsg.h contains the sg v4 user space interface structure. */
+/* bsg.h contains the sg v4 user space interface structure (sg_io_v4). */
 #include <linux/bsg.h>
 
 /*
@@ -173,7 +174,8 @@
 #define SG_SEIM_READ_VAL	0x20	/* write SG_SEIRV, read related */
 #define SG_SEIM_SHARE_FD	0x40	/* slave gives fd of master, sharing */
 #define SG_SEIM_SGAT_ELEM_SZ	0x80	/* sgat element size (>= PAGE_SIZE) */
-#define SG_SEIM_ALL_BITS	0xff	/* should be OR of previous items */
+#define SG_SEIM_CHG_SHARE_FD	0x100	/* master gives fd of new slave */
+#define SG_SEIM_ALL_BITS	0x1ff	/* should be OR of previous items */
 
 #define SG_CTL_FLAGM_TIME_IN_NS	0x1	/* time: nanosecs (def: millisecs) */
 #define SG_CTL_FLAGM_TAG_FOR_PACK_ID 0x2
@@ -198,7 +200,7 @@
 #define SG_SEIRV_TRC_MAX_SZ	0x6	/* maximum size of trace buffer */
 
 /*
- * A pointer to the following structure is passed as the third argument to 
+ * A pointer to the following structure is passed as the third argument to
  * ioctl(SG_SET_GET_EXTENDED). Each bit in the *_wr_mask fields causes the
  * corresponding integer (e.g. reserved_sz) or bit (e.g. the
  * SG_CTL_FLAG_TIME_IN_NS bit in ctl_flags) to be read from the user space