[BE] Remove dependency on `six` and `future` (#94709)

Remove the Python 2/3 compatibility libraries [six](https://pypi.org/project/six) and [future](https://pypi.org/project/future), as well as the `torch._six` module. We only support Python 3.8+ now, so these compatibility layers are no longer needed and can be retired.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/94709
Approved by: https://github.com/malfet, https://github.com/Skylion007
diff --git a/test/distributed/test_store.py b/test/distributed/test_store.py
index eb7afae..bd26fca 100644
--- a/test/distributed/test_store.py
+++ b/test/distributed/test_store.py
@@ -16,7 +16,6 @@
     sys.exit(0)
 
 import torch.testing._internal.common_utils as common
-from torch._six import string_classes
 from torch.testing._internal.common_distributed import (
     skip_if_win32,
     create_tcp_store
@@ -336,7 +335,7 @@
         self.store = {}
 
     def set(self, key, value):
-        if not isinstance(key, string_classes):
+        if not isinstance(key, str):
             raise AssertionError("Expected set to be called with string key")
         if type(value) is not bytes:
             raise AssertionError("Expected set to be called with bytes value")
diff --git a/test/distributions/test_distributions.py b/test/distributions/test_distributions.py
index 836b595..db36429 100644
--- a/test/distributions/test_distributions.py
+++ b/test/distributions/test_distributions.py
@@ -42,7 +42,7 @@
 # Distributions tests use double as the default dtype
 torch.set_default_dtype(torch.double)
 
-from torch._six import inf, nan
+from torch import inf, nan
 from torch.testing._internal.common_utils import \
     (TestCase, run_tests, set_rng_seed, TEST_WITH_UBSAN, load_tests,
      gradcheck, skipIfTorchDynamo)
diff --git a/test/nn/test_pooling.py b/test/nn/test_pooling.py
index e795d6b..9a9124a 100644
--- a/test/nn/test_pooling.py
+++ b/test/nn/test_pooling.py
@@ -10,7 +10,7 @@
 import itertools
 import math
 
-from torch._six import inf, nan
+from torch import inf, nan
 import torch
 from torch.testing import make_tensor
 from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_UBSAN, set_default_dtype, \
diff --git a/test/test_autograd.py b/test/test_autograd.py
index efacfc0..9fecbab 100644
--- a/test/test_autograd.py
+++ b/test/test_autograd.py
@@ -23,7 +23,7 @@
 import torch
 
 from torch import nn
-from torch._six import inf, nan
+from torch import inf, nan
 from torch.autograd.function import once_differentiable
 from torch.autograd.profiler import (profile, record_function, emit_nvtx, emit_itt)
 from torch.autograd.profiler_util import (_format_time, EventList, FunctionEvent, FunctionEventAvg)
diff --git a/test/test_binary_ufuncs.py b/test/test_binary_ufuncs.py
index 82113ef..3f23be1 100644
--- a/test/test_binary_ufuncs.py
+++ b/test/test_binary_ufuncs.py
@@ -14,7 +14,7 @@
 from functools import partial
 
 import torch.autograd.forward_ad as fwAD
-from torch._six import inf, nan
+from torch import inf, nan
 from torch.testing._internal.common_utils import (
     TestCase,
     slowTest,
diff --git a/test/test_cuda.py b/test/test_cuda.py
index 9bb601c..344e66d 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -22,9 +22,9 @@
 import torch
 import torch.cuda
 import torch.cuda.comm as comm
+from torch import inf, nan
 from torch.nn.parallel import scatter_gather
 from torch.utils.checkpoint import checkpoint_sequential
-from torch._six import inf, nan
 from torch.testing._internal.common_utils import TestCase, freeze_rng_state, run_tests, \
     NO_MULTIPROCESSING_SPAWN, skipIfRocm, load_tests, IS_REMOTE_GPU, IS_SANDCASTLE, IS_WINDOWS, \
     slowTest, skipCUDANonDefaultStreamIf, skipCUDAMemoryLeakCheckIf, TEST_WITH_ROCM, TEST_NUMPY, \
@@ -1595,7 +1595,7 @@
             p = subprocess.Popen([sys.executable, '-c', f"""\
 import sys
 import torch
-from torch._six import inf, nan
+from torch import inf, nan
 try:
     with torch.random.fork_rng(devices=[0]):
         torch.multinomial(torch.tensor({probs}).to('cuda'), 2, replacement=True)
diff --git a/test/test_mps.py b/test/test_mps.py
index c03e4e3..f45601fa 100644
--- a/test/test_mps.py
+++ b/test/test_mps.py
@@ -17,7 +17,7 @@
 import torch.nn.functional as F
 import itertools
 from collections import defaultdict
-from torch._six import inf
+from torch import inf
 from torch.nn import Parameter
 from torch.testing._internal import opinfo
 from torch.testing._internal.common_utils import \
diff --git a/test/test_nn.py b/test/test_nn.py
index fc1d623..be5ca93 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -21,7 +21,7 @@
 # NN tests use double as the default dtype
 torch.set_default_dtype(torch.double)
 
-from torch._six import inf, nan
+from torch import inf, nan
 import torch.autograd.forward_ad as fwAD
 import torch.backends.cudnn as cudnn
 import torch.nn as nn
diff --git a/test/test_reductions.py b/test/test_reductions.py
index e14225d..29fc72e 100644
--- a/test/test_reductions.py
+++ b/test/test_reductions.py
@@ -11,7 +11,7 @@
 from itertools import product, combinations, permutations
 import warnings
 
-from torch._six import inf, nan
+from torch import inf, nan
 from torch.testing import make_tensor
 from torch.testing._internal.common_dtype import (
     all_types_and_complex_and, get_all_math_dtypes, integral_types, complex_types, floating_types_and,
diff --git a/test/test_shape_ops.py b/test/test_shape_ops.py
index a43d632..d3fefca 100644
--- a/test/test_shape_ops.py
+++ b/test/test_shape_ops.py
@@ -8,7 +8,7 @@
 import random
 import warnings
 
-from torch._six import nan
+from torch import nan
 from torch.testing import make_tensor
 from torch.testing._internal.common_utils import (
     TestCase, run_tests, skipIfTorchDynamo, torch_to_numpy_dtype_dict)
diff --git a/test/test_sort_and_select.py b/test/test_sort_and_select.py
index 1343e1a..540df06 100644
--- a/test/test_sort_and_select.py
+++ b/test/test_sort_and_select.py
@@ -4,7 +4,7 @@
 import numpy as np
 
 import random
-from torch._six import nan
+from torch import nan
 from itertools import permutations, product
 
 from torch.testing import make_tensor
diff --git a/test/test_torch.py b/test/test_torch.py
index 205328f..7069ccc 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -24,7 +24,7 @@
 import subprocess
 import weakref
 import sys
-from torch._six import inf, nan, string_classes
+from torch import inf, nan
 from itertools import product, combinations, permutations
 from functools import partial
 from torch import multiprocessing as mp
@@ -8288,7 +8288,7 @@
                 ns_name = ns.__name__
             skip_regexes = []
             for r in skips:
-                if isinstance(r, string_classes):
+                if isinstance(r, str):
                     skip_regexes.append(re.compile('^{}$'.format(re.escape(r))))
                 else:
                     skip_regexes.append(r)
diff --git a/test/test_unary_ufuncs.py b/test/test_unary_ufuncs.py
index 77a1940..bb9107b 100644
--- a/test/test_unary_ufuncs.py
+++ b/test/test_unary_ufuncs.py
@@ -8,7 +8,7 @@
 import random
 import unittest
 
-from torch._six import inf, nan
+from torch import inf, nan
 from torch.testing._internal.common_utils import (
     TestCase,
     run_tests,