Restructure torch/torch.h and extension.h (#13482)

Summary:
This PR restructures the public-facing C++ headers in a backwards compatible way. The problem right now is that the C++ extension header `torch/extension.h` does not include the C++ frontend headers from `torch/torch.h`, even though those headers are convenient for extensions. At the same time, including the C++ frontend main header `torch/torch.h` in a C++ extension raises a deprecation warning: `torch/torch.h` *used* to be the main C++ extension header (so including it was the correct thing six months ago) but is now the main C++ frontend header, and we want to move extension authors away from it. In short: C++ extensions should get the C++ frontend functionality of `torch/torch.h` without including that header directly, since including it directly is deprecated for extensions.
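
For example, after this change a C++ extension can reach C++ frontend types through the extension header alone. A minimal sketch, modeled on the `Net` module in the updated test below:

```cpp
#include <torch/extension.h>

// torch::nn and friends are now reachable from the extension header alone;
// no separate #include <torch/torch.h> is needed (or wanted).
struct Net : torch::nn::Module {
  Net(int64_t in, int64_t out)
      : fc(register_module("fc", torch::nn::Linear(in, out))) {}
  torch::Tensor forward(torch::Tensor x) {
    return fc->forward(x);
  }
  torch::nn::Linear fc{nullptr};
};
```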

For clarification: why is `torch/torch.h` deprecated for extensions? Because extensions need the Python headers, while the C++ frontend must stay free of them. For now, `torch/torch.h` still pulls in the Python headers whenever it is used from a C++ extension (gated by a macro that `cpp_extensions.py` passes) so as not to break existing users, but this should change in the future.
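
Concretely, the gating in `torch/torch.h` looks like this (excerpted from the diff below):

```cpp
// TORCH_API_INCLUDE_EXTENSION_H is defined by cpp_extensions.py when building
// a C++ extension; plain C++ frontend builds never see the Python headers.
#ifdef TORCH_API_INCLUDE_EXTENSION_H
#include <torch/extension.h>
#warning \
    "Including torch/torch.h for C++ extensions is deprecated. Please include torch/extension.h"
#endif // defined(TORCH_API_INCLUDE_EXTENSION_H)
```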

The overall fix is simple:

1. C++ frontend sub-headers move from `torch/torch.h` into `torch/all.h`.
2. `torch/all.h` is included in:
    1. `torch/torch.h`, as is.
    2. `torch/extension.h`, to now also give C++ extension users this functionality (include graph sketched below).
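
The resulting include graph, sketched as comments:

```cpp
// torch/all.h       -> cuda.h, data.h, jit.h, nn.h, optim.h, serialize.h,
//                      tensor.h, utils.h
// torch/torch.h     -> torch/all.h (plus torch/extension.h when compiled as
//                      an extension, see above)
// torch/extension.h -> torch/all.h, torch/python.h, and the deprecated
//                      factories in torch/csrc/variable_tensor_functions.h
```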

With the next release we can then:
1. Remove the Python includes from `torch/torch.h`
2. Move C++-only sub-headers from `all.h` back into `torch.h`
3. Make `extension.h` include `torch.h` and `Python.h` (sketched below)
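
A hedged sketch of what that post-release layout could look like (not part of this PR; the file contents are illustrative only):

```cpp
// torch/torch.h (future): pure C++ frontend, no Python includes.
#pragma once
#include <torch/nn.h>
// ... the other C++-only sub-headers, moved back from all.h ...

// torch/extension.h (future):
#pragma once
#include <torch/torch.h>
#include <Python.h>
```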

This will then break old C++ extensions that include `torch/torch.h`, since the correct header for C++ extensions is `torch/extension.h`.

I've also gone ahead and deprecated `torch::CPU` et al., since those are long overdue to die.
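
Migration is to the options-based factories, as the test updates in the diff show:

```cpp
#include <torch/extension.h>

at::Tensor make_tensor() {
  // Before (now deprecated): a per-backend Type accessor plus a mutating setter.
  //   at::Tensor t = at::ones({3, 4}, torch::CPU(at::kDouble));
  //   torch::set_requires_grad(t, true);

  // After: a torch:: factory taking TensorOptions.
  return torch::ones({3, 4}, torch::dtype(torch::kDouble).requires_grad(true));
}
```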

ezyang soumith apaszke fmassa
Pull Request resolved: https://github.com/pytorch/pytorch/pull/13482

Differential Revision: D12924999

Pulled By: goldsborough

fbshipit-source-id: 5bb7bdc005fcb7b525195b769065176514efad8a
diff --git a/test/cpp_extensions/cpp_api_extension.cpp b/test/cpp_extensions/cpp_api_extension.cpp
index 066ad64..5bc1c88 100644
--- a/test/cpp_extensions/cpp_api_extension.cpp
+++ b/test/cpp_extensions/cpp_api_extension.cpp
@@ -1,6 +1,4 @@
 #include <torch/extension.h>
-#include <torch/python.h>
-#include <torch/torch.h>
 
 struct Net : torch::nn::Module {
   Net(int64_t in, int64_t out)
diff --git a/test/cpp_extensions/doubler.h b/test/cpp_extensions/doubler.h
index d9e6aae..afd4a00 100644
--- a/test/cpp_extensions/doubler.h
+++ b/test/cpp_extensions/doubler.h
@@ -2,8 +2,8 @@
 
 struct Doubler {
   Doubler(int A, int B) {
-    tensor_ = at::ones({A, B}, torch::CPU(at::kDouble));
-    torch::set_requires_grad(tensor_, true);
+    tensor_ =
+        torch::ones({A, B}, torch::dtype(torch::kDouble).requires_grad(true));
   }
   at::Tensor forward() {
     return tensor_ * 2;
diff --git a/test/cpp_extensions/extension.cpp b/test/cpp_extensions/extension.cpp
index 29bdd7d..e217b21 100644
--- a/test/cpp_extensions/extension.cpp
+++ b/test/cpp_extensions/extension.cpp
@@ -6,8 +6,8 @@
 
 struct MatrixMultiplier {
   MatrixMultiplier(int A, int B) {
-    tensor_ = at::ones({A, B}, torch::CPU(at::kDouble));
-    torch::set_requires_grad(tensor_, true);
+    tensor_ =
+        torch::ones({A, B}, torch::dtype(torch::kDouble).requires_grad(true));
   }
   at::Tensor forward(at::Tensor weights) {
     return tensor_.mm(weights);
diff --git a/test/cpp_extensions/setup.py b/test/cpp_extensions/setup.py
index a26fd8f..e7a61ec 100644
--- a/test/cpp_extensions/setup.py
+++ b/test/cpp_extensions/setup.py
@@ -1,12 +1,15 @@
+import sys
 import torch.cuda
 from setuptools import setup
 from torch.utils.cpp_extension import CppExtension, CUDAExtension
 from torch.utils.cpp_extension import CUDA_HOME
 
+CXX_FLAGS = [] if sys.platform == 'win32' else ['-g', '-Werror']
+
 ext_modules = [
     CppExtension(
         'torch_test_cpp_extension.cpp', ['extension.cpp'],
-        extra_compile_args=['-g']),
+        extra_compile_args=CXX_FLAGS),
 ]
 
 if torch.cuda.is_available() and CUDA_HOME is not None:
@@ -16,7 +19,7 @@
             'cuda_extension_kernel.cu',
             'cuda_extension_kernel2.cu',
         ],
-        extra_compile_args={'cxx': ['-g'],
+        extra_compile_args={'cxx': CXX_FLAGS,
                             'nvcc': ['-O2']})
     ext_modules.append(extension)
 
diff --git a/test/test_cpp_extensions.py b/test/test_cpp_extensions.py
index adbea8a..185356b 100755
--- a/test/test_cpp_extensions.py
+++ b/test/test_cpp_extensions.py
@@ -353,7 +353,6 @@
             name='cpp_api_extension',
             sources='cpp_extensions/cpp_api_extension.cpp',
             extra_include_paths=api_include,
-            extra_cflags=[] if IS_WINDOWS else ['-UTORCH_API_INCLUDE_EXTENSION_H'],
             verbose=True)
 
         net = module.Net(3, 5)
diff --git a/torch/csrc/api/include/torch/all.h b/torch/csrc/api/include/torch/all.h
new file mode 100644
index 0000000..dcd2367
--- /dev/null
+++ b/torch/csrc/api/include/torch/all.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <torch/cuda.h>
+#include <torch/data.h>
+#include <torch/jit.h>
+#include <torch/nn.h>
+#include <torch/optim.h>
+#include <torch/serialize.h>
+#include <torch/tensor.h>
+#include <torch/utils.h>
diff --git a/torch/csrc/api/include/torch/python.h b/torch/csrc/api/include/torch/python.h
index 1e3b64c..e35db62 100644
--- a/torch/csrc/api/include/torch/python.h
+++ b/torch/csrc/api/include/torch/python.h
@@ -1,5 +1,9 @@
 #pragma once
 
+#include <torch/detail/static.h>
+#include <torch/tensor.h>
+
+#include <torch/csrc/python_headers.h>
 #include <torch/csrc/utils/pybind.h>
 #include <torch/tensor.h>
 
diff --git a/torch/csrc/api/include/torch/torch.h b/torch/csrc/api/include/torch/torch.h
index 5e8b72a..e7d190c 100644
--- a/torch/csrc/api/include/torch/torch.h
+++ b/torch/csrc/api/include/torch/torch.h
@@ -1,15 +1,9 @@
 #pragma once
 
-#include <torch/cuda.h>
-#include <torch/data.h>
-#include <torch/jit.h>
-#include <torch/nn.h>
-#include <torch/optim.h>
-#include <torch/serialize.h>
-#include <torch/tensor.h>
-#include <torch/utils.h>
+#include <torch/all.h>
 
 #ifdef TORCH_API_INCLUDE_EXTENSION_H
 #include <torch/extension.h>
-#warning "Including torch/torch.h for C++ extensions is deprecated. Please include torch/extension.h"
+#warning \
+    "Including torch/torch.h for C++ extensions is deprecated. Please include torch/extension.h"
 #endif // defined(TORCH_API_INCLUDE_EXTENSION_H)
diff --git a/torch/csrc/variable_tensor_functions.h b/torch/csrc/variable_tensor_functions.h
index e18794a..3878a3c 100644
--- a/torch/csrc/variable_tensor_functions.h
+++ b/torch/csrc/variable_tensor_functions.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <ATen/ATen.h>
+#include <ATen/core/Deprecated.h>
 #include <torch/csrc/THP_export.h>
 
 namespace torch {
@@ -10,28 +11,35 @@
 
 // These functions provide a small wrapper around aten ensuring
 // that we create tensors with type Variable rather than raw tensors
-// when we create new tensors. We also provide a few accessors like requires_grad
-// that make it easier to get to varible information when we have a at::Tensor
+// when we create new tensors. We also provide a few accessors like
+// requires_grad that make it easier to get to variable information when we
+// have an at::Tensor.
 
-/// Returns a `TypeExtendedInterface` object for the given backend (e.g. `at::kCPU`) and
-/// `ScalarType` (e.g. `at::kDouble`).
+/// Returns a `TypeExtendedInterface` object for the given backend (e.g.
+/// `at::kCPU`) and `ScalarType` (e.g. `at::kDouble`).
 /// TODO: Eliminate this function as much as possible
-THP_CLASS at::TypeExtendedInterface& getVariableType(at::Backend backend, at::ScalarType type);
+AT_DEPRECATED(THP_CLASS at::TypeExtendedInterface& getVariableType(
+    at::Backend backend,
+    at::ScalarType type));
 
-/// Returns a `TypeExtendedInterface` object for the CPU backend and the given `ScalarType`
-/// (e.g. `at::kDouble`). Equivalent to `getVariableType(kCPU, type)`.
+/// Returns a `TypeExtendedInterface` object for the CPU backend and the given
+/// `ScalarType` (e.g. `at::kDouble`). Equivalent to `getVariableType(kCPU,
+/// type)`.
 /// TODO: Eliminate this function as much as possible
-THP_CLASS at::TypeExtendedInterface& CPU(at::ScalarType type);
+AT_DEPRECATED(THP_CLASS at::TypeExtendedInterface& CPU(at::ScalarType type));
 
-/// Returns a `TypeExtendedInterface` object for the CUDA backend and the given `ScalarType`
-/// (e.g. `at::kDouble`). Equivalent to `getVariableType(kCUDA, type)`.
+/// Returns a `TypeExtendedInterface` object for the CUDA backend and the given
+/// `ScalarType` (e.g. `at::kDouble`). Equivalent to `getVariableType(kCUDA,
+/// type)`.
 /// TODO: Eliminate this function as much as possible
-THP_CLASS at::TypeExtendedInterface& CUDA(at::ScalarType type);
+AT_DEPRECATED(THP_CLASS at::TypeExtendedInterface& CUDA(at::ScalarType type));
 
 /// Sets the `requires_grad` property of the given `Tensor`.
-THP_CLASS void set_requires_grad(at::Tensor& tensor, bool requires_grad) noexcept;
+AT_DEPRECATED(THP_CLASS void set_requires_grad(
+    at::Tensor& tensor,
+    bool requires_grad) noexcept);
 
 /// Returns the `requires_grad` of the given `Tensor`.
-THP_CLASS bool requires_grad(const at::Tensor& tensor) noexcept;
+AT_DEPRECATED(THP_CLASS bool requires_grad(const at::Tensor& tensor) noexcept);
 
 } // namespace torch
diff --git a/torch/extension.h b/torch/extension.h
index 828aefd..0ff1425 100644
--- a/torch/extension.h
+++ b/torch/extension.h
@@ -1,6 +1,8 @@
 #pragma once
 
-#include <Python.h>
-
-#include <torch/csrc/utils/pybind.h>
+// All pure C++ headers for the C++ frontend.
+#include <torch/all.h>
+// Python bindings for the C++ frontend (includes Python.h).
+#include <torch/python.h>
+// Deprecated tensor factories (to be removed).
 #include <torch/csrc/variable_tensor_functions.h>