Add Half support for AvgPool2d on CPU (#109578)

Add Half (float16) support for AvgPool2d on CPU, covering both channels-last and channels-first memory formats.
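
A minimal usage sketch (illustration only, not part of the commit) of what this enables, mirroring the dtypes and memory formats exercised by the updated test:

```python
import torch

# Sketch: AvgPool2d in float16 on CPU, forward and backward,
# for both channels-first and channels-last inputs.
pool = torch.nn.AvgPool2d(kernel_size=7, stride=1)

# channels first (contiguous)
x = torch.randn(4, 30, 8, 8, dtype=torch.half, requires_grad=True)
out = pool(x)
out.sum().backward()
assert out.dtype == torch.half and x.grad.dtype == torch.half

# channels last
x_cl = torch.randn(4, 65, 8, 8, dtype=torch.half) \
           .to(memory_format=torch.channels_last).requires_grad_()
out_cl = pool(x_cl)
assert out_cl.is_contiguous(memory_format=torch.channels_last)
```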

Pull Request resolved: https://github.com/pytorch/pytorch/pull/109578
Approved by: https://github.com/mingfeima, https://github.com/albanD
diff --git a/test/nn/test_pooling.py b/test/nn/test_pooling.py
index 09725d8..2c14b03 100644
--- a/test/nn/test_pooling.py
+++ b/test/nn/test_pooling.py
@@ -1036,9 +1036,10 @@
         helper(None, 3, 50, 50, ks=5)
 
     @onlyCPU
-    def test_avg_pool2d_bfloat16(self, device):
+    @dtypes(torch.half, torch.bfloat16)
+    def test_avg_pool2d_reduced_floating(self, device, dtype):
         def helper(n, c, h, w, kernel_size, stride, memory_format):
-            input = torch.randn(n, c, h, w, dtype=torch.float32, device=device).bfloat16()
+            input = torch.randn(n, c, h, w, dtype=torch.float32, device=device).to(dtype=dtype)
             input = input.to(memory_format=memory_format).requires_grad_()
             pool = torch.nn.AvgPool2d(kernel_size, stride).to(device)
 
@@ -1050,10 +1051,10 @@
             out2.sum().backward()
 
             self.assertTrue(out.is_contiguous(memory_format=memory_format))
-            self.assertEqual(out.dtype, torch.bfloat16)
-            self.assertEqual(input.grad.dtype, torch.bfloat16)
-            self.assertEqual(out, out2.bfloat16())
-            self.assertEqual(input.grad, input2.grad.bfloat16())
+            self.assertEqual(out.dtype, dtype)
+            self.assertEqual(input.grad.dtype, dtype)
+            self.assertEqual(out, out2.to(dtype=dtype))
+            self.assertEqual(input.grad, input2.grad.to(dtype=dtype))
 
         helper(4, 30, 8, 8, 7, 1, torch.contiguous_format)
         helper(4, 65, 8, 8, 7, 1, torch.channels_last)
diff --git a/test/test_mps.py b/test/test_mps.py
index f9f5558..6347d2e 100644
--- a/test/test_mps.py
+++ b/test/test_mps.py
@@ -819,6 +819,9 @@
     SKIPLIST = {
         'all': None,
         'any': None,
+        # Unsupported
+        # input types 'tensor<1x3x9x9xf16>' and 'tensor<1xf32>' are not broadcast compatible
+        'nn.functional.avg_pool2d': [torch.float16],
     }
 
     def addDecorator(op, d) -> None: