How to obtain which lines changed for each committed file with git

I know git diff can show the differences between two commits. However, I don't know how to obtain which lines changed (for example, @@ -12,6 +12,11 @@) for each committed file. I already use a regex to extract the numbers, but I would like to have them grouped per file.

In other words, I would like something like this :

a/aten/src/ATen/cpu/vec256/vec256_int.h
@@ -12,6 +12,11 @@
@@ -95,25 +100,19 @@
@@ -190,25 +189,19 @@
@@ -380,25 +373,19 @@

diff --git a/test/test_torch.py b/test/test_torch.py
@@ -1388,6 +1388,14 @@

from this output below :

diff --git a/aten/src/ATen/cpu/vec256/vec256_int.h b/aten/src/ATen/cpu/vec256/vec256_int.h
index 9d2581e18..5c1cf80d5 100644
--- a/aten/src/ATen/cpu/vec256/vec256_int.h
+++ b/aten/src/ATen/cpu/vec256/vec256_int.h
@@ -12,6 +12,11 @@ namespace {
 struct Vec256i {
 protected:
   __m256i values;
+
+  static inline __m256i invert(const __m256i& v) {
+    const auto ones = _mm256_set1_epi64x(-1);
+    return _mm256_xor_si256(ones, v);
+  }
 public:
   Vec256i() {}
   Vec256i(__m256i v) : values(v) {}
@@ -95,25 +100,19 @@ struct Vec256<int64_t> : public Vec256i {
     return _mm256_cmpeq_epi64(values, other.values);
   }

@@ -190,25 +189,19 @@ struct Vec256<int32_t> : public Vec256i {
     return _mm256_cmpeq_epi32(values, other.values);
   }

@@ -380,25 +373,19 @@ struct Vec256<int16_t> : public Vec256i {
     return _mm256_cmpeq_epi16(values, other.values);
   }


diff --git a/test/test_torch.py b/test/test_torch.py
index 0c30c1f1a..10f6085cf 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -1388,6 +1388,14 @@ class _TestTorchMixin(object):
     def test_neg(self):
         self._test_neg(self, lambda t: t)

+    def test_threshold(self):
+        for dtype in torch.testing.get_all_dtypes():
+            if dtype != torch.uint8 and dtype != torch.float16:
+                # 100 is wide enough to use AVX2 instructions for all types
+                x = torch.randn(100).sign().to(dtype=dtype)
+                y = torch.threshold(x, 0, 0)
+                self.assertTrue(y.le(0).any())
+
     def test_reciprocal(self):
         a = torch.randn(100, 89)
         res_div = 1 / a

Note : I am using Python language.

Solution

Not knowing whether something like this already exists, I developed my own solution to achieve this.

First, I split the patchfile string using 'diff --git' as separators. This will return a separate patchfile for each file changed (all stored in split_patchfile) :

    def splitPatchfile(patchfile):
        """Split a multi-file git patch into one chunk per changed file.

        The patch is cut at every ``diff --git`` header. The separator is only
        recognised at the start of a line, so the same text appearing inside
        a changed line (e.g. ``+run diff --git ...``) does not create a bogus
        chunk.

        Args:
            patchfile: Full text of the patch, as printed by ``git diff``.

        Returns:
            A list of strings, one per file section, with the ``diff --git``
            marker itself removed. Whatever precedes the first header comes
            first; it is an empty string when the patch starts with a header.
        """
        import re  # local import keeps this snippet self-contained

        # Diff content lines always start with ' ', '+', '-' or '\', so only
        # genuine file headers can match at the beginning of a line.
        return re.split(r'^diff --git', patchfile, flags=re.MULTILINE)

Second, I parse every patchfile to find the changed lines. This creates a two-dimensional list that is saved in lines_numbers. Each line_numbers entry contains the changed lines of one patchfile.

    def findChangedLinesPerFile(split_patchfile):
        """Collect the hunk start lines of every per-file patch chunk.

        Args:
            split_patchfile: Iterable of patch chunks, one per changed file.

        Returns:
            A list with one entry per chunk, in the same order; each entry is
            the list returned by ``findChangedLines`` for that chunk.
        """
        return [findChangedLines(chunk) for chunk in split_patchfile]

    def findChangedLines(split_patch):
        """Return the start line of every hunk header found in a patch chunk.

        A hunk header is a line beginning with ``@@ `` followed by ``-`` or
        ``+`` and a number (e.g. ``@@ -12,6 +12,11 @@``). Only that first
        number is extracted.

        Args:
            split_patch: Text of a single file's section of a patch.

        Returns:
            A list of ints in order of appearance; empty when the chunk has
            no hunk headers.
        """
        hunk_header = r"^@@ [-+](\d+)"
        # With a single capture group, findall yields the captured digits.
        starts = re.findall(hunk_header, split_patch, re.MULTILINE)
        return [int(start) for start in starts]

Third, since the order of elements matters for further work, I remove the empty elements so that each patchfile corresponds to its line_numbers. Empty elements appear because of patchfile.split('diff --git'): it produced 3 patchfiles because I had 2 'diff --git' markers (the first patchfile was empty).

    def removeEmptyElements(split_patchfile, lines_numbers):
        """Drop empty patch chunks together with their line-number entries.

        The two lists are treated as parallel: element ``i`` of
        ``lines_numbers`` belongs to element ``i`` of ``split_patchfile``.
        A pair is removed only when its patch chunk is empty. A non-empty
        chunk with no hunks (binary file, rename, mode change) keeps its
        empty line-number list, so both results stay index-aligned.

        Args:
            split_patchfile: Patch chunks, one per file section.
            lines_numbers: Hunk start lines for each chunk, in the same order.

        Returns:
            A ``(split_patchfile, lines_numbers)`` tuple of two new lists of
            equal length. If the inputs differ in length, the extra trailing
            elements of the longer one are ignored.
        """
        # Filter pairs rather than each list on its own, so an empty
        # line-number list never shifts the entries that follow it.
        kept = [
            (patch, numbers)
            for patch, numbers in zip(split_patchfile, lines_numbers)
            if patch
        ]
        split_patchfile = [patch for patch, _ in kept]
        lines_numbers = [numbers for _, numbers in kept]
        return split_patchfile, lines_numbers