DynamoDS · mmisol · Aug 10, 2020 · Aug 7, 2020 · mjkkirschner · Aug 9, 2020
diff --git a/src/PythonMigrationViewExtension/MigrationAssistant/PythonMigrationAssistantViewModel.cs b/src/PythonMigrationViewExtension/MigrationAssistant/PythonMigrationAssistantViewModel.cs
@@ -60,7 +60,7 @@ private void MigrateCode()
             this.NewCode = ScriptMigrator.MigrateCode(this.OldCode);
 
             var sidebyside = new SideBySideDiffBuilder();
-            this.diffModel = sidebyside.BuildDiffModel(this.OldCode, this.NewCode);
+            this.diffModel = sidebyside.BuildDiffModel(this.OldCode, this.NewCode, false);
         }
 
         /// <summary>

diff --git a/src/PythonMigrationViewExtension/MigrationAssistant/ScriptMigrator.cs b/src/PythonMigrationViewExtension/MigrationAssistant/ScriptMigrator.cs
@@ -32,14 +32,27 @@ internal static string MigrateCode(string code)
             {
                 using (Py.GIL())
                 {
-
+                    string output;
                     using (PyScope scope = Py.CreateScope())
                     {
                         scope.Set(INPUT_NAME, code.ToPython());
-                        scope.Exec(GetPythonMigrationScript());
+                        scope.Exec(Get2To3MigrationScript());
+                        output = scope.Contains(RETURN_NAME) ? scope.Get(RETURN_NAME).ToString() : string.Empty;
+                    }
 
-                        return scope.Contains(RETURN_NAME) ? scope.Get(RETURN_NAME).ToString() : string.Empty;
+                    // If the code contains tabs, normalize the whitespaces. This is a Python 3 requirement
+                    // that's not addressed by 2to3.
+                    if (output.Contains("\t"))
+                    {
+                        using (PyScope scope = Py.CreateScope())
+                        {
+                            scope.Set(INPUT_NAME, output.ToPython());
+                            scope.Exec(GetReindentationScript());
+                            output = scope.Contains(RETURN_NAME) ? scope.Get(RETURN_NAME).ToString() : string.Empty;
+                        }
                     }
+
+                    return output;
                 }
             }
 
@@ -49,12 +62,22 @@ internal static string MigrateCode(string code)
             }
         }
 
-        private static string GetPythonMigrationScript()
+        private static string Get2To3MigrationScript()
+        {
+            return GetEmbeddedScript("Dynamo.PythonMigration.MigrationAssistant.migrate_2to3.py");
+        }
+
+        private static string GetReindentationScript()
+        {
+            return GetEmbeddedScript("Dynamo.PythonMigration.MigrationAssistant.reindent.py");
+        }
+
+        private static string GetEmbeddedScript(string resourceName)
         {
             Assembly asm = Assembly.GetExecutingAssembly();
             string script;
             using (var reader =
-                new StreamReader(asm.GetManifestResourceStream("Dynamo.PythonMigration.MigrationAssistant.migrate_2to3.py")))
+                new StreamReader(asm.GetManifestResourceStream(resourceName)))
             {
                 script = reader.ReadToEnd();
             }

diff --git a/src/PythonMigrationViewExtension/MigrationAssistant/reindent.py b/src/PythonMigrationViewExtension/MigrationAssistant/reindent.py
@@ -0,0 +1,182 @@
+#! /usr/bin/env python3
+
+# Based on a script by Tim Peters, 03 October 2000.
+
+import tokenize
+
+
+def reindent(code):
+    r = Reindenter(code)
+    r.run()
+    return r.getresult()
+
+def _rstrip(line, JUNK='\n \t'):
+    """Return line stripped of trailing spaces, tabs, newlines.
+
+    Note that line.rstrip() instead also strips sundry control characters,
+    but at least one known Emacs user expects to keep junk like that, not
+    mentioning Barry by name or anything <wink>.
+    """
+
+    i = len(line)
+    while i > 0 and line[i - 1] in JUNK:
+        i -= 1
+    return line[:i]
+
+
+class Reindenter:
+
+    def __init__(self, code):
+        self.find_stmt = 1  # next token begins a fresh stmt?
+        self.level = 0      # current indent level
+
+        # Raw file lines.
+        self.raw = code.split("\r\n")
+        self.endswithnewline = code.endswith("\r\n")
+
+        # File lines, rstripped & tab-expanded.  Dummy at start is so
+        # that we can use tokenize's 1-based line numbering easily.
+        # Note that a line is all-blank iff it's "".
+        self.lines = [_rstrip(line).expandtabs()
+                      for line in self.raw]
+        self.lines.insert(0, None)
+        self.index = 1  # index into self.lines of next line
+
+        # List of (lineno, indentlevel) pairs, one for each stmt and
+        # comment line.  indentlevel is -1 for comment lines, as a
+        # signal that tokenize doesn't know what to do about them;
+        # indeed, they're our headache!
+        self.stats = []
+
+    def run(self):
+        tokens = tokenize.generate_tokens(self.getline)
+        for _token in tokens:
+            self.tokeneater(*_token)
+        # Remove trailing empty lines.
+        lines = self.lines
+        while lines and lines[-1] == "":
+            lines.pop()
+        # Sentinel.
+        stats = self.stats
+        stats.append((len(lines), 0))
+        # Map count of leading spaces to # we want.
+        have2want = {}
+        # Program after transformation.
+        after = self.after = []
+        # Copy over initial empty lines -- there's nothing to do until
+        # we see a line with *something* on it.
+        i = stats[0][0]
+        after.extend(lines[1:i])
+        for i in range(len(stats) - 1):
+            thisstmt, thislevel = stats[i]
+            nextstmt = stats[i + 1][0]
+            have = getlspace(lines[thisstmt])
+            want = thislevel * 4
+            if want < 0:
+                # A comment line.
+                if have:
+                    # An indented comment line.  If we saw the same
+                    # indentation before, reuse what it most recently
+                    # mapped to.
+                    want = have2want.get(have, -1)
+                    if want < 0:
+                        # Then it probably belongs to the next real stmt.
+                        for j in range(i + 1, len(stats) - 1):
+                            jline, jlevel = stats[j]
+                            if jlevel >= 0:
+                                if have == getlspace(lines[jline]):
+                                    want = jlevel * 4
+                                break
+                    if want < 0:           # Maybe it's a hanging
+                                           # comment like this one,
+                        # in which case we should shift it like its base
+                        # line got shifted.
+                        for j in range(i - 1, -1, -1):
+                            jline, jlevel = stats[j]
+                            if jlevel >= 0:
+                                want = have + (getlspace(after[jline - 1]) -
+                                               getlspace(lines[jline]))
+                                break
+                    if want < 0:
+                        # Still no luck -- leave it alone.
+                        want = have
+                else:
+                    want = 0
+            assert want >= 0
+            have2want[have] = want
+            diff = want - have
+            if diff == 0 or have == 0:
+                after.extend(lines[thisstmt:nextstmt])
+            else:
+                for line in lines[thisstmt:nextstmt]:
+                    if diff > 0:
+                        if line == "":
+                            after.append(line)
+                        else:
+                            after.append(" " * diff + line)
+                    else:
+                        remove = min(getlspace(line), -diff)
+                        after.append(line[remove:])
+        return self.raw != self.after
+
+    def getresult(self):
+        separator = "\r\n"
+        return separator.join(self.after) + ("\r\n" if self.endswithnewline else "")
+
+    # Line-getter for tokenize.
+    def getline(self):
+        if self.index >= len(self.lines):
+            line = ""
+        else:
+            line = self.lines[self.index]
+            self.index += 1
+        return line
+
+    # Line-eater for tokenize.
+    def tokeneater(self, type, token, slinecol, end, line,
+                   INDENT=tokenize.INDENT,
+                   DEDENT=tokenize.DEDENT,
+                   NEWLINE=tokenize.NEWLINE,
+                   COMMENT=tokenize.COMMENT,
+                   NL=tokenize.NL):
+
+        if type == NEWLINE:
+            # A program statement, or ENDMARKER, will eventually follow,
+            # after some (possibly empty) run of tokens of the form
+            #     (NL | COMMENT)* (INDENT | DEDENT+)?
+            self.find_stmt = 1
+
+        elif type == INDENT:
+            self.find_stmt = 1
+            self.level += 1
+
+        elif type == DEDENT:
+            self.find_stmt = 1
+            self.level -= 1
+
+        elif type == COMMENT:
+            if self.find_stmt:
+                self.stats.append((slinecol[0], -1))
+                # but we're still looking for a new stmt, so leave
+                # find_stmt alone
+
+        elif type == NL:
+            pass
+
+        elif self.find_stmt:
+            # This is the first "real token" following a NEWLINE, so it
+            # must be the first token of the next program statement, or an
+            # ENDMARKER.
+            self.find_stmt = 0
+            if line:   # not endmarker
+                self.stats.append((slinecol[0], self.level))
+
+
+# Count number of leading blanks.
+def getlspace(line):
+    i, n = 0, len(line)
+    while i < n and line[i] == " ":
+        i += 1
+    return i
+
+output = reindent(code)
diff --git a/src/PythonMigrationViewExtension/PythonMigrationViewExtension.csproj b/src/PythonMigrationViewExtension/PythonMigrationViewExtension.csproj
@@ -186,6 +186,9 @@
   <ItemGroup>
     <EmbeddedResource Include="MigrationAssistant\migrate_2to3.py" />
   </ItemGroup>
+  <ItemGroup>
+    <EmbeddedResource Include="MigrationAssistant\reindent.py" />
+  </ItemGroup>
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
   <Target Name="AfterBuild">
     <ItemGroup>

diff --git a/test/DynamoCoreTests/PythonMigrationAssistantTests.cs b/test/DynamoCoreTests/PythonMigrationAssistantTests.cs
@@ -1,6 +1,6 @@
-using Dynamo.PythonMigration.MigrationAssistant;
+using System;
+using Dynamo.PythonMigration.MigrationAssistant;
 using NUnit.Framework;
-using System.Collections.Generic;
 
 namespace Dynamo.Tests
 {
@@ -34,5 +34,34 @@ public void MigrationWillNotChangePython3CompatibleCode()
             // Assert
             Assert.AreEqual(expectedPython3Code, migratedScript);
         }
+
+        [Test]
+        public void MigrationWillNormalizeWhiteSpaceIfCodeContainsTabs()
+        {
+            var original = "import sys" + Environment.NewLine +
+                "class MyClass:" + Environment.NewLine +
+                "  def __init__(self):" + Environment.NewLine +
+                "\t  pass" + Environment.NewLine +
+                "MyClass()" + Environment.NewLine;
+            var expected = "import sys" + Environment.NewLine +
+                "class MyClass:" + Environment.NewLine +
+                "    def __init__(self):" + Environment.NewLine +
+                "        pass" + Environment.NewLine +
+                "MyClass()" + Environment.NewLine;
+            var actual = ScriptMigrator.MigrateCode(original);
+            Assert.AreEqual(expected, actual);
+        }
+
+        [Test]
+        public void MigrationWontChangeWhiteSpaceIfCodeDoesNotContainTabs()
+        {
+            var original = "import sys" + Environment.NewLine +
+                "class MyClass:" + Environment.NewLine +
+                "  def __init__(self):" + Environment.NewLine +
+                "    pass" + Environment.NewLine +
+                "MyClass()" + Environment.NewLine;
+            var actual = ScriptMigrator.MigrateCode(original);
+            Assert.AreEqual(original, actual);
+        }
     }
 }