wireservice · jpmckinney · Jul 13, 2024 · Jul 13, 2024 · Jul 13, 2024 · Jul 13, 2024
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,15 +1,12 @@
 Unreleased
 ----------
 
--  feat: :doc:`/scripts/csvsql` adds the options:
-
-   -  :code:`--min-col-len`
-   -  :code:`--col-len-multiplier`
-
--  feat: :doc:`/scripts/sql2csv` adds the `--engine-option` option.
+-  feat: :doc:`/scripts/csvsql` adds :code:`--min-col-len` and :code:`--col-len-multiplier` options.
+-  feat: :doc:`/scripts/sql2csv` adds a :code:`--engine-option` option.
 -  feat: Add a Docker image.
 -  feat: Add man pages to the sdist and wheel distributions.
 -  fix: :doc:`/scripts/csvstat` no longer errors when a column is a time delta and :code:`--json` is set.
+-  fix: When taking arguments from ``sys.argv`` on Windows, glob patterns, user directories, and environment variables are expanded.
 
 2.0.0 - May 1, 2024
 -------------------

diff --git a/csvkit/cli.py b/csvkit/cli.py
@@ -7,8 +7,11 @@
 import gzip
 import itertools
 import lzma
+import os
+import re
 import sys
 import warnings
+from glob import glob
 from os.path import splitext
 
 import agate
@@ -73,6 +76,11 @@ def __init__(self, args=None, output_file=None, error_file=None):
         """
         Perform argument processing and other setup for a CSVKitUtility.
         """
+        if args is None:
+            args = sys.argv[1:]
+        if os.name == 'nt':
+            args = _expand_args(args)
+
         self._init_common_parser()
         self.add_arguments()
         self.args = self.argparser.parse_args(args)
@@ -550,3 +558,24 @@ def parse_column_identifiers(ids, column_names, column_offset=1, excluded_column
                     excludes.append(match_column_identifier(column_names, x, column_offset))
 
     return [c for c in columns if c not in excludes]
+
+
+# Adapted from https://github.com/pallets/click/blame/main/src/click/utils.py
+def _expand_args(args):
+    out = []
+
+    for arg in args:
+        arg = os.path.expanduser(arg)
+        arg = os.path.expandvars(arg)
+
+        try:
+            matches = glob(arg, recursive=True)
+        except re.error:
+            matches = []
+
+        if matches:
+            out.extend(matches)
+        else:
+            out.append(arg)
+
+    return out
diff --git a/tests/test_utilities/test_csvformat.py b/tests/test_utilities/test_csvformat.py
@@ -59,12 +59,12 @@ def test_asv(self):
         ], newline_at_eof=False)
 
     def test_quotechar(self):
-        input_file = io.BytesIO(b'a,b,c\n1*2,3,4\n')
+        input_file = io.BytesIO(b'a,b,c\n1#2,3,4\n')
 
         with stdin_as_string(input_file):
-            self.assertLines(['-Q', '*'], [
+            self.assertLines(['-Q', '#'], [
                 'a,b,c',
-                '*1**2*,3,4',
+                '#1##2#,3,4',
             ])
 
         input_file.close()
@@ -157,12 +157,12 @@ def test_asv(self):
         ], newline_at_eof=False)
 
     def test_quotechar(self):
-        input_file = io.BytesIO(b'a,b,c\n1*2,3,4\n')
+        input_file = io.BytesIO(b'a,b,c\n1#2,3,4\n')
 
         with stdin_as_string(input_file):
-            self.assertLines(['-U', '2', '-Q', '*'], [
-                '*a*,*b*,*c*',
-                '*1**2*,3,4',
+            self.assertLines(['-U', '2', '-Q', '#'], [
+                '#a#,#b#,#c#',
+                '#1##2#,3,4',
             ])
 
         input_file.close()

diff --git a/tests/test_utilities/test_csvjoin.py b/tests/test_utilities/test_csvjoin.py
@@ -1,4 +1,6 @@
+import os
 import sys
+import unittest
 from unittest.mock import patch
 
 from csvkit.utilities.csvjoin import CSVJoin, launch_new_instance
@@ -34,6 +36,14 @@ def test_join_options(self):
                     'You must provide join column names when performing an outer join.',
                 )
 
+    @unittest.skipIf(os.name != 'nt', 'Windows only')
+    def test_glob(self):
+        self.assertRows(['--no-inference', '-c', 'a', 'examples/dummy?.csv'], [
+            ['a', 'b', 'c', 'b2', 'c2'],
+            ['1', '2', '3', '2', '3'],
+            ['1', '2', '3', '4', '5'],
+        ])
+
     def test_sequential(self):
         output = self.get_output_as_io(['examples/join_a.csv', 'examples/join_b.csv'])
         self.assertEqual(len(output.readlines()), 4)

diff --git a/tests/test_utilities/test_csvsql.py b/tests/test_utilities/test_csvsql.py
@@ -1,6 +1,7 @@
 import io
 import os
 import sys
+import unittest
 from textwrap import dedent
 from unittest.mock import patch
 
@@ -63,6 +64,23 @@ def tearDown(self):
         if os.path.exists(self.db_file):
             os.remove(self.db_file)
 
+    @unittest.skipIf(os.name != 'nt', 'Windows only')
+    def test_glob(self):
+        sql = self.get_output(['examples/dummy?.csv'])
+
+        self.assertEqual(sql.replace('\t', '  '), dedent('''\
+            CREATE TABLE dummy2 (
+              a BOOLEAN NOT NULL, 
+              b DECIMAL NOT NULL, 
+              c DECIMAL NOT NULL
+            );
+            CREATE TABLE dummy3 (
+              a BOOLEAN NOT NULL, 
+              b DECIMAL NOT NULL, 
+              c DECIMAL NOT NULL
+            );
+        '''))  # noqa: W291
+
     def test_create_table(self):
         sql = self.get_output(['--tables', 'foo', 'examples/testfixed_converted.csv'])
 

diff --git a/tests/test_utilities/test_csvstack.py b/tests/test_utilities/test_csvstack.py
@@ -1,4 +1,6 @@
+import os
 import sys
+import unittest
 from unittest.mock import patch
 
 from csvkit.utilities.csvstack import CSVStack, launch_new_instance
@@ -20,6 +22,18 @@ def test_options(self):
             'The number of grouping values must be equal to the number of CSV files being stacked.',
         )
 
+    @unittest.skipIf(os.name != 'nt', 'Windows only')
+    def test_glob(self):
+        self.assertRows(['examples/dummy*.csv'], [
+            ['a', 'b', 'c', 'd'],
+            ['1', '2', '3', ''],
+            ['1', '2', '3', ''],
+            ['1', '2', '3', ''],
+            ['1', '4', '5', ''],
+            ['1', '2', '3', ''],
+            ['1', '2', '3', '4'],
+        ])
+
     def test_skip_lines(self):
         self.assertRows(['--skip-lines', '3', 'examples/test_skip_lines.csv', 'examples/test_skip_lines.csv'], [
             ['a', 'b', 'c'],