From c29319eca0af6d6d507f8932a5b9662db5271664 Mon Sep 17 00:00:00 2001 From: Brian Hoffman Date: Fri, 8 Feb 2019 08:12:56 -0500 Subject: [PATCH 1/5] adds a trace filter to gather ruby gems and apps --- reprozip/reprozip/filters.py | 56 ++++++++++++++ reprozip/setup.py | 3 +- tests/test_rails_filter.py | 145 +++++++++++++++++++++++++++++++++++ 3 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 tests/test_rails_filter.py diff --git a/reprozip/reprozip/filters.py b/reprozip/reprozip/filters.py index d52c458e5..6f7e30a47 100644 --- a/reprozip/reprozip/filters.py +++ b/reprozip/reprozip/filters.py @@ -6,6 +6,7 @@ import logging import re +from rpaths import Path from reprozip.tracer.trace import TracedFile from reprozip.utils import irange, iteritems @@ -65,3 +66,58 @@ def python(files, input_files, **kwargs): lst.append(path) input_files[i] = lst + + +def ruby_gems_and_apps(files, input_files, **kwargs): + extensions = list(map(lambda ext: ext.encode('utf-8'), + ['.rb', + '.haml', + '.slim', + '.erb', + '.js', + '.html'])) + ignored_dirs = list(map(lambda ext: ext.encode('utf-8'), + ['spec', + 'test', + 'tests', + 'guides', + 'doc-api', + 'rdoc', + 'doc'])) + + gemy_path = re.compile(r'.*\/ruby[-/]\d+\.\d+\.\d+\/gems') + appdir_paths = re.compile(r'.*\/app\/(views|' + 'controllers|models|' + 'helpers)$') + + seen_paths = [] + add = [] + + def consume(dir_or_file): + dir_or_file = Path(dir_or_file) + if (dir_or_file.is_file() and + dir_or_file.ext in extensions and + dir_or_file not in files): + logger.info("Adding %s", dir_or_file) + add.append(TracedFile(dir_or_file)) + elif (dir_or_file.is_dir() and + dir_or_file.name not in ignored_dirs): + for child in dir_or_file.listdir(): + consume(child) + + for path, fi in iteritems(files): + m1 = gemy_path.match(str(path)) + if m1 and m1.group(0) not in seen_paths: + consume(m1.group(0)) + seen_paths.append(m1.group(0)) + elif m1: + continue + + m2 = appdir_paths.match(str(path)) + if (m2 and m2.group(0) not in seen_paths and + (path.parent.parent / 'config/application.rb').is_file()): + consume(m2.group(0)) + seen_paths.append(m2.group(0)) + + for fi in add: + files[fi.path] = fi diff --git a/reprozip/setup.py b/reprozip/setup.py index 1d84c2ae3..bac1649f6 100644 --- a/reprozip/setup.py +++ b/reprozip/setup.py @@ -50,7 +50,8 @@ 'reprozip = reprozip.main:main'], 'reprozip.filters': [ 'python = reprozip.filters:python', - 'builtin = reprozip.filters:builtin']}, + 'builtin = reprozip.filters:builtin', + 'ruby = reprozip.filters:ruby_gems_and_apps']}, install_requires=req, description="Linux tool enabling reproducible experiments (packer)", author="Remi Rampin, Fernando Chirigati, Dennis Shasha, Juliana Freire", diff --git a/tests/test_rails_filter.py b/tests/test_rails_filter.py new file mode 100644 index 000000000..8f6f73cfb --- /dev/null +++ b/tests/test_rails_filter.py @@ -0,0 +1,145 @@ +# Copyright (C) 2014-2017 New York University +# This file is part of ReproZip which is released under the Revised BSD License +# See file LICENSE for full license details. + +from __future__ import print_function, unicode_literals + +import os +import unittest +import tempfile +import shutil +from rpaths import Path +# from reprozip.tracer.trace import TracedFile +from reprozip.common import File +from reprozip.filters import ruby_gems_and_apps + + +class MockTracedFile(File): + + def __init(self, path): + File.__init(self, path, None) + + +class RailsFilterTest(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.tmp = Path(tempfile.mkdtemp('reprozip-tests')) + + @classmethod + def tearDownClass(cls): + shutil.rmtree(str(cls.tmp)) + + @classmethod + def touch(cls, test_files): + for fi in test_files: + if not fi.parent.is_dir(): + fi.parent.mkdir(parents=True) + with open(str(fi), 'a'): + os.utime(str(fi), None) + + def test_consuming_entire_gem(self): + gemdir = self.__class__.tmp / \ + 'gems/ruby-2.2.3/gems/kaminari-0.16.3' + gemfiles = [ + 'app/views/kaminari/_first_page.html.erb', + 'app/views/kaminari/_first_page.html.haml', + 'app/views/kaminari/_first_page.html.slim', + 'app/views/kaminari/_gap.html.erb', + 'app/views/kaminari/_gap.html.haml', + 'app/views/kaminari/_gap.html.slim', + 'app/views/kaminari/_last_page.html.erb', + 'app/views/kaminari/_last_page.html.haml', + 'app/views/kaminari/_last_page.html.slim', + ] + + self.__class__.touch( + map(lambda f: gemdir / f, gemfiles)) + + input_files = {} + files = {} + + for path in gemdir.recursedir(): + if not path.name.find(b'_first'): + f = MockTracedFile(path) + files[f.path] = f + + ruby_gems_and_apps(files, input_files) + + for gf in gemfiles: + gfp = gemdir / gf + self.assertIn(gfp, files.keys()) + self.assertEqual(gfp, files[gfp].path) + + # sometimes it's a little different path + gemdir = self.__class__.tmp / 'gems/ruby/2.1.0/gems/kaminari-0.16.3' + + self.__class__.touch( + map(lambda f: gemdir / f, gemfiles)) + + input_files = {} + files = {} + + for path in gemdir.recursedir(): + if not path.name.find(b'_first'): + f = MockTracedFile(path) + files[f.path] = f + + ruby_gems_and_apps(files, input_files) + + for gf in gemfiles: + gfp = gemdir / gf + self.assertIn(gfp, files.keys()) + self.assertEqual(gfp, files[gfp].path) + + def test_consuming_rails_files(self): + railsdir = self.__class__.tmp / 'rails-app' + railsfiles = [ + 'config/application.rb', + 'app/views/application.html.erb', + 'app/views/discussion-sidebar.html.erb', + 'app/views/payments_listing.html.erb', + 'app/views/print-friendly.html.erb', + 'app/views/w-sidebar.html.erb', + 'app/views/widget.html.erb'] + + self.__class__.touch( + map(lambda f: railsdir / f, railsfiles)) + + input_files = {} + files = {} + + viewsdir = MockTracedFile(railsdir / 'app/views') + files[viewsdir.path] = viewsdir + + ruby_gems_and_apps(files, input_files) + + for fi in railsfiles[1:]: + fp = railsdir / fi + self.assertIn(fp, files.keys()) + self.assertEqual(fp, files[fp].path) + + norailsdir = self.__class__.tmp / 'no-rails-app' + norailsfiles = [ + # 'config/application.rb', + 'app/views/application.html.erb', + 'app/views/discussion-sidebar.html.erb', + 'app/views/payments_listing.html.erb', + 'app/views/print-friendly.html.erb', + 'app/views/w-sidebar.html.erb', + 'app/views/widget.html.erb'] + + self.__class__.touch( + map(lambda f: norailsdir / f, norailsfiles)) + + input_files = {} + files = {} + + viewsdir = MockTracedFile(norailsdir / 'app/views') + files[viewsdir.path] = viewsdir + + ruby_gems_and_apps(files, input_files) + + for fi in norailsfiles: + fp = norailsdir / fi + self.assertNotIn(fp, files.keys()) From bc0349000a91f942a683fbf1127aeae3f45a99c4 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Thu, 8 Jul 2021 16:01:14 -0400 Subject: [PATCH 2/5] Rename ruby_gems_and_apps() to ruby() --- reprozip/reprozip/filters.py | 2 +- reprozip/setup.py | 2 +- tests/test_rails_filter.py | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/reprozip/reprozip/filters.py b/reprozip/reprozip/filters.py index 6f7e30a47..5a04728d1 100644 --- a/reprozip/reprozip/filters.py +++ b/reprozip/reprozip/filters.py @@ -68,7 +68,7 @@ def python(files, input_files, **kwargs): input_files[i] = lst -def ruby_gems_and_apps(files, input_files, **kwargs): +def ruby(files, input_files, **kwargs): extensions = list(map(lambda ext: ext.encode('utf-8'), ['.rb', '.haml', diff --git a/reprozip/setup.py b/reprozip/setup.py index bac1649f6..3f5f38392 100644 --- a/reprozip/setup.py +++ b/reprozip/setup.py @@ -51,7 +51,7 @@ 'reprozip.filters': [ 'python = reprozip.filters:python', 'builtin = reprozip.filters:builtin', - 'ruby = reprozip.filters:ruby_gems_and_apps']}, + 'ruby = reprozip.filters:ruby']}, install_requires=req, description="Linux tool enabling reproducible experiments (packer)", author="Remi Rampin, Fernando Chirigati, Dennis Shasha, Juliana Freire", diff --git a/tests/test_rails_filter.py b/tests/test_rails_filter.py index 8f6f73cfb..5beec3077 100644 --- a/tests/test_rails_filter.py +++ b/tests/test_rails_filter.py @@ -11,7 +11,7 @@ from rpaths import Path # from reprozip.tracer.trace import TracedFile from reprozip.common import File -from reprozip.filters import ruby_gems_and_apps +from reprozip.filters import ruby class MockTracedFile(File): @@ -64,7 +64,7 @@ def test_consuming_entire_gem(self): f = MockTracedFile(path) files[f.path] = f - ruby_gems_and_apps(files, input_files) + ruby(files, input_files) for gf in gemfiles: gfp = gemdir / gf @@ -85,7 +85,7 @@ def test_consuming_entire_gem(self): f = MockTracedFile(path) files[f.path] = f - ruby_gems_and_apps(files, input_files) + ruby(files, input_files) for gf in gemfiles: gfp = gemdir / gf @@ -112,7 +112,7 @@ def test_consuming_rails_files(self): viewsdir = MockTracedFile(railsdir / 'app/views') files[viewsdir.path] = viewsdir - ruby_gems_and_apps(files, input_files) + ruby(files, input_files) for fi in railsfiles[1:]: fp = railsdir / fi @@ -138,7 +138,7 @@ def test_consuming_rails_files(self): viewsdir = MockTracedFile(norailsdir / 'app/views') files[viewsdir.path] = viewsdir - ruby_gems_and_apps(files, input_files) + ruby(files, input_files) for fi in norailsfiles: fp = norailsdir / fi From 115673fc9544ea3538a1c4286162a8e68222dc43 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Thu, 8 Jul 2021 16:38:17 -0400 Subject: [PATCH 3/5] Fix ruby filter --- reprozip/reprozip/filters.py | 77 +++++++++++++++--------------------- 1 file changed, 32 insertions(+), 45 deletions(-) diff --git a/reprozip/reprozip/filters.py b/reprozip/reprozip/filters.py index 5a04728d1..6c5d71bf9 100644 --- a/reprozip/reprozip/filters.py +++ b/reprozip/reprozip/filters.py @@ -69,55 +69,42 @@ def python(files, input_files, **kwargs): def ruby(files, input_files, **kwargs): - extensions = list(map(lambda ext: ext.encode('utf-8'), - ['.rb', - '.haml', - '.slim', - '.erb', - '.js', - '.html'])) - ignored_dirs = list(map(lambda ext: ext.encode('utf-8'), - ['spec', - 'test', - 'tests', - 'guides', - 'doc-api', - 'rdoc', - 'doc'])) - - gemy_path = re.compile(r'.*\/ruby[-/]\d+\.\d+\.\d+\/gems') - appdir_paths = re.compile(r'.*\/app\/(views|' - 'controllers|models|' - 'helpers)$') - - seen_paths = [] - add = [] + extensions = set(ext.encode('utf-8') for ext in [ + '.rb', '.haml', '.slim', '.erb', '.js', '.html', + ]) + ignored_dirs = set(name.encode('utf-8') for name in [ + 'spec', 'test', 'tests', 'guides', 'doc-api', 'rdoc', 'doc', + ]) - def consume(dir_or_file): - dir_or_file = Path(dir_or_file) - if (dir_or_file.is_file() and - dir_or_file.ext in extensions and - dir_or_file not in files): - logger.info("Adding %s", dir_or_file) - add.append(TracedFile(dir_or_file)) - elif (dir_or_file.is_dir() and - dir_or_file.name not in ignored_dirs): - for child in dir_or_file.listdir(): - consume(child) + gemy_path = re.compile(r'^.*/ruby[-/]\d+\.\d+\.\d+/gems') + appdir_paths = re.compile(r'^.*/app/(views|controllers|models|helpers)') + + directories = set() for path, fi in iteritems(files): m1 = gemy_path.match(str(path)) - if m1 and m1.group(0) not in seen_paths: - consume(m1.group(0)) - seen_paths.append(m1.group(0)) - elif m1: - continue + if m1: + directories.add(Path(m1.group(0))) m2 = appdir_paths.match(str(path)) - if (m2 and m2.group(0) not in seen_paths and - (path.parent.parent / 'config/application.rb').is_file()): - consume(m2.group(0)) - seen_paths.append(m2.group(0)) + if m2: + app_root = Path(m2.group(0)).parent.parent + if (app_root / 'config/application.rb').is_file(): + directories.add(app_root) + + def add_recursive(dir_or_file): + if ( + dir_or_file.is_file() + and dir_or_file.ext in extensions + ): + logger.info("Adding %s", dir_or_file) + files[dir_or_file] = TracedFile(dir_or_file) + elif ( + dir_or_file.is_dir() + and dir_or_file.name not in ignored_dirs + ): + for child in dir_or_file.listdir(): + add_recursive(child) - for fi in add: - files[fi.path] = fi + for directory in directories: + add_recursive(directory) From 2d6a0e8ac2847311e3dc838d316ddbef23edee10 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Thu, 8 Jul 2021 16:42:56 -0400 Subject: [PATCH 4/5] Remove Ruby files from inputs --- reprozip/reprozip/filters.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/reprozip/reprozip/filters.py b/reprozip/reprozip/filters.py index 6c5d71bf9..c3c16ed1d 100644 --- a/reprozip/reprozip/filters.py +++ b/reprozip/reprozip/filters.py @@ -108,3 +108,13 @@ def add_recursive(dir_or_file): for directory in directories: add_recursive(directory) + + for i in irange(len(input_files)): + lst = [] + for path in input_files[i]: + if gemy_path.match(str(path)) or appdir_paths.match(str(path)): + logger.info("Removing input %s", path) + else: + lst.append(path) + + input_files[i] = lst From 900f9e3195efe462c5ed051098e0b85297ccefb7 Mon Sep 17 00:00:00 2001 From: Remi Rampin Date: Thu, 8 Jul 2021 17:37:55 -0400 Subject: [PATCH 5/5] Fix test --- tests/test_rails_filter.py | 139 +++++++++++++------------------------ 1 file changed, 47 insertions(+), 92 deletions(-) diff --git a/tests/test_rails_filter.py b/tests/test_rails_filter.py index 5beec3077..0bac163c2 100644 --- a/tests/test_rails_filter.py +++ b/tests/test_rails_filter.py @@ -2,45 +2,36 @@ # This file is part of ReproZip which is released under the Revised BSD License # See file LICENSE for full license details. -from __future__ import print_function, unicode_literals +from __future__ import division, print_function, unicode_literals -import os -import unittest -import tempfile -import shutil -from rpaths import Path -# from reprozip.tracer.trace import TracedFile from reprozip.common import File from reprozip.filters import ruby +from rpaths import Path +import unittest class MockTracedFile(File): - - def __init(self, path): - File.__init(self, path, None) + def __init__(self, path): + File.__init__(self, path, None) class RailsFilterTest(unittest.TestCase): + def setUp(self): + self.tmp = Path.tempdir(prefix='reprozip_tests_') - @classmethod - def setUpClass(cls): - cls.tmp = Path(tempfile.mkdtemp('reprozip-tests')) - - @classmethod - def tearDownClass(cls): - shutil.rmtree(str(cls.tmp)) + def tearDown(self): + self.tmp.rmtree() @classmethod def touch(cls, test_files): for fi in test_files: if not fi.parent.is_dir(): fi.parent.mkdir(parents=True) - with open(str(fi), 'a'): - os.utime(str(fi), None) + with fi.open('a'): + pass def test_consuming_entire_gem(self): - gemdir = self.__class__.tmp / \ - 'gems/ruby-2.2.3/gems/kaminari-0.16.3' + gemdir = self.tmp / 'gems/ruby-2.2.3/gems/kaminari-0.16.3' gemfiles = [ 'app/views/kaminari/_first_page.html.erb', 'app/views/kaminari/_first_page.html.haml', @@ -53,93 +44,57 @@ def test_consuming_entire_gem(self): 'app/views/kaminari/_last_page.html.slim', ] - self.__class__.touch( - map(lambda f: gemdir / f, gemfiles)) + self.touch(gemdir / f for f in gemfiles) - input_files = {} + input_files = [[]] files = {} for path in gemdir.recursedir(): - if not path.name.find(b'_first'): + if b'_first' in path.name: f = MockTracedFile(path) files[f.path] = f + input_files[0].append(path) - ruby(files, input_files) - - for gf in gemfiles: - gfp = gemdir / gf - self.assertIn(gfp, files.keys()) - self.assertEqual(gfp, files[gfp].path) - - # sometimes it's a little different path - gemdir = self.__class__.tmp / 'gems/ruby/2.1.0/gems/kaminari-0.16.3' + ruby(files=files, input_files=input_files) - self.__class__.touch( - map(lambda f: gemdir / f, gemfiles)) - - input_files = {} - files = {} - - for path in gemdir.recursedir(): - if not path.name.find(b'_first'): - f = MockTracedFile(path) - files[f.path] = f - - ruby(files, input_files) - - for gf in gemfiles: - gfp = gemdir / gf - self.assertIn(gfp, files.keys()) - self.assertEqual(gfp, files[gfp].path) + self.assertEqual(set(files.keys()), set(gemdir / f for f in gemfiles)) def test_consuming_rails_files(self): - railsdir = self.__class__.tmp / 'rails-app' + # Should be recognized: has a config file railsfiles = [ - 'config/application.rb', - 'app/views/application.html.erb', - 'app/views/discussion-sidebar.html.erb', - 'app/views/payments_listing.html.erb', - 'app/views/print-friendly.html.erb', - 'app/views/w-sidebar.html.erb', - 'app/views/widget.html.erb'] - - self.__class__.touch( - map(lambda f: railsdir / f, railsfiles)) - - input_files = {} - files = {} - - viewsdir = MockTracedFile(railsdir / 'app/views') - files[viewsdir.path] = viewsdir + 'yes/config/application.rb', + 'yes/app/views/application.html.erb', + 'yes/app/views/discussion-sidebar.html.erb', + 'yes/app/views/payments_listing.html.erb', + 'yes/app/views/print-friendly.html.erb', + 'yes/app/views/w-sidebar.html.erb', + 'yes/app/views/widget.html.erb', + ] + # Should NOT be: no config file + notrailsfiles = [ + # 'no/config/application.rb', + 'no/app/views/application.html.erb', + 'no/app/views/discussion-sidebar.html.erb', + 'no/app/views/payments_listing.html.erb', + 'no/app/views/print-friendly.html.erb', + 'no/app/views/w-sidebar.html.erb', + 'no/app/views/widget.html.erb', + ] - ruby(files, input_files) + self.touch(self.tmp / f for f in railsfiles) + self.touch(self.tmp / f for f in notrailsfiles) - for fi in railsfiles[1:]: - fp = railsdir / fi - self.assertIn(fp, files.keys()) - self.assertEqual(fp, files[fp].path) - - norailsdir = self.__class__.tmp / 'no-rails-app' - norailsfiles = [ - # 'config/application.rb', - 'app/views/application.html.erb', - 'app/views/discussion-sidebar.html.erb', - 'app/views/payments_listing.html.erb', - 'app/views/print-friendly.html.erb', - 'app/views/w-sidebar.html.erb', - 'app/views/widget.html.erb'] - - self.__class__.touch( - map(lambda f: norailsdir / f, norailsfiles)) - - input_files = {} + input_files = [[]] files = {} - viewsdir = MockTracedFile(norailsdir / 'app/views') + viewsdir = MockTracedFile(self.tmp / railsfiles[-1]) + files[viewsdir.path] = viewsdir + viewsdir = MockTracedFile(self.tmp / notrailsfiles[-1]) files[viewsdir.path] = viewsdir ruby(files, input_files) - for fi in norailsfiles: - fp = norailsdir / fi - self.assertNotIn(fp, files.keys()) + self.assertEqual( + set(files.keys()), + set(self.tmp / f for f in railsfiles + [notrailsfiles[-1]]), + )