Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GRAMEX-97 ⁃ ENH: Update multiple rows in files and DBs with data.update #456

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions gramex/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,13 +741,10 @@ def _filter_frame(data, meta, controls, args, source='select', id=[]):
# Apply filters
data = _filter_frame_col(data, key, col, op, vals, meta)
elif source == 'update':
# Update values should only contain 1 value. 2nd onwards are ignored
if key not in data.columns or len(vals) == 0:
meta['ignored'].append((key, vals))
else:
cols_for_update[key] = vals[0]
if len(vals) > 1:
meta['ignored'].append((key, vals[1:]))
cols_for_update[key] = vals
else:
meta['ignored'].append((key, vals))
meta['count'] = len(data)
Expand Down Expand Up @@ -868,20 +865,24 @@ def _filter_db(engine, table, meta, controls, args, source='select', id=[]):
query = _filter_db_col(query, query.where, key, col, op, vals,
cols[col], cols[col].type.python_type, meta)
elif source == 'update':
# Update values should only contain 1 value. 2nd onwards are ignored
if key not in cols or len(vals) == 0:
meta['ignored'].append((key, vals))
else:
cols_for_update[key] = vals[0]
if len(vals) > 1:
meta['ignored'].append((key, vals[1:]))
cols_for_update[key] = vals
else:
meta['ignored'].append((key, vals))
if source == 'delete':
res = engine.execute(query)
return res.rowcount
elif source == 'update':
query = query.values(cols_for_update)
id_name = id[0]
id_col = getattr(table.c, id_name)
cases = {
k: sa.case(
[(id_col == i, j) for i, j in zip(args[id_name], v)]
) for k, v in cols_for_update.items()
}
query = query.values(**cases)
res = engine.execute(query)
return res.rowcount
else:
Expand Down
43 changes: 43 additions & 0 deletions testlib/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,49 @@ def test_update(self):
gramex.data.update(data, args=args, id=['देश', 'city', 'product'])
ase(types_original, data.dtypes)

def test_update_multiple_file(self):
    # Multi-row update against a file: every id value in `name` is paired
    # with the value at the same position in the other argument lists.

    # Operate on a copy of the fixture and register it in self.tmpfiles
    # (presumably removed during test cleanup -- confirm against the class).
    source_csv = os.path.join(folder, '..', 'tests/actors.csv')
    update_file = os.path.join(folder, 'actors.update.csv')
    shutil.copy(source_csv, update_file)
    self.tmpfiles.append(update_file)

    names = ['Humphrey Bogart', 'James Stewart', 'Audrey Hepburn']
    categories = ['Stars', 'Thespians', 'Heartthrobs']
    ratings = [1, 0.99, 1.11]
    update_args = {
        'name': names,
        'category': categories,
        'rating': ratings,
    }
    gramex.data.update(update_file, args=update_args, id=['name'])

    # Read back only the rows that were updated and compare column by column
    df = gramex.data.filter(update_file, args={'name': names})
    for column, expected in (('category', categories), ('rating', ratings)):
        self.assertEqual(df[column].tolist(), expected)

def test_update_multiple_db(self):
actors = gramex.cache.open(os.path.join(folder, '../tests/actors.csv'))
temp_db = f'sqlite:///{folder}/actors.db'
self.tmpfiles.append(os.path.join(folder, 'actors.db'))
actors.to_sql('actors', sa.create_engine(temp_db), index=False)

names = ['Humphrey Bogart', 'James Stewart', 'Audrey Hepburn']
categories = ['Stars', 'Thespians', 'Heartthrobs']
ratings = [1, 0.99, 1.11]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a test case where

  • ratings has only 2 values, [1, 0.99], and the third is missing. Audrey's rating should not get updated, but her category should be.
  • names has only 2 values -- in which case, the 3rd category and rating are ignored

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sanand0 This works for files, but for SQLAlchemy, dealing with uneven args might require column-wise update queries. Is this acceptable?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jaidevd -- yes, column-wise update queries are fine. I don't expect this to be used often, and we can optimize later.

gramex.data.update(
temp_db,
args={
'name': names,
'category': categories,
'rating': ratings
}, id=['name'], table='actors'
)
df = gramex.data.filter(temp_db, args={'name': names}, table='actors')
self.assertEqual(df['category'].tolist(), categories)
self.assertEqual(df['rating'].tolist(), ratings)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add test cases for MySQL and PostgreSQL.

def test_delete(self):
raise SkipTest('TODO: write delete test cases')

Expand Down