Skip to content
This repository has been archived by the owner on Feb 28, 2018. It is now read-only.

Commit

Permalink
Merge pull request #242 from datasciencebr/cuducos-rows
Browse files Browse the repository at this point in the history
Use rows to serialize reimbursements
  • Loading branch information
anaschwendler authored Oct 23, 2017
2 parents 7172d20 + cf66950 commit 9d31da5
Show file tree
Hide file tree
Showing 9 changed files with 133 additions and 95 deletions.
25 changes: 25 additions & 0 deletions jarbas/core/fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from datetime import date

from rows import fields


class IntegerField(fields.IntegerField):
    """Integer field that tolerates float-formatted input such as '2011.0'."""

    @classmethod
    def deserialize(cls, value, *args, **kwargs):
        """Deserialize ``value`` after trying a float-to-int conversion.

        The value is first passed through ``int(float(value))`` (which
        truncates any fractional part). If that conversion is not possible
        the original value is handed to the parent ``deserialize`` untouched.

        :param value: raw value read from the data source
        :param args: accepted for signature compatibility; not forwarded
        :param kwargs: accepted for signature compatibility; not forwarded
        :return: whatever the parent ``IntegerField.deserialize`` returns
        """
        try:  # Rows cannot convert values such as '2011.0' to integer
            value = int(float(value))
        except (TypeError, ValueError, OverflowError):
            # TypeError: value is None or another non-numeric object
            # ValueError: empty/non-numeric text, or NaN
            # OverflowError: infinity
            # In all these cases the parent class decides what to do with
            # the original value; anything else (e.g. KeyboardInterrupt)
            # must propagate instead of being silently swallowed.
            pass
        return super(IntegerField, cls).deserialize(value)


class DateAsStringField(fields.DateField):
    """Date field whose deserialized value is a formatted string."""

    INPUT_FORMAT = '%Y-%m-%dT%H:%M:%S'
    OUTPUT_FORMAT = '%Y-%m-%d'

    @classmethod
    def deserialize(cls, value, *args, **kwargs):
        """Deserialize ``value`` with the parent class and format the result.

        :param value: raw value read from the data source
        :param args: accepted for signature compatibility; not forwarded
        :param kwargs: accepted for signature compatibility; not forwarded
        :return: the parent's result formatted with ``OUTPUT_FORMAT``, or
            ``None`` when the parent returns a falsy value
        """
        parsed = super(DateAsStringField, cls).deserialize(value)
        if not parsed:
            return None
        # useful when serializing it to Celery
        return parsed.strftime(cls.OUTPUT_FORMAT)
22 changes: 19 additions & 3 deletions jarbas/core/management/commands/reimbursements.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import csv
import lzma

from django.utils.timezone import now
from rows import import_from_csv
from rows.fields import FloatField, TextField

from jarbas.core.fields import DateAsStringField, IntegerField
from jarbas.core.management.commands import LoadCommand
from jarbas.core.models import Reimbursement
from jarbas.core.tasks import create_or_update_reimbursement
Expand All @@ -24,8 +26,22 @@ def handle(self, *args, **options):
@property
def reimbursements(self):
"""Returns a Generator with a dict object for each row."""
with lzma.open(self.path, mode='rt', encoding='utf-8') as file_handler:
yield from csv.DictReader(file_handler)
# Column name -> field class, passed to import_from_csv as force_types below
force_types = {
'cnpj_cpf': TextField,
'document_number': TextField,
'leg_of_the_trip': TextField,
'congressperson_id': IntegerField,
'congressperson_document': IntegerField,
'reimbursement_value_total': FloatField,
'reimbursement_values': TextField,
'issue_date': DateAsStringField,
'term': IntegerField,
'term_id': IntegerField
}
# self.path is opened through lzma and the handler is given to rows' CSV importer
with lzma.open(self.path) as file_handler:
for row in import_from_csv(file_handler, force_types=force_types):
as_dict = dict(row._asdict()) # _asdict returns OrderedDict
yield as_dict

def create_or_update(self, rows):
for count, row in enumerate(rows, 1):
Expand Down
2 changes: 1 addition & 1 deletion jarbas/core/querysets.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def list_distinct(self, field, order_by_field, query=None):

self = self.values(field, order_by_field).order_by(order_by_field)
return self.distinct()

def suspicions(self, boolean):
if not boolean:
return self.filter(suspicions=None)
Expand Down
53 changes: 8 additions & 45 deletions jarbas/core/tasks.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,25 @@
from celery import shared_task

from jarbas.core.models import Reimbursement
from jarbas.core.management.commands import LoadCommand


@shared_task
def create_or_update_reimbursement(data):
def create_or_update_reimbursement(row):
"""
:param data: (dict) reimbursement data (keys and data types must match
Reimbursement model)
:param row: (dict) key/values matching Reimbursement model
"""
serialized = serialize_reimbursement(data)
kwargs = dict(document_id=serialized['document_id'], defaults=serialized)
data = transform_row(row)
# document_id is the lookup key; the whole dict is passed as defaults
kwargs = dict(document_id=data['document_id'], defaults=data)
Reimbursement.objects.update_or_create(**kwargs)


def serialize_reimbursement(data):
"""Read the dict generated by DictReader and fix types"""

# Keys set to None on the dict
missing = ('probability', 'suspicions')
for key in missing:
data[key] = None

def transform_row(row):
"""Read the dict generated by rows and fix some keys"""
# (old key, new key) pairs: each value is popped and stored under the new key
rename = (
('subquota_number', 'subquota_id'),
('reimbursement_value_total', 'total_reimbursement_value')
)
for old, new in rename:
data[new] = data.pop(old)

# Keys whose values go through LoadCommand.to_number with int as second argument
integers = (
'applicant_id',
'batch_number',
'congressperson_document',
'congressperson_id',
'document_id',
'document_type',
'installment',
'month',
'subquota_group_id',
'subquota_id',
'term',
'term_id',
'year'
)
for key in integers:
data[key] = LoadCommand.to_number(data[key], int)

# Keys whose values go through LoadCommand.to_number with no second argument
floats = (
'document_value',
'remark_value',
'total_net_value',
'total_reimbursement_value'
)
for key in floats:
data[key] = LoadCommand.to_number(data[key])

# issue_date is parsed by LoadCommand.to_date and stored back as a '%Y-%m-%d' string
issue_date = LoadCommand.to_date(data['issue_date'])
data['issue_date'] = issue_date.strftime('%Y-%m-%d')
row[new] = row.pop(old)

return data
return row
Binary file added jarbas/core/tests/fixtures/reimbursements.xz
Binary file not shown.
67 changes: 30 additions & 37 deletions jarbas/core/tests/test_reimbursement_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,48 @@
from mixer.backend.django import mixer

from jarbas.core.models import Reimbursement
from jarbas.core.tasks import (
create_or_update_reimbursement,
serialize_reimbursement
)
from jarbas.core.tasks import create_or_update_reimbursement, transform_row


class TestCreateOrUpdateTask(TestCase):

def setUp(self):
self.csv_row_as_dict = {
'applicant_id': '13',
'batch_number': '9',
self.data = {
'applicant_id': 13,
'batch_number': 9,
'cnpj_cpf': '11111111111111',
'congressperson_document': '2',
'congressperson_id': '1',
'congressperson_document': 2,
'congressperson_id': 1,
'congressperson_name': 'Roger That',
'document_id': '42',
'document_id': 42,
'document_number': '6',
'document_type': '7',
'document_value': '8.90',
'installment': '7',
'issue_date': '01/01/1970',
'document_type': 7,
'document_value': 8.90,
'installment': 7,
'issue_date': '1970-01-01',
'leg_of_the_trip': '8',
'month': '1',
'month': 1,
'net_values': '1.99,2.99',
'party': 'Partido',
'passenger': 'John Doe',
'reimbursement_numbers': '10,11',
'reimbursement_values': '12.13,14.15',
'remark_value': '1.23',
'remark_value': 1.23,
'state': 'UF',
'subquota_description': 'Subquota description',
'subquota_group_description': 'Subquota group desc',
'subquota_group_id': '5',
'subquota_number': '4',
'subquota_group_id': 5,
'subquota_number': 4,
'supplier': 'Acme',
'term': '1970',
'term_id': '3',
'total_net_value': '4.56',
'reimbursement_value_total': 'NaN',
'year': '1970'
'term': 1970,
'term_id': 3,
'total_net_value': 4.56,
'reimbursement_value_total': None,
'year': 1970
}

def test_serializer(self):
def test_transform_row(self):
data = self.data.copy()
expected = {
'applicant_id': 13,
'batch_number': 9,
Expand Down Expand Up @@ -77,28 +75,23 @@ def test_serializer(self):
'term_id': 3,
'total_net_value': 4.56,
'total_reimbursement_value': None,
'year': 1970,
'probability': None,
'suspicions': None
'year': 1970
}
self.maxDiff = 2 ** 10
data = self.csv_row_as_dict.copy()
self.assertEqual(expected, serialize_reimbursement(data))
self.assertEqual(expected, transform_row(data))


def test_create(self):
self.assertEqual(0, Reimbursement.objects.count())
data = self.csv_row_as_dict.copy()
create_or_update_reimbursement(data)
create_or_update_reimbursement(self.data)
self.assertEqual(1, Reimbursement.objects.count())

def test_update(self):
self.assertEqual(0, Reimbursement.objects.count())

data = self.csv_row_as_dict.copy()
serialized = serialize_reimbursement(data)
serialized['search_vector'] = None
mixer.blend(Reimbursement, **serialized)
data = self.data.copy()
data['search_vector'] = None
mixer.blend(Reimbursement, **data)
self.assertEqual(1, Reimbursement.objects.count())

create_or_update_reimbursement(self.csv_row_as_dict)
create_or_update_reimbursement(self.data)
self.assertEqual(1, Reimbursement.objects.count())
57 changes: 48 additions & 9 deletions jarbas/core/tests/test_reimbursements_command.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
from datetime import date
from io import StringIO
from unittest.mock import MagicMock, call, patch

from django.conf import settings
from django.test import TestCase

from jarbas.core.management.commands.reimbursements import Command
Expand Down Expand Up @@ -63,11 +64,49 @@ def test_handler_with_options(self, mark, drop_all, create, reimbursements):

class TestFileLoader(TestCommand):

@patch('jarbas.core.management.commands.reimbursements.lzma')
@patch('jarbas.core.management.commands.reimbursements.csv.DictReader')
def test_reimbursement_property(self, row, lzma):
lzma.return_value = StringIO()
row.return_value = dict(ahoy=42)
self.command.path = 'reimbursements.xz'
reimbursements = tuple(self.command.reimbursements)
self.assertEqual(1, len(reimbursements))
@patch('jarbas.core.management.commands.reimbursements.print')
def test_reimbursement_property(self, print_):
# Point the command at the fixture file under jarbas/core/tests/fixtures
self.command.path = os.path.join(
settings.BASE_DIR,
'jarbas',
'core',
'tests',
'fixtures',
'reimbursements.xz'
)
output = list(self.command.reimbursements)
# Expected dict for the first item yielded by the property
expected = {
'applicant_id': 13,
'batch_number': 9,
'cnpj_cpf': '11111111111111',
'congressperson_document': 2,
'congressperson_id': 1,
'congressperson_name': 'Roger That',
'document_id': 42,
'document_number': '6',
'document_type': 7,
'document_value': 8.90,
'installment': 7,
'issue_date': '2014-02-12',
'leg_of_the_trip': '8',
'month': 1,
'net_values': '1.99,2.99',
'party': 'Partido',
'passenger': 'John Doe',
'reimbursement_numbers': '10,11',
'reimbursement_values': '12.13,14.15',
'remark_value': 1.23,
'state': 'UF',
'subquota_description': 'Subquota description',
'subquota_group_description': 'Subquota group desc',
'subquota_group_id': 5,
'subquota_number': 4,
'supplier': 'Acme',
'term': 1970,
'term_id': 3,
'total_net_value': 4.56,
'reimbursement_value_total': None,
'year': 1970
}
# Allow longer diffs in the failure output of assertEqual
self.maxDiff = 1024
self.assertEqual(output[0], expected)
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
-r requirements.txt
django-test-without-migrations==0.6
ipdb==0.10.3
mixer==5.6.6
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ python-memcached==1.58
python-twitter==3.3
reprint==0.3.0 # pyup: ignore
requests==2.18.4
rows==0.3.1

0 comments on commit 9d31da5

Please sign in to comment.