-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
LTD-5730-eu-xml-bad-chars #2287
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from core.helpers import remove_non_printable_characters | ||
from crispy_forms_gds.choices import Choice | ||
from crispy_forms_gds.helper import FormHelper | ||
from crispy_forms_gds.layout import Layout, Submit, HTML | ||
|
@@ -225,6 +226,10 @@ class PartyNameForm(BaseForm): | |
], | ||
) | ||
|
||
def clean_name(self): | ||
name = self.cleaned_data["name"] | ||
return remove_non_printable_characters(name) | ||
|
||
def get_layout_fields(self): | ||
return ("name",) | ||
|
||
|
@@ -315,6 +320,10 @@ def __init__(self, *args, **kwargs): | |
country_choices = [(country["id"], country["name"]) for country in countries] | ||
self.fields["country"].choices += country_choices | ||
|
||
def clean_address(self): | ||
address = self.cleaned_data["address"] | ||
return remove_non_printable_characters(address) | ||
|
||
def get_layout_fields(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we be adding these for exporter address and name inputs too? Does that have an impact on the xml data? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Those form components are different during registration . I guess the risk here comes from the fact they copy and paste party details from other forms during Licence submission. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand. You're probably doing the right thing in only fixing one thing at a time. Gives comfort that you've investigated though. |
||
return ( | ||
"address", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -84,6 +84,26 @@ def test_consignee_name_form(data, valid, errors): | |
assert form.errors == errors | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"data, expected", | ||
( | ||
({"name": "\x02 test1"}, " test1"), | ||
({"name": "\x02test2"}, "test2"), | ||
({"name": "this is \n test3"}, "this is \n test3"), | ||
({"name": "this is \t test4"}, "this is \t test4"), | ||
({"name": "this is \r test5"}, "this is \r test5"), | ||
({"name": "namé 6"}, "namé 6"), | ||
({"name": "namé's"}, "namé's"), | ||
({"name": "test 8"}, "test 8"), | ||
), | ||
) | ||
def test_consignee_name_removes_non_printable(data, expected): | ||
form = parties.ConsigneeNameForm(data=data) | ||
|
||
form.is_valid() | ||
assert form.cleaned_data["name"] == expected | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"data, valid, errors", | ||
( | ||
|
@@ -186,6 +206,31 @@ class Request: | |
assert form.errors == errors | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"data, expected", | ||
( | ||
({"address": "1 somewhere", "country": "aus"}, "1 somewhere"), | ||
({"address": "1 \x02 somewhere", "country": "aus"}, "1 somewhere"), | ||
({"address": "1 \x02 \n somewhere's", "country": "aus"}, "1 \n somewhere's"), | ||
({"address": "1 \x01somewhere", "country": "aus"}, "somewhere"), | ||
({"address": "1 \x03 \n somewhere", "country": "aus"}, "1 somewhere"), | ||
({"address": "1 \x03 \n ô somewhere", "country": "aus"}, "1 ô somewhere"), | ||
({"address": "1 \x02 \r somewhere's", "country": "aus"}, "1 \r somewhere's"), | ||
({"address": "1 \x02 \t somewhere's", "country": "aus"}, "1 \t somewhere's"), | ||
), | ||
) | ||
@patch("exporter.applications.forms.parties.get_countries") | ||
def test_end_user_address_removes_non_printable(mock_get_countries, data, expected): | ||
class Request: | ||
csp_nonce = "test" | ||
|
||
request = Request() | ||
mock_get_countries.return_value = [{"id": "aus", "name": "Austria"}, {"id": "fr", "name": "France"}] | ||
form = parties.EndUserAddressForm(request=request, data=data) | ||
|
||
form.is_valid() | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there an assert missing for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no this test doesn't check if form is valid as everything is valid but we need to call .is_valid() so I can access cleaned_data and check the output. |
||
|
||
@pytest.mark.parametrize( | ||
"data, valid, errors", | ||
( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cleaning the data is the best approach I can think of as the user probably wouldn't know how to react to the validation message.
If I understand this part correctly though it seems to me that the tab, newline and carriage return ascii are being removed completely rather than being replaced by something that can be parsed in python e.g spaces, /n or /r
I know that this is to output correctly in xml but I'm think our data should show as an accurate representation of what the user inputs maybe?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no it's the other way round anything less 31 with the exception of [9, 10, 13] is removed from printable chars.
If you have a look in the tests it confirms these chars stay but everything else is removed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Anything less than 31 and 9, 10 and 13 is being removed but they aren't being replaced with anything. Your test does confirm this but what I'm saying is that it might better represent user input if they are replaced with equivalent python parseable version of what the ascii char represents
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No still not correct anything less then 31 is removed (because these are meaningless unix chars i.e /x02
how we want [9, 10, 13] to stay as this are carriage return linefeed , tab etc and hence stays as is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
May I suggest including the character instead of ascii value so it is clear what the character is?
eg
ord('\n')
for10
If that is not possible for all characters then add a comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ahh yes added comment to make it clearer what we are doing here.