From f8fd116f3c6f1d99db54e83612451c4c7efa1a68 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Mon, 29 Jul 2019 17:28:23 -0300 Subject: [PATCH 01/31] fix some type hints after running mypy --- src/arche/readers/items.py | 5 +++-- src/arche/tools/api.py | 5 +++-- src/arche/tools/helpers.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/arche/readers/items.py b/src/arche/readers/items.py index c33ac24..f2253dd 100755 --- a/src/arche/readers/items.py +++ b/src/arche/readers/items.py @@ -47,6 +47,7 @@ def origin_column_name(self, new: str) -> str: for column in self.df.columns: if column in new: return column + return '' @classmethod def from_df(cls, df: pd.DataFrame): @@ -66,7 +67,7 @@ def __init__( ): self.key = key self._count = count - self._limit = None + self._limit: Any = None self.filters = filters raw = self.fetch_data() df = pd.DataFrame(list(raw)) @@ -104,7 +105,7 @@ def __init__( filters: Optional[api.Filters] = None, ): self.start_index = start_index - self.start: int = f"{key}/{start_index}" + self.start: str = f"{key}/{start_index}" self._job: Job = None super().__init__(key, count, filters) diff --git a/src/arche/tools/api.py b/src/arche/tools/api.py index cd56b2a..4a10be1 100755 --- a/src/arche/tools/api.py +++ b/src/arche/tools/api.py @@ -3,7 +3,7 @@ import math from multiprocessing import Pool import time -from typing import Dict, List, Tuple, Optional, Union +from typing import Dict, List, Tuple, Optional, Union, cast from arche.tools import helpers from dateutil.relativedelta import relativedelta @@ -144,7 +144,8 @@ def get_items_with_pool( A numpy array of items """ active_connections_limit = 10 - processes_count = min(max(helpers.cpus_count(), workers), active_connections_limit) + processes_count: int = cast( + int, min(max(helpers.cpus_count(), workers), active_connections_limit)) batch_size = math.ceil(count / processes_count) start_idxs = range(start_index, start_index + count, batch_size) diff --git a/src/arche/tools/helpers.py b/src/arche/tools/helpers.py index 03308f8..847d6cb 100755 --- a/src/arche/tools/helpers.py +++ b/src/arche/tools/helpers.py @@ -76,7 +76,7 @@ def is_number(s): return True -def cpus_count() -> int: +def cpus_count() -> Optional[int]: try: return len(os.sched_getaffinity(0)) except AttributeError: From fb53167bdd1de3bb0a702944e2abb4461f3b2418 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Mon, 29 Jul 2019 17:43:00 -0300 Subject: [PATCH 02/31] fix type hints at readers/schema and rules/price --- src/arche/readers/schema.py | 11 ++++++----- src/arche/rules/price.py | 16 +++++++++------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/arche/readers/schema.py b/src/arche/readers/schema.py index 06b8883..99e1af6 100755 --- a/src/arche/readers/schema.py +++ b/src/arche/readers/schema.py @@ -3,7 +3,7 @@ import json import os import pprint -from typing import Dict, List, Union +from typing import Dict, List, Union, DefaultDict, cast import urllib.request from arche.tools import s3 @@ -58,17 +58,18 @@ def __repr__(self): def get_enums(self) -> List[str]: enums = [] for k, v in self.raw["properties"].items(): - if "enum" in v.keys(): + if "enum" in v.keys(): # type: ignore enums.append(k) return enums @staticmethod def get_tags(schema: RawSchema) -> TaggedFields: - tagged_fields = defaultdict(list) + tagged_fields: DefaultDict[str, List[str]] = defaultdict(list) for key, value in schema["properties"].items(): - property_tags = value.get("tag", []) + property_tags = value.get("tag", []) # type: ignore 
if property_tags: - tagged_fields = Schema.get_field_tags(property_tags, key, tagged_fields) + tagged_fields = cast( + DefaultDict[str, List[str]], Schema.get_field_tags(property_tags, key, tagged_fields)) return tagged_fields @classmethod diff --git a/src/arche/rules/price.py b/src/arche/rules/price.py index 086d33f..c3b0a0f 100755 --- a/src/arche/rules/price.py +++ b/src/arche/rules/price.py @@ -1,3 +1,5 @@ +from typing import Any + from arche.readers.schema import TaggedFields from arche.rules.result import Result, Outcome from arche.tools.helpers import is_number, ratio_diff @@ -74,13 +76,13 @@ def compare_prices_for_same_urls( missing and new `product_url_field` tagged fields. """ result = Result("Compare Prices For Same Urls") - url_field = tagged_fields.get("product_url_field") + url_field: Any = tagged_fields.get("product_url_field") if not url_field: result.add_info(Outcome.SKIPPED) return result url_field = url_field[0] - price_field = tagged_fields.get("product_price_field") + price_field: Any = tagged_fields.get("product_price_field") source_df = source_df.dropna(subset=[url_field]) target_df = target_df.dropna(subset=[url_field]) @@ -152,8 +154,8 @@ def compare_names_for_same_urls( compare `name_field` field""" result = Result("Compare Names Per Url") - url_field = tagged_fields.get("product_url_field") - name_field = tagged_fields.get("name_field") + url_field: Any = tagged_fields.get("product_url_field") + name_field: Any = tagged_fields.get("name_field") if not url_field or not name_field: result.add_info(Outcome.SKIPPED) return result @@ -199,14 +201,14 @@ def compare_prices_for_same_names( source_df: pd.DataFrame, target_df: pd.DataFrame, tagged_fields: TaggedFields ): result = Result("Compare Prices For Same Names") - name_field = tagged_fields.get("name_field") + name_field: Any = tagged_fields.get("name_field") if not name_field: result.add_info(Outcome.SKIPPED) return result name_field = name_field[0] - product_url_field = tagged_fields.get("product_url_field") + product_url_field: Any = tagged_fields.get("product_url_field") if not product_url_field: result.add_info("product_url_field tag is not set") else: @@ -242,7 +244,7 @@ def compare_prices_for_same_names( result.add_info(f"{len(same_names)} same names in both jobs") price_tag = "product_price_field" - price_field = tagged_fields.get(price_tag) + price_field: Any = tagged_fields.get(price_tag) if not price_field: result.add_info("product_price_field tag is not set") return result From 429faf794827d33ba6716a9884f4fb3a755d54ab Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Mon, 29 Jul 2019 17:48:36 -0300 Subject: [PATCH 03/31] fix type hints at rules module --- src/arche/rules/duplicates.py | 4 ++-- src/arche/rules/others.py | 3 ++- src/arche/rules/result.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arche/rules/duplicates.py b/src/arche/rules/duplicates.py index f9c4270..5ba5492 100755 --- a/src/arche/rules/duplicates.py +++ b/src/arche/rules/duplicates.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Set from arche.readers.schema import TaggedFields from arche.rules.result import Result, Outcome @@ -18,7 +18,7 @@ def find_by_unique(df: pd.DataFrame, tagged_fields: TaggedFields) -> Result: result.add_info(Outcome.SKIPPED) return result - err_keys = set() + err_keys: Set = set() for field in unique_fields: result.items_count = df[field].count() duplicates = df[df.duplicated(field, keep=False)][[field]] diff --git a/src/arche/rules/others.py 
b/src/arche/rules/others.py index bc9ec53..95d0509 100755 --- a/src/arche/rules/others.py +++ b/src/arche/rules/others.py @@ -1,5 +1,6 @@ import codecs import re +from typing import Set from arche.rules.result import Outcome, Result import numpy as np @@ -90,7 +91,7 @@ def garbage_symbols(df: pd.DataFrame) -> Result: ) errors = {} - row_keys = set() + row_keys: Set = set() rule_result = Result("Garbage Symbols", items_count=len(df)) for column in tqdm_notebook( diff --git a/src/arche/rules/result.py b/src/arche/rules/result.py index 05c09f3..1785c5e 100755 --- a/src/arche/rules/result.py +++ b/src/arche/rules/result.py @@ -225,7 +225,7 @@ def build_stack_bar_data(values_counts: List[pd.Series]) -> List[go.Bar]: Returns: A list of Bar objects. """ - data = [] + data: List[go.Bar] = [] for vc in values_counts: data = data + [ go.Bar( From 52ce0803ed7bb98a5145d911a395649a53ac1d97 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Mon, 29 Jul 2019 18:15:25 -0300 Subject: [PATCH 04/31] fix type hints at dqr --- src/arche/data_quality_report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arche/data_quality_report.py b/src/arche/data_quality_report.py index 3aa8e4c..0e6b160 100755 --- a/src/arche/data_quality_report.py +++ b/src/arche/data_quality_report.py @@ -1,6 +1,6 @@ from io import StringIO import json -from typing import Optional +from typing import Optional, List from arche.figures import tables @@ -36,7 +36,7 @@ def __init__( """ self.schema = schema self.report = report - self.figures = [] + self.figures: Optional[List] = [] self.appendix = self.create_appendix(self.schema.raw) self.create_figures(items) self.plot_to_notebook() From fbf5484bf0782b5568817376b9b6a34011deb8c4 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Mon, 29 Jul 2019 19:45:52 -0300 Subject: [PATCH 05/31] adding more type annotation --- src/arche/data_quality_report.py | 10 +++++----- src/arche/report.py | 2 +- src/arche/tools/schema.py | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/arche/data_quality_report.py b/src/arche/data_quality_report.py index 0e6b160..54ce601 100755 --- a/src/arche/data_quality_report.py +++ b/src/arche/data_quality_report.py @@ -36,7 +36,7 @@ def __init__( """ self.schema = schema self.report = report - self.figures: Optional[List] = [] + self.figures: List = [] self.appendix = self.create_appendix(self.schema.raw) self.create_figures(items) self.plot_to_notebook() @@ -44,7 +44,7 @@ def __init__( if bucket: self.save_report_to_bucket( project_id=items.key.split("/")[0], - spider=items.job.metadata.get("spider"), + spider=items.job.metadata.get("spider"), # type: ignore bucket=bucket, ) @@ -63,7 +63,7 @@ def create_figures(self, items: CloudItems): no_of_price_warns = price_was_now_result.err_items_count no_of_checked_price_items = price_was_now_result.items_count - crawlera_user = api.get_crawlera_user(items.job) + crawlera_user = api.get_crawlera_user(items.job) # type: ignore validation_errors = self.report.results.get( "JSON Schema Validation", @@ -77,7 +77,7 @@ def create_figures(self, items: CloudItems): ) quality_estimation, field_accuracy = generate_quality_estimation( - items.job, + items.job, # type: ignore crawlera_user, validation_errors, name_url_dups.err_items_count, @@ -91,7 +91,7 @@ def create_figures(self, items: CloudItems): ) self.score_table(quality_estimation, field_accuracy) - self.job_summary_table(items.job) + self.job_summary_table(items.job) # type: ignore self.rules_summary_table( items.df, 
validation_errors, diff --git a/src/arche/report.py b/src/arche/report.py index da34131..78dd894 100755 --- a/src/arche/report.py +++ b/src/arche/report.py @@ -38,7 +38,7 @@ def write_summaries(self) -> None: def write_summary(cls, result: Result) -> None: cls.write_rule_name(result.name) if not result.messages: - cls.write_rule_outcome(Outcome.PASSED, Level.INFO) + cls.write_rule_outcome(Outcome.PASSED, Level.INFO) #type: ignore for level, rule_msgs in result.messages.items(): for rule_msg in rule_msgs: cls.write_rule_outcome(rule_msg.summary, level) diff --git a/src/arche/tools/schema.py b/src/arche/tools/schema.py index 3b607dc..bd55e90 100755 --- a/src/arche/tools/schema.py +++ b/src/arche/tools/schema.py @@ -1,6 +1,6 @@ from collections import defaultdict import random -from typing import Any, Deque, Dict, List, Optional +from typing import Any, Deque, Dict, List, Optional, DefaultDict from arche.readers.items import RawItems from arche.readers.schema import Schema @@ -92,7 +92,7 @@ def fast_validate( Returns: A dictionary of errors with message and item keys """ - errors = defaultdict(set) + errors: DefaultDict = defaultdict(set) validate = fastjsonschema.compile(schema) for i, raw_item in enumerate( @@ -113,7 +113,7 @@ def full_validate( """This function uses jsonschema validator which returns all found error per item. See `fast_validate()` for arguments descriptions. """ - errors = defaultdict(set) + errors: DefaultDict = defaultdict(set) validator = validators.validator_for(schema)(schema) validator.format_checker = FormatChecker() @@ -134,7 +134,7 @@ def format_validation_message( error_msg: str, path: Deque, schema_path: Deque, validator: str ) -> str: str_path = "/".join(p for p in path if isinstance(p, str)) - schema_path = "/".join(p for p in schema_path) + schema_path = "/".join(p for p in schema_path) # type: ignore if validator == "anyOf": if str_path: From c4af9e9d42ecc42d44625a2d195886bc6a2e3a93 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Mon, 16 Sep 2019 18:44:39 -0300 Subject: [PATCH 06/31] fix mypy typing - partial commit --- Pipfile | 2 ++ src/arche/readers/items.py | 2 +- src/arche/readers/schema.py | 21 ++++++++++++--------- src/arche/report.py | 2 +- src/arche/tools/api.py | 3 ++- tox.ini | 12 +++++++++++- 6 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Pipfile b/Pipfile index 1ff4a44..64dad58 100755 --- a/Pipfile +++ b/Pipfile @@ -14,6 +14,7 @@ fastjsonschema = "*" perfect-jsonschema = "*" tqdm = "*" ipywidgets = "*" +mypy = "*" [dev-packages] jupyterlab = "*" @@ -42,6 +43,7 @@ pyarrow = "*" cufflinks = "*" tables = "*" nb-black = "*" +pylint = "*" [requires] python_version = "3.7" diff --git a/src/arche/readers/items.py b/src/arche/readers/items.py index f2253dd..e187241 100755 --- a/src/arche/readers/items.py +++ b/src/arche/readers/items.py @@ -47,7 +47,7 @@ def origin_column_name(self, new: str) -> str: for column in self.df.columns: if column in new: return column - return '' + return "" @classmethod def from_df(cls, df: pd.DataFrame): diff --git a/src/arche/readers/schema.py b/src/arche/readers/schema.py index ad7c637..359abbb 100755 --- a/src/arche/readers/schema.py +++ b/src/arche/readers/schema.py @@ -1,10 +1,8 @@ from collections import defaultdict from enum import Enum import json -import os import pprint -from typing import Dict, List, Union, DefaultDict, cast -import urllib.request +from typing import Dict, List, Union, DefaultDict, cast, Tuple, Any, ItemsView from arche.tools import s3 import perfect_jsonschema @@ -44,19 
+42,24 @@ def __repr__(self): def get_enums(self) -> List[str]: enums = [] - for k, v in self.raw["properties"].items(): - if "enum" in v.keys(): # type: ignore + # self.raw["properties"].items() has type: + # ItemsView[str, Union[str, bool, int, float, None, list[Any]]] + properties = cast(ItemsView[str, Dict[str, Any]], self.raw["properties"].items()) + for k, v in properties: + if "enum" in v.keys(): enums.append(k) return enums @staticmethod def get_tags(schema: RawSchema) -> TaggedFields: tagged_fields: DefaultDict[str, List[str]] = defaultdict(list) - for key, value in schema["properties"].items(): - property_tags = value.get("tag", []) # type: ignore + # schema["properties"].items() has type: + # ItemsView[str, Union[str, bool, int, float, None, list[Any]]] + properties = cast(ItemsView[str, Dict[str, Any]], schema["properties"].items()) + for key, value in properties: + property_tags = value.get("tag", []) if property_tags: - tagged_fields = cast( - DefaultDict[str, List[str]], Schema.get_field_tags(property_tags, key, tagged_fields)) + tagged_fields: Dict[str, List[str]] = Schema.get_field_tags(property_tags, key, tagged_fields) return tagged_fields @classmethod diff --git a/src/arche/report.py b/src/arche/report.py index 9d1afd3..5669a24 100755 --- a/src/arche/report.py +++ b/src/arche/report.py @@ -38,7 +38,7 @@ def write_summaries(self) -> None: def write_summary(cls, result: Result) -> None: cls.write_rule_name(result.name) if not result.messages: - cls.write_rule_outcome(Outcome.PASSED, Level.INFO) #type: ignore + cls.write_rule_outcome(Outcome.PASSED, Level.INFO) for level, rule_msgs in result.messages.items(): for rule_msg in rule_msgs: cls.write_rule_outcome(rule_msg.summary, level) diff --git a/src/arche/tools/api.py b/src/arche/tools/api.py index 4a10be1..6c181ba 100755 --- a/src/arche/tools/api.py +++ b/src/arche/tools/api.py @@ -145,7 +145,8 @@ def get_items_with_pool( """ active_connections_limit = 10 processes_count: int = cast( - int, min(max(helpers.cpus_count(), workers), active_connections_limit)) + int, min(max(helpers.cpus_count(), workers), active_connections_limit) + ) batch_size = math.ceil(count / processes_count) start_idxs = range(start_index, start_index + count, batch_size) diff --git a/tox.ini b/tox.ini index 077c79c..5aa06e9 100755 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py37, pep8 +envlist = py37, pep8, mypy skipsdist = false [testenv] @@ -22,6 +22,16 @@ extras = docs commands = sphinx-build docs/source docs/_build -b linkcheck -b html +[testenv:mypy] +deps = + mypy +commands = mypy --ignore-missing-imports src/arche tests + +[mypy] +deps = + mypy +commands = mypy src/arche + [flake8] select = C,E,F,W,I,D,B,B9 ignore = W503, E741, E501, E203, I101 From f11f278559801e9bec49ba53197131d1782ab885 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Tue, 17 Sep 2019 14:07:34 -0300 Subject: [PATCH 07/31] fix typing at schema.py --- src/arche/readers/schema.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/arche/readers/schema.py b/src/arche/readers/schema.py index 359abbb..d8722f3 100755 --- a/src/arche/readers/schema.py +++ b/src/arche/readers/schema.py @@ -42,9 +42,11 @@ def __repr__(self): def get_enums(self) -> List[str]: enums = [] - # self.raw["properties"].items() has type: + # self.raw["properties"].items() has type: # ItemsView[str, Union[str, bool, int, float, None, list[Any]]] - properties = cast(ItemsView[str, Dict[str, Any]], self.raw["properties"].items()) + properties = cast( + 
ItemsView[str, Dict[str, Any]], self.raw["properties"].items() + ) for k, v in properties: if "enum" in v.keys(): enums.append(k) @@ -52,19 +54,19 @@ def get_enums(self) -> List[str]: @staticmethod def get_tags(schema: RawSchema) -> TaggedFields: - tagged_fields: DefaultDict[str, List[str]] = defaultdict(list) - # schema["properties"].items() has type: + tagged_fields: Dict[str, List[str]] = defaultdict(list) + # schema["properties"].items() has type: # ItemsView[str, Union[str, bool, int, float, None, list[Any]]] properties = cast(ItemsView[str, Dict[str, Any]], schema["properties"].items()) for key, value in properties: property_tags = value.get("tag", []) if property_tags: - tagged_fields: Dict[str, List[str]] = Schema.get_field_tags(property_tags, key, tagged_fields) + tagged_fields = Schema.get_field_tags(property_tags, key, tagged_fields) return tagged_fields @classmethod def get_field_tags( - cls, tags: List[str], field: str, tagged_fields: defaultdict + cls, tags: List[str], field: str, tagged_fields: Dict ) -> TaggedFields: tags = cls.parse_tag(tags) if not tags: From 06aacf881c57ceb99f4fbe7cd509377259d4183c Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Tue, 17 Sep 2019 17:10:02 -0300 Subject: [PATCH 08/31] fix typing --- src/arche/readers/schema.py | 2 +- src/arche/report.py | 6 ++++-- src/arche/rules/price.py | 18 +++++++++--------- src/arche/rules/result.py | 8 ++++++-- tests/conftest.py | 4 ++-- tests/test_arche.py | 3 ++- 6 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/arche/readers/schema.py b/src/arche/readers/schema.py index d8722f3..8368b91 100755 --- a/src/arche/readers/schema.py +++ b/src/arche/readers/schema.py @@ -2,7 +2,7 @@ from enum import Enum import json import pprint -from typing import Dict, List, Union, DefaultDict, cast, Tuple, Any, ItemsView +from typing import Dict, List, Union, cast, Any, ItemsView from arche.tools import s3 import perfect_jsonschema diff --git a/src/arche/report.py b/src/arche/report.py index 5669a24..e457f02 100755 --- a/src/arche/report.py +++ b/src/arche/report.py @@ -1,5 +1,5 @@ from functools import partial -from typing import Dict +from typing import Dict, Union from arche import SH_URL from arche.rules.result import Level, Outcome, Result @@ -44,7 +44,9 @@ def write_summary(cls, result: Result) -> None: cls.write_rule_outcome(rule_msg.summary, level) @classmethod - def write_rule_outcome(cls, outcome: str, level: Level = Level.INFO) -> None: + def write_rule_outcome( + cls, outcome: Union[str, Outcome], level: Level = Level.INFO + ) -> None: if isinstance(outcome, Outcome): outcome = outcome.name msg = outcome diff --git a/src/arche/rules/price.py b/src/arche/rules/price.py index 517a5e5..732677f 100755 --- a/src/arche/rules/price.py +++ b/src/arche/rules/price.py @@ -110,11 +110,11 @@ def compare_prices_for_same_urls( result.add_info(f"{len(same_urls)} same urls in both jobs") diff_prices_count = 0 - price_field = tagged_fields.get("product_price_field") - if not price_field: + price_field_tag = tagged_fields.get("product_price_field") + if not price_field_tag: result.add_info("product_price_field tag is not set") else: - price_field = price_field[0] + price_field = price_field_tag[0] detailed_messages = [] for url in same_urls: if url.strip() != "nan": @@ -202,12 +202,12 @@ def compare_prices_for_same_names( source_df: pd.DataFrame, target_df: pd.DataFrame, tagged_fields: TaggedFields ): result = Result("Compare Prices For Same Names") - name_field = tagged_fields.get("name_field") - if not name_field: + 
name_field_tag = tagged_fields.get("name_field") + if not name_field_tag: result.add_info(Outcome.SKIPPED) return result - name_field = name_field[0] + name_field = name_field_tag[0] source_df = source_df[source_df[name_field].notnull()] target_df = target_df[target_df[name_field].notnull()] @@ -234,12 +234,12 @@ def compare_prices_for_same_names( result.add_info(f"{len(same_names)} same names in both jobs") price_tag = "product_price_field" - price_field = tagged_fields.get(price_tag) - if not price_field: + price_field_tag = tagged_fields.get(price_tag) + if not price_field_tag: result.add_info("product_price_field tag is not set") return result - price_field = price_field[0] + price_field = price_field_tag[0] count = 0 detailed_messages = [] diff --git a/src/arche/rules/result.py b/src/arche/rules/result.py index 9b6dc78..2c219b7 100755 --- a/src/arche/rules/result.py +++ b/src/arche/rules/result.py @@ -2,7 +2,7 @@ from enum import Enum import itertools import math -from typing import Dict, List, Optional, Set, Union +from typing import Dict, List, Optional, Set, Union, cast import IPython import numpy as np @@ -40,7 +40,11 @@ class Message: summary: str detailed: Optional[str] = None errors: Optional[Dict[str, Set]] = None - _err_keys: Optional[Set[Union[str, int]]] = field(default_factory=set) + + # expression "field(default_factory=set)" has type "Set[_T]", so we have to cast + _err_keys: Optional[Set[Union[str, int]]] = cast( + Optional[Set[Union[str, int]]], field(default_factory=set) + ) @property def err_keys(self): diff --git a/tests/conftest.py b/tests/conftest.py index 1c53809..8837e97 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -211,8 +211,8 @@ def create_result( items_count: Optional[int] = None, ) -> Result: result = Result(rule_name) - for level, messages in messages.items(): - for message in messages: + for level, messages_list in messages.items(): + for message in messages_list: result.add_message(level, *message) if stats: diff --git a/tests/test_arche.py b/tests/test_arche.py index fa2c736..7cebe32 100755 --- a/tests/test_arche.py +++ b/tests/test_arche.py @@ -1,3 +1,4 @@ +from typing import Dict, List from arche import arche, SH_URL from arche.arche import Arche from arche.rules.result import Level @@ -34,7 +35,7 @@ def test_arche_df(get_df): pd.testing.assert_frame_equal(a.target_items.df, get_df) -schema_dummies = [{"properties": {"name": {}}}, {"properties": {"url": {}}}] +schema_dummies: List[Dict] = [{"properties": {"name": {}}}, {"properties": {"url": {}}}] def test_schema(): From 9b01ae1433a0c6c2751594efe22397a41e7bf360 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Tue, 17 Sep 2019 17:26:20 -0300 Subject: [PATCH 09/31] pep8 --- tests/test_arche.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_arche.py b/tests/test_arche.py index 7cebe32..235de2a 100755 --- a/tests/test_arche.py +++ b/tests/test_arche.py @@ -1,4 +1,5 @@ from typing import Dict, List + from arche import arche, SH_URL from arche.arche import Arche from arche.rules.result import Level From 3c43406f184295de02d1051aac4ff457fe8b9b6c Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Wed, 25 Sep 2019 21:32:29 -0300 Subject: [PATCH 10/31] fix mypy at arche.py --- src/arche/arche.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/arche/arche.py b/src/arche/arche.py index 3e67ee5..996e7bb 100755 --- a/src/arche/arche.py +++ b/src/arche/arche.py @@ -1,6 +1,6 @@ from functools import lru_cache import logging -from typing import Iterable, 
Optional, Union +from typing import Iterable, Optional, Union, cast from arche.data_quality_report import DataQualityReport from arche.readers.items import Items, CollectionItems, JobItems, RawItems @@ -106,15 +106,15 @@ def schema(self, schema_source): def get_items( source: Union[str, pd.DataFrame, RawItems], count: Optional[int], - start: Union[str, int], + start: Optional[str], filters: Optional[api.Filters], ) -> Items: if isinstance(source, pd.DataFrame): return Items.from_df(source) elif isinstance(source, Iterable) and not isinstance(source, str): - return Items.from_array(source) + return Items.from_array(cast(RawItems, source)) elif helpers.is_job_key(source): - return JobItems(source, count, start or 0, filters) + return JobItems(source, count, int(start or 0), filters) elif helpers.is_collection_key(source): return CollectionItems(source, count, start, filters) else: @@ -140,7 +140,7 @@ def run_all_rules(self): self.run_schema_rules() def data_quality_report(self, bucket: Optional[str] = None): - if helpers.is_collection_key(self.source): + if helpers.is_collection_key(str(self.source or '')): raise ValueError("Collections are not supported") if not self.schema: raise ValueError("Schema is empty") From 82de89a1a57bb6a8f111754e277b95d3d935b64f Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Thu, 26 Sep 2019 19:53:37 -0300 Subject: [PATCH 11/31] fix pep8; improve mypy typinh --- src/arche/arche.py | 2 +- src/arche/data_quality_report.py | 14 +++++++------- src/arche/readers/schema.py | 4 ++-- src/arche/tools/bitbucket.py | 2 +- src/arche/tools/schema.py | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/arche/arche.py b/src/arche/arche.py index 996e7bb..7ab2375 100755 --- a/src/arche/arche.py +++ b/src/arche/arche.py @@ -140,7 +140,7 @@ def run_all_rules(self): self.run_schema_rules() def data_quality_report(self, bucket: Optional[str] = None): - if helpers.is_collection_key(str(self.source or '')): + if helpers.is_collection_key(str(self.source or "")): raise ValueError("Collections are not supported") if not self.schema: raise ValueError("Schema is empty") diff --git a/src/arche/data_quality_report.py b/src/arche/data_quality_report.py index 54ce601..9f70aa2 100755 --- a/src/arche/data_quality_report.py +++ b/src/arche/data_quality_report.py @@ -5,7 +5,7 @@ from arche.figures import tables from arche.quality_estimation_algorithm import generate_quality_estimation -from arche.readers.items import CloudItems +from arche.readers.items import JobItems from arche.readers.schema import Schema from arche.report import Report import arche.rules.coverage as coverage_rules @@ -23,7 +23,7 @@ class DataQualityReport: def __init__( self, - items: CloudItems, + items: JobItems, schema: Schema, report: Report, bucket: Optional[str] = None, @@ -44,11 +44,11 @@ def __init__( if bucket: self.save_report_to_bucket( project_id=items.key.split("/")[0], - spider=items.job.metadata.get("spider"), # type: ignore + spider=items.job.metadata.get("spider"), bucket=bucket, ) - def create_figures(self, items: CloudItems): + def create_figures(self, items: JobItems): name_url_dups = self.report.results.get( "Duplicates By **name_field, product_url_field** Tags", duplicate_rules.find_by_name_url(items.df, self.schema.tags), @@ -63,7 +63,7 @@ def create_figures(self, items: CloudItems): no_of_price_warns = price_was_now_result.err_items_count no_of_checked_price_items = price_was_now_result.items_count - crawlera_user = api.get_crawlera_user(items.job) # type: ignore + crawlera_user 
= api.get_crawlera_user(items.job) validation_errors = self.report.results.get( "JSON Schema Validation", @@ -77,7 +77,7 @@ def create_figures(self, items: CloudItems): ) quality_estimation, field_accuracy = generate_quality_estimation( - items.job, # type: ignore + items.job, crawlera_user, validation_errors, name_url_dups.err_items_count, @@ -91,7 +91,7 @@ def create_figures(self, items: CloudItems): ) self.score_table(quality_estimation, field_accuracy) - self.job_summary_table(items.job) # type: ignore + self.job_summary_table(items.job) self.rules_summary_table( items.df, validation_errors, diff --git a/src/arche/readers/schema.py b/src/arche/readers/schema.py index 8368b91..7d6b08c 100755 --- a/src/arche/readers/schema.py +++ b/src/arche/readers/schema.py @@ -2,7 +2,7 @@ from enum import Enum import json import pprint -from typing import Dict, List, Union, cast, Any, ItemsView +from typing import Dict, List, Union, cast, Any, ItemsView, Set from arche.tools import s3 import perfect_jsonschema @@ -66,7 +66,7 @@ def get_tags(schema: RawSchema) -> TaggedFields: @classmethod def get_field_tags( - cls, tags: List[str], field: str, tagged_fields: Dict + cls, tags: Set[Any], field: str, tagged_fields: Dict ) -> TaggedFields: tags = cls.parse_tag(tags) if not tags: diff --git a/src/arche/tools/bitbucket.py b/src/arche/tools/bitbucket.py index 8ea5b8f..117db05 100644 --- a/src/arche/tools/bitbucket.py +++ b/src/arche/tools/bitbucket.py @@ -23,7 +23,7 @@ def prepare_request(url: str) -> urllib.request.Request: def convert_to_api_url(url: str, netloc: str, api_netloc: str) -> str: """Support both regular and raw URLs""" try: - user, repo, path = re.search( + user, repo, path = re.search( # type: ignore f"https://{netloc}/(.*?)/(.*?)/(?:raw|src)/(.*)", url ).groups() except AttributeError: diff --git a/src/arche/tools/schema.py b/src/arche/tools/schema.py index bd55e90..cb910c7 100755 --- a/src/arche/tools/schema.py +++ b/src/arche/tools/schema.py @@ -134,7 +134,7 @@ def format_validation_message( error_msg: str, path: Deque, schema_path: Deque, validator: str ) -> str: str_path = "/".join(p for p in path if isinstance(p, str)) - schema_path = "/".join(p for p in schema_path) # type: ignore + schema_path = "/".join(p for p in schema_path) if validator == "anyOf": if str_path: From 26066000084db063f5e120bfba8d0c7ac23ab8f0 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Thu, 26 Sep 2019 20:08:21 -0300 Subject: [PATCH 12/31] update typing at tools/schema.py --- src/arche/tools/schema.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/arche/tools/schema.py b/src/arche/tools/schema.py index cb910c7..e295462 100755 --- a/src/arche/tools/schema.py +++ b/src/arche/tools/schema.py @@ -1,9 +1,9 @@ from collections import defaultdict import random -from typing import Any, Deque, Dict, List, Optional, DefaultDict +from typing import Any, Deque, Dict, List, Optional, DefaultDict, Union from arche.readers.items import RawItems -from arche.readers.schema import Schema +from arche.readers.schema import Schema, RawSchema from arche.schema_definitions import extension from arche.tools import api, helpers import fastjsonschema @@ -26,7 +26,7 @@ def basic_json_schema(data_source: str, items_numbers: List[int] = None) -> Sche def create_json_schema( source_key: str, items_numbers: Optional[List[int]] = None -) -> Schema: +) -> Union[str, RawSchema]: if helpers.is_collection_key(source_key): store = api.get_collection(source_key) items_count = store.count() @@ -58,7 +58,7 @@ 
def create_json_schema( return infer_schema(samples) -def infer_schema(samples: List[Dict[str, Any]]) -> Schema: +def infer_schema(samples: List[Dict[str, Any]]) -> Union[str, RawSchema]: builder = SchemaBuilder("http://json-schema.org/draft-07/schema#") for sample in samples: builder.add_object(sample) @@ -134,13 +134,13 @@ def format_validation_message( error_msg: str, path: Deque, schema_path: Deque, validator: str ) -> str: str_path = "/".join(p for p in path if isinstance(p, str)) - schema_path = "/".join(p for p in schema_path) + schema_path_str: str = "/".join(p for p in schema_path) if validator == "anyOf": if str_path: - return f"'{str_path}' does not satisfy 'schema/{schema_path}'" + return f"'{str_path}' does not satisfy 'schema/{schema_path_str}'" else: - return f"'schema/{schema_path}' failed" + return f"'schema/{schema_path_str}' failed" if "Additional properties are not allowed" in error_msg: return error_msg From f092fe849b23310c5b0e2658b3e0d3012e5bda7e Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Fri, 27 Sep 2019 11:47:21 -0300 Subject: [PATCH 13/31] updating typing --- Pipfile | 1 - src/arche/rules/json_schema.py | 2 +- src/arche/tools/schema.py | 4 ++-- tests/conftest.py | 4 ++-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Pipfile b/Pipfile index 64dad58..26cbf83 100755 --- a/Pipfile +++ b/Pipfile @@ -43,7 +43,6 @@ pyarrow = "*" cufflinks = "*" tables = "*" nb-black = "*" -pylint = "*" [requires] python_version = "3.7" diff --git a/src/arche/rules/json_schema.py b/src/arche/rules/json_schema.py index 8458aaa..043a32a 100755 --- a/src/arche/rules/json_schema.py +++ b/src/arche/rules/json_schema.py @@ -25,7 +25,7 @@ def validate( err_items = len(set(itertools.chain.from_iterable(errors.values()))) if errors: result.add_error( - f"{err_items} ({err_items/len(raw_items):.0%}) items have {len(errors)} errors", + f"{err_items} ({err_items/len(list(raw_items)):.0%}) items have {len(errors)} errors", # noqa errors=errors, ) return result diff --git a/src/arche/tools/schema.py b/src/arche/tools/schema.py index e295462..95dbc77 100755 --- a/src/arche/tools/schema.py +++ b/src/arche/tools/schema.py @@ -79,7 +79,7 @@ def set_item_no(items_count: int) -> List[int]: def fast_validate( - schema: Schema, raw_items: RawItems, keys: pd.Index + schema: RawSchema, raw_items: RawItems, keys: pd.Index ) -> Dict[str, set]: """Verify items one by one. It stops after the first error in an item in most cases. Faster than jsonschema validation @@ -108,7 +108,7 @@ def fast_validate( def full_validate( - schema: Schema, raw_items: RawItems, keys: pd.Index + schema: RawSchema, raw_items: RawItems, keys: pd.Index ) -> Dict[str, set]: """This function uses jsonschema validator which returns all found error per item. See `fast_validate()` for arguments descriptions. 
diff --git a/tests/conftest.py b/tests/conftest.py index 8837e97..512fbb3 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ from copy import deepcopy from itertools import zip_longest -from typing import Dict, Iterable, List, Optional +from typing import Dict, List, Optional from arche.readers.items import CollectionItems, JobItems from arche.rules.result import Level, Message, Result, Stat @@ -50,7 +50,7 @@ def get_df(): class Job: def __init__( self, - items: Optional[Iterable] = None, + items: Optional[List[Dict]] = None, metadata: Optional[Dict] = None, stats: Optional[Dict] = None, key: str = "112358/13/21", From 6affcdf7bcaece87f0b2b0ca8c42cfde9cc9f886 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Fri, 27 Sep 2019 18:48:41 -0300 Subject: [PATCH 14/31] remove cast --- src/arche/rules/result.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/arche/rules/result.py b/src/arche/rules/result.py index 2c219b7..8dd1109 100755 --- a/src/arche/rules/result.py +++ b/src/arche/rules/result.py @@ -2,7 +2,7 @@ from enum import Enum import itertools import math -from typing import Dict, List, Optional, Set, Union, cast +from typing import Dict, List, Optional, Set, Union import IPython import numpy as np @@ -42,9 +42,7 @@ class Message: errors: Optional[Dict[str, Set]] = None # expression "field(default_factory=set)" has type "Set[_T]", so we have to cast - _err_keys: Optional[Set[Union[str, int]]] = cast( - Optional[Set[Union[str, int]]], field(default_factory=set) - ) + _err_keys: Set[Union[str, int]] = field(default_factory=set) @property def err_keys(self): From 80a56289237c7a6e3040efaf2792186a7d0e398e Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Fri, 27 Sep 2019 19:03:09 -0300 Subject: [PATCH 15/31] fixing type --- src/arche/readers/items.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arche/readers/items.py b/src/arche/readers/items.py index e187241..1905eaa 100755 --- a/src/arche/readers/items.py +++ b/src/arche/readers/items.py @@ -67,7 +67,7 @@ def __init__( ): self.key = key self._count = count - self._limit: Any = None + self._limit: int self.filters = filters raw = self.fetch_data() df = pd.DataFrame(list(raw)) From fa1597431636b11dd2cbf73d1c2e534bfc852886 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Sun, 29 Sep 2019 12:07:44 -0300 Subject: [PATCH 16/31] fix typing at price.py --- src/arche/rules/price.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arche/rules/price.py b/src/arche/rules/price.py index 732677f..dab1b9d 100755 --- a/src/arche/rules/price.py +++ b/src/arche/rules/price.py @@ -1,4 +1,4 @@ -from typing import Any +from typing import Any, Optional, List from arche.readers.schema import TaggedFields from arche.rules.result import Result, Outcome @@ -77,12 +77,12 @@ def compare_prices_for_same_urls( missing and new `product_url_field` tagged fields. 
""" result = Result("Compare Prices For Same Urls") - url_field: Any = tagged_fields.get("product_url_field") - if not url_field: + url_field_list: Optional[List[str]] = tagged_fields.get("product_url_field") + if not url_field_list: result.add_info(Outcome.SKIPPED) return result - url_field = url_field[0] + url_field = url_field_list[0] source_df = source_df.dropna(subset=[url_field]) target_df = target_df.dropna(subset=[url_field]) From e9c47d383391e4fa7f84e068e4f0794692c21cad Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Sun, 29 Sep 2019 12:08:28 -0300 Subject: [PATCH 17/31] fix typing at price.py --- src/arche/tools/api.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/arche/tools/api.py b/src/arche/tools/api.py index 6c181ba..756d011 100755 --- a/src/arche/tools/api.py +++ b/src/arche/tools/api.py @@ -144,9 +144,7 @@ def get_items_with_pool( A numpy array of items """ active_connections_limit = 10 - processes_count: int = cast( - int, min(max(helpers.cpus_count(), workers), active_connections_limit) - ) + processes_count: int = min(max(helpers.cpus_count(), workers), active_connections_limit) batch_size = math.ceil(count / processes_count) start_idxs = range(start_index, start_index + count, batch_size) From 95588625c5e21bd2b091d6d0328184a544f1e8ee Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Sun, 29 Sep 2019 12:09:27 -0300 Subject: [PATCH 18/31] fix typing at api.py --- src/arche/tools/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arche/tools/api.py b/src/arche/tools/api.py index 756d011..86c32b9 100755 --- a/src/arche/tools/api.py +++ b/src/arche/tools/api.py @@ -144,7 +144,7 @@ def get_items_with_pool( A numpy array of items """ active_connections_limit = 10 - processes_count: int = min(max(helpers.cpus_count(), workers), active_connections_limit) + processes_count: int = int(min(max(helpers.cpus_count(), workers), active_connections_limit) or 0) batch_size = math.ceil(count / processes_count) start_idxs = range(start_index, start_index + count, batch_size) From d6e60278dcf1b05cf35c76f765860102ae3db212 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Sun, 29 Sep 2019 13:02:06 -0300 Subject: [PATCH 19/31] fix tests and pep8 --- src/arche/readers/items.py | 4 +++- src/arche/tools/api.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/arche/readers/items.py b/src/arche/readers/items.py index 1905eaa..392c591 100755 --- a/src/arche/readers/items.py +++ b/src/arche/readers/items.py @@ -67,7 +67,7 @@ def __init__( ): self.key = key self._count = count - self._limit: int + self._limit: int = 0 self.filters = filters raw = self.fetch_data() df = pd.DataFrame(list(raw)) @@ -107,6 +107,7 @@ def __init__( self.start_index = start_index self.start: str = f"{key}/{start_index}" self._job: Job = None + self._limit: int = 0 super().__init__(key, count, filters) @property @@ -155,6 +156,7 @@ def __init__( filters: Optional[api.Filters] = None, ): self.start = start + self._limit: int = 0 super().__init__(key, count, filters) @property diff --git a/src/arche/tools/api.py b/src/arche/tools/api.py index 86c32b9..f97d210 100755 --- a/src/arche/tools/api.py +++ b/src/arche/tools/api.py @@ -3,7 +3,7 @@ import math from multiprocessing import Pool import time -from typing import Dict, List, Tuple, Optional, Union, cast +from typing import Dict, List, Tuple, Optional, Union from arche.tools import helpers from dateutil.relativedelta import relativedelta @@ -144,7 +144,9 @@ def get_items_with_pool( A numpy 
array of items """ active_connections_limit = 10 - processes_count: int = int(min(max(helpers.cpus_count(), workers), active_connections_limit) or 0) + processes_count: int = int( + min(max(helpers.cpus_count(), workers), active_connections_limit) or 0 + ) batch_size = math.ceil(count / processes_count) start_idxs = range(start_index, start_index + count, batch_size) From e432ece4e0339cde19c635beaa9f6509a56e1d17 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Mon, 30 Sep 2019 17:34:24 -0300 Subject: [PATCH 20/31] fix typing at conftest --- tests/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 512fbb3..f858d53 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,9 @@ from copy import deepcopy from itertools import zip_longest -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple from arche.readers.items import CollectionItems, JobItems -from arche.rules.result import Level, Message, Result, Stat +from arche.rules.result import Level, Result, Stat import numpy as np import pandas as pd import pytest @@ -206,7 +206,7 @@ def get_collection_items(mocker): def create_result( rule_name: str, - messages: Dict[Level, List[Message]], + messages: Dict[Level, List[Tuple]], stats: Optional[List[Stat]] = None, items_count: Optional[int] = None, ) -> Result: From 1f4611799748f9736b3d5b77cbe96c51111a2b44 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Thu, 3 Oct 2019 09:56:08 -0300 Subject: [PATCH 21/31] refactor typing at schema.py --- src/arche/tools/schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arche/tools/schema.py b/src/arche/tools/schema.py index 95dbc77..bb6999d 100755 --- a/src/arche/tools/schema.py +++ b/src/arche/tools/schema.py @@ -26,7 +26,7 @@ def basic_json_schema(data_source: str, items_numbers: List[int] = None) -> Sche def create_json_schema( source_key: str, items_numbers: Optional[List[int]] = None -) -> Union[str, RawSchema]: +) -> RawSchema: if helpers.is_collection_key(source_key): store = api.get_collection(source_key) items_count = store.count() @@ -58,7 +58,7 @@ def create_json_schema( return infer_schema(samples) -def infer_schema(samples: List[Dict[str, Any]]) -> Union[str, RawSchema]: +def infer_schema(samples: List[Dict[str, Any]]) -> RawSchema: builder = SchemaBuilder("http://json-schema.org/draft-07/schema#") for sample in samples: builder.add_object(sample) From 2fe5ba9df1e85163c932bf7ff2daf686216d977d Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Thu, 3 Oct 2019 10:06:12 -0300 Subject: [PATCH 22/31] fix typing at price.py and result.py --- src/arche/rules/price.py | 10 +++++----- src/arche/rules/result.py | 2 -- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/arche/rules/price.py b/src/arche/rules/price.py index dab1b9d..de20ed7 100755 --- a/src/arche/rules/price.py +++ b/src/arche/rules/price.py @@ -155,14 +155,14 @@ def compare_names_for_same_urls( compare `name_field` field""" result = Result("Compare Names Per Url") - url_field: Any = tagged_fields.get("product_url_field") - name_field: Any = tagged_fields.get("name_field") - if not url_field or not name_field: + url_field_list: Optional[List[str]] = tagged_fields.get("product_url_field") + name_field_list: Optional[List[str]] = tagged_fields.get("name_field") + if not url_field_list or not name_field_list: result.add_info(Outcome.SKIPPED) return result - name_field = name_field[0] - url_field = url_field[0] + name_field: str = 
name_field_list[0] + url_field: str = url_field_list[0] diff_names_count = 0 same_urls = source_df[(source_df[url_field].isin(target_df[url_field].values))][ diff --git a/src/arche/rules/result.py b/src/arche/rules/result.py index 8dd1109..87c5282 100755 --- a/src/arche/rules/result.py +++ b/src/arche/rules/result.py @@ -40,8 +40,6 @@ class Message: summary: str detailed: Optional[str] = None errors: Optional[Dict[str, Set]] = None - - # expression "field(default_factory=set)" has type "Set[_T]", so we have to cast _err_keys: Set[Union[str, int]] = field(default_factory=set) @property From 774f8c88725889f0291757e3b459a2cfbd87b630 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Thu, 3 Oct 2019 10:20:25 -0300 Subject: [PATCH 23/31] refactor --- src/arche/readers/schema.py | 2 +- src/arche/rules/price.py | 2 +- src/arche/tools/schema.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arche/readers/schema.py b/src/arche/readers/schema.py index 7d6b08c..fd48cdd 100755 --- a/src/arche/readers/schema.py +++ b/src/arche/readers/schema.py @@ -59,7 +59,7 @@ def get_tags(schema: RawSchema) -> TaggedFields: # ItemsView[str, Union[str, bool, int, float, None, list[Any]]] properties = cast(ItemsView[str, Dict[str, Any]], schema["properties"].items()) for key, value in properties: - property_tags = value.get("tag", []) + property_tags = value.get("tag") if property_tags: tagged_fields = Schema.get_field_tags(property_tags, key, tagged_fields) return tagged_fields diff --git a/src/arche/rules/price.py b/src/arche/rules/price.py index de20ed7..7835c0d 100755 --- a/src/arche/rules/price.py +++ b/src/arche/rules/price.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, List +from typing import Optional, List from arche.readers.schema import TaggedFields from arche.rules.result import Result, Outcome diff --git a/src/arche/tools/schema.py b/src/arche/tools/schema.py index bb6999d..dc0c09a 100755 --- a/src/arche/tools/schema.py +++ b/src/arche/tools/schema.py @@ -1,6 +1,6 @@ from collections import defaultdict import random -from typing import Any, Deque, Dict, List, Optional, DefaultDict, Union +from typing import Any, Deque, Dict, List, Optional, DefaultDict from arche.readers.items import RawItems from arche.readers.schema import Schema, RawSchema From 797079eb292bffa325ac30b7dff9cd96739208ed Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Thu, 3 Oct 2019 10:35:25 -0300 Subject: [PATCH 24/31] refactor --- src/arche/arche.py | 2 +- src/arche/readers/items.py | 10 ---------- tests/readers/test_items.py | 8 -------- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/src/arche/arche.py b/src/arche/arche.py index 7ab2375..2bc55f5 100755 --- a/src/arche/arche.py +++ b/src/arche/arche.py @@ -140,7 +140,7 @@ def run_all_rules(self): self.run_schema_rules() def data_quality_report(self, bucket: Optional[str] = None): - if helpers.is_collection_key(str(self.source or "")): + if helpers.is_collection_key(str(self.source)): raise ValueError("Collections are not supported") if not self.schema: raise ValueError("Schema is empty") diff --git a/src/arche/readers/items.py b/src/arche/readers/items.py index 392c591..1a69ce1 100755 --- a/src/arche/readers/items.py +++ b/src/arche/readers/items.py @@ -41,14 +41,6 @@ def categorize(df: pd.DataFrame) -> pd.DataFrame: except TypeError: continue - def origin_column_name(self, new: str) -> str: - if new in self.df.columns: - return new - for column in self.df.columns: - if column in new: - return column - return "" - @classmethod def 
from_df(cls, df: pd.DataFrame): return cls(raw=np.array(df.to_dict("records")), df=df) @@ -107,7 +99,6 @@ def __init__( self.start_index = start_index self.start: str = f"{key}/{start_index}" self._job: Job = None - self._limit: int = 0 super().__init__(key, count, filters) @property @@ -156,7 +147,6 @@ def __init__( filters: Optional[api.Filters] = None, ): self.start = start - self._limit: int = 0 super().__init__(key, count, filters) @property diff --git a/tests/readers/test_items.py b/tests/readers/test_items.py index a23569b..40a4d06 100755 --- a/tests/readers/test_items.py +++ b/tests/readers/test_items.py @@ -6,14 +6,6 @@ import pytest -@pytest.mark.parametrize( - "name, expected_name", [("price", "price"), ("name_0", "name")] -) -def test_origin_column_name(get_cloud_items, name, expected_name): - items = Items.from_df(pd.DataFrame(get_cloud_items)) - assert items.origin_column_name(name) == expected_name - - @pytest.mark.parametrize( "df, expected_raw, expected_df", [ From 5d07463159d8706d7dc4260af70a1e4e7421e80d Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Fri, 4 Oct 2019 15:14:02 -0300 Subject: [PATCH 25/31] update Pipfile --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index 26cbf83..8e1cec6 100755 --- a/Pipfile +++ b/Pipfile @@ -14,7 +14,6 @@ fastjsonschema = "*" perfect-jsonschema = "*" tqdm = "*" ipywidgets = "*" -mypy = "*" [dev-packages] jupyterlab = "*" @@ -43,6 +42,7 @@ pyarrow = "*" cufflinks = "*" tables = "*" nb-black = "*" +mypy = "*" [requires] python_version = "3.7" From d073642ea913dfef6d05782ad9d2d5d08c1bafb5 Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Fri, 4 Oct 2019 15:35:35 -0300 Subject: [PATCH 26/31] refactoring --- src/arche/tools/api.py | 2 +- src/arche/tools/bitbucket.py | 9 ++++----- tox.ini | 5 ----- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/arche/tools/api.py b/src/arche/tools/api.py index f97d210..748ad3f 100755 --- a/src/arche/tools/api.py +++ b/src/arche/tools/api.py @@ -145,7 +145,7 @@ def get_items_with_pool( """ active_connections_limit = 10 processes_count: int = int( - min(max(helpers.cpus_count(), workers), active_connections_limit) or 0 + min(max(helpers.cpus_count() or 0, workers), active_connections_limit) ) batch_size = math.ceil(count / processes_count) diff --git a/src/arche/tools/bitbucket.py b/src/arche/tools/bitbucket.py index 117db05..03c86ab 100644 --- a/src/arche/tools/bitbucket.py +++ b/src/arche/tools/bitbucket.py @@ -22,11 +22,10 @@ def prepare_request(url: str) -> urllib.request.Request: def convert_to_api_url(url: str, netloc: str, api_netloc: str) -> str: """Support both regular and raw URLs""" - try: - user, repo, path = re.search( # type: ignore - f"https://{netloc}/(.*?)/(.*?)/(?:raw|src)/(.*)", url - ).groups() - except AttributeError: + match = re.search(f"https://{netloc}/(.*?)/(.*?)/(?:raw|src)/(.*)", url) + if match: + user, repo, path = match.groups() + else: raise ValueError("Not a valid bitbucket URL: {url}") return f"https://{api_netloc}/2.0/repositories/{user}/{repo}/src/{path}" diff --git a/tox.ini b/tox.ini index 5aa06e9..256cfff 100755 --- a/tox.ini +++ b/tox.ini @@ -27,11 +27,6 @@ deps = mypy commands = mypy --ignore-missing-imports src/arche tests -[mypy] -deps = - mypy -commands = mypy src/arche - [flake8] select = C,E,F,W,I,D,B,B9 ignore = W503, E741, E501, E203, I101 From d42dbe1fd5a33a3e6758d1a77cbd62a947ab735c Mon Sep 17 00:00:00 2001 From: Anderson Berg Date: Fri, 4 Oct 2019 15:48:42 -0300 Subject: [PATCH 27/31] 
refactor typing --- src/arche/readers/schema.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/arche/readers/schema.py b/src/arche/readers/schema.py index fd48cdd..222ac3a 100755 --- a/src/arche/readers/schema.py +++ b/src/arche/readers/schema.py @@ -2,7 +2,7 @@ from enum import Enum import json import pprint -from typing import Dict, List, Union, cast, Any, ItemsView, Set +from typing import Dict, List, Union, cast, Any, ItemsView, Set, DefaultDict from arche.tools import s3 import perfect_jsonschema @@ -54,20 +54,20 @@ def get_enums(self) -> List[str]: @staticmethod def get_tags(schema: RawSchema) -> TaggedFields: - tagged_fields: Dict[str, List[str]] = defaultdict(list) - # schema["properties"].items() has type: - # ItemsView[str, Union[str, bool, int, float, None, list[Any]]] - properties = cast(ItemsView[str, Dict[str, Any]], schema["properties"].items()) - for key, value in properties: - property_tags = value.get("tag") - if property_tags: - tagged_fields = Schema.get_field_tags(property_tags, key, tagged_fields) - return tagged_fields + tagged_fields: DefaultDict[str, List[str]] = defaultdict(list) + for key, value in schema["properties"].items(): + if isinstance(value, Dict): + property_tags = value.get("tag") + if property_tags: + tagged_fields = Schema.get_field_tags( + property_tags, key, tagged_fields + ) + return dict(tagged_fields) @classmethod def get_field_tags( - cls, tags: Set[Any], field: str, tagged_fields: Dict - ) -> TaggedFields: + cls, tags: Set[Any], field: str, tagged_fields: DefaultDict + ) -> DefaultDict[str, List[str]]: tags = cls.parse_tag(tags) if not tags: raise ValueError( From 0f3145f9550b3d4582f2ddc95753578779b5f093 Mon Sep 17 00:00:00 2001 From: manycoding Date: Fri, 4 Oct 2019 16:08:10 -0300 Subject: [PATCH 28/31] Add to travis, fix request import in mypy 0.730 --- .travis.yml | 2 +- src/arche/tools/bitbucket.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9b654e9..7e59224 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ install: - pip install tox-travis pip -U --no-cache-dir script: - tox - - tox -e docs + - tox -e pep8, mypy, docs after_success: - tox -e codecov deploy: diff --git a/src/arche/tools/bitbucket.py b/src/arche/tools/bitbucket.py index 03c86ab..90ffba2 100644 --- a/src/arche/tools/bitbucket.py +++ b/src/arche/tools/bitbucket.py @@ -2,7 +2,7 @@ import os import re from typing import Dict -import urllib +from urllib.request import Request NETLOC = os.getenv("BITBUCKET_NETLOC") or "bitbucket.org" @@ -11,13 +11,13 @@ PASS = os.getenv("BITBUCKET_PASSWORD") -def prepare_request(url: str) -> urllib.request.Request: +def prepare_request(url: str) -> Request: if not USER or not PASS: msg = "Credentials not found: `BITBUCKET_USER` or `BITBUCKET_PASSWORD` not set." 
raise ValueError(msg) api_url = convert_to_api_url(url, NETLOC, API_NETLOC) - return urllib.request.Request(api_url, headers=get_auth_header(USER, PASS)) + return Request(api_url, headers=get_auth_header(USER, PASS)) def convert_to_api_url(url: str, netloc: str, api_netloc: str) -> str: From e9b66048d7249b4d6ed1eda22f64c9b8d1856a1c Mon Sep 17 00:00:00 2001 From: manycoding Date: Fri, 4 Oct 2019 16:13:31 -0300 Subject: [PATCH 29/31] Remove redundant casting --- src/arche/tools/api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arche/tools/api.py b/src/arche/tools/api.py index 748ad3f..db19984 100755 --- a/src/arche/tools/api.py +++ b/src/arche/tools/api.py @@ -144,9 +144,10 @@ def get_items_with_pool( A numpy array of items """ active_connections_limit = 10 - processes_count: int = int( - min(max(helpers.cpus_count() or 0, workers), active_connections_limit) + processes_count: int = min( + max(helpers.cpus_count() or 0, workers), active_connections_limit ) + batch_size = math.ceil(count / processes_count) start_idxs = range(start_index, start_index + count, batch_size) From 6771ab8a2749058d94f9da28f4494d45cebc0c0b Mon Sep 17 00:00:00 2001 From: manycoding Date: Fri, 4 Oct 2019 16:19:10 -0300 Subject: [PATCH 30/31] Another redundant casting --- src/arche/readers/schema.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/arche/readers/schema.py b/src/arche/readers/schema.py index 222ac3a..7d78bb6 100755 --- a/src/arche/readers/schema.py +++ b/src/arche/readers/schema.py @@ -2,7 +2,7 @@ from enum import Enum import json import pprint -from typing import Dict, List, Union, cast, Any, ItemsView, Set, DefaultDict +from typing import Dict, List, Union, Any, Set, DefaultDict from arche.tools import s3 import perfect_jsonschema @@ -41,14 +41,9 @@ def __repr__(self): return pprint.pformat(self.raw) def get_enums(self) -> List[str]: - enums = [] - # self.raw["properties"].items() has type: - # ItemsView[str, Union[str, bool, int, float, None, list[Any]]] - properties = cast( - ItemsView[str, Dict[str, Any]], self.raw["properties"].items() - ) - for k, v in properties: - if "enum" in v.keys(): + enums: List[str] = [] + for k, v in self.raw["properties"].items(): + if isinstance(v, Dict) and "enum" in v.keys(): enums.append(k) return enums From 561c2ca7d8025474c68ab0a84091692e97dacaa3 Mon Sep 17 00:00:00 2001 From: manycoding Date: Fri, 4 Oct 2019 16:23:59 -0300 Subject: [PATCH 31/31] Spaces are bad --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7e59224..0b68ddb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ install: - pip install tox-travis pip -U --no-cache-dir script: - tox - - tox -e pep8, mypy, docs + - tox -e pep8,mypy,docs after_success: - tox -e codecov deploy:
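Note: the typing patterns that recur through this series, condensed into a small standalone sketch. Everything below is illustrative only and not the Arche API: an Optional[int] return for a cpu-count helper, "or 0" to narrow that Optional at the call site, an explicit DefaultDict annotation so mypy keeps the value type, and isinstance()/cast() narrowing in place of "# type: ignore".

# Illustrative sketch of the mypy patterns used in the patches above; names are made up.
from collections import defaultdict
import os
from typing import DefaultDict, Dict, List, Optional, cast


def cpu_count() -> Optional[int]:
    # os.sched_getaffinity is not available on every platform, so the helper
    # may fall back to os.cpu_count(), which itself may return None.
    try:
        return len(os.sched_getaffinity(0))
    except AttributeError:
        return os.cpu_count()


def workers(requested: int, limit: int = 10) -> int:
    # "cpu_count() or 0" narrows Optional[int] to int without a cast.
    return min(max(cpu_count() or 0, requested), limit)


def tag_fields(properties: Dict[str, object]) -> DefaultDict[str, List[str]]:
    # Annotating the defaultdict up front keeps mypy from widening it to
    # DefaultDict[Any, Any] and losing the value type.
    tagged: DefaultDict[str, List[str]] = defaultdict(list)
    for field, value in properties.items():
        # isinstance() narrows the object-typed JSON value; cast() documents the
        # expected shape of "tag" instead of silencing mypy with "# type: ignore".
        if isinstance(value, dict):
            for tag in cast(List[str], value.get("tag", [])):
                tagged[tag].append(field)
    return tagged


if __name__ == "__main__":
    print(workers(4))
    print(tag_fields({"name": {"tag": ["name_field"]}, "price": {"type": "number"}}))

This is the same shape of fix applied to tools/helpers.py, tools/api.py and readers/schema.py in the series above.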