-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhandler_gmail.py
128 lines (115 loc) · 5.16 KB
/
handler_gmail.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from datetime import datetime
import imaplib
import email
from email.header import decode_header
from datetime import datetime
from email.utils import parsedate_to_datetime
import html2text
import pandas as pd
from evadb.third_party.types import DBHandler, DBHandlerResponse, DBHandlerStatus
class GmailHandler(DBHandler):
    """``DBHandler`` implementation that reads a Gmail account over IMAP.

    Every IMAP folder of the account is exposed as a table and every message
    in a folder as one row with the columns listed in ``_COLUMNS``.
    """

    # Column names reported by get_columns() and used as row keys by select().
    _COLUMNS = ("sender", "receiver", "day", "subject", "message")

    def __init__(self, name: str, **kwargs):
        """Store the account credentials; no network traffic happens here.

        Args:
            name: Handler name, forwarded to ``DBHandler.__init__``.
            **kwargs: ``email`` (account address) and ``password`` (the
                password used for the IMAP login; the original comment calls
                it a "special password" -- presumably a Gmail app password,
                TODO confirm).
        """
        super().__init__(name)
        self.email = kwargs.get("email")
        self.password = kwargs.get("password")
        # Gmail's IMAP-over-SSL endpoint.
        self.imap_server = "imap.gmail.com"
        self.imap_port = 993
        # Live imaplib connection; None until connect() succeeds.
        self.mail = None

    def connect(self):
        """Open an SSL IMAP session and log in with the stored credentials.

        Returns:
            ``DBHandlerStatus(status=True)`` on success, otherwise a status
            with ``status=False`` and the error text.
        """
        mail = None
        try:
            mail = imaplib.IMAP4_SSL(self.imap_server, self.imap_port)
            mail.login(self.email, self.password)
        except Exception as e:
            # The socket may already be open when login fails; close it so a
            # rejected login does not leak the connection.
            if mail is not None:
                try:
                    mail.shutdown()
                except OSError:
                    pass
            return DBHandlerStatus(status=False, error=str(e))
        self.mail = mail
        return DBHandlerStatus(status=True)

    def disconnect(self):
        """Log out of the IMAP session (best effort).

        Safe to call when no connection was ever established.
        """
        mail, self.mail = self.mail, None
        if mail is None:
            return
        try:
            mail.logout()
        except (imaplib.IMAP4.error, OSError):
            # The session is being discarded anyway; a failed LOGOUT
            # (e.g. the server already dropped the link) is not actionable.
            pass

    def check_connection(self) -> DBHandlerStatus:
        """Send a NOOP to verify that the IMAP session is still alive.

        Returns:
            ``DBHandlerStatus`` with ``status=True`` if the server answered,
            otherwise ``status=False`` and the error text.
        """
        if self.mail is None:
            return DBHandlerStatus(
                status=False, error="Not connected to the IMAP server."
            )
        try:
            self.mail.noop()
        except Exception as e:
            return DBHandlerStatus(status=False, error=str(e))
        return DBHandlerStatus(status=True)

    @staticmethod
    def _parse_folder_name(list_line: str) -> str:
        """Extract the folder name from one decoded IMAP LIST response line.

        The line is expected to look like ``(<flags>) "/" "<name>"``.

        Raises:
            ValueError: if the ``"/"`` hierarchy delimiter is not present.
        """
        # Split on the first delimiter only, so a folder name that happens to
        # contain the same character sequence is still parsed.
        _, delimiter, folder_name = list_line.partition(' "/" ')
        if not delimiter:
            raise ValueError(f"Unexpected IMAP LIST response: {list_line!r}")
        return folder_name.strip('"')

    def get_tables(self) -> DBHandlerResponse:
        """List the folders of the account; each folder is one table.

        Returns:
            ``DBHandlerResponse`` whose ``data`` is a list of folder names,
            or ``data=None`` with ``error`` set when the listing fails.
        """
        try:
            status, folder_data = self.mail.list()
            if status != "OK":
                return DBHandlerResponse(
                    data=None, error=f"IMAP LIST failed with status {status}"
                )
            folder_list = []
            for folder_info in folder_data:
                # imaplib yields None / b"" placeholders (no folders, or the
                # remainder of a line that carried a literal); skip them.
                if not folder_info:
                    continue
                if isinstance(folder_info, tuple):
                    # Name sent as an IMAP literal: (header, raw_name).
                    folder_name = folder_info[1].decode("utf-8")
                else:
                    folder_name = self._parse_folder_name(
                        folder_info.decode("utf-8")
                    )
                folder_list.append(folder_name)
            return DBHandlerResponse(data=folder_list)
        except Exception as e:
            return DBHandlerResponse(data=None, error=str(e))

    def get_columns(self, table_name: str) -> DBHandlerResponse:
        """Return the fixed column set shared by every folder.

        Args:
            table_name: Folder name; unused because all folders have the
                same columns.

        Returns:
            ``DBHandlerResponse`` whose ``data`` is a one-column DataFrame
            (``column_name``) listing the columns.
        """
        columns_df = pd.DataFrame(list(self._COLUMNS), columns=["column_name"])
        return DBHandlerResponse(data=columns_df)

    def _decode_header(self, header):
        """Decode an RFC 2047 encoded header value into a plain string.

        Args:
            header: Raw header value, or ``None`` when the header is absent.

        Returns:
            The decoded text; an empty string for a missing header.
        """
        if header is None:
            return ""
        fragments = []
        # A header may consist of several differently-encoded chunks; decode
        # and join all of them rather than only the first one.
        for value, charset in decode_header(header):
            if isinstance(value, bytes):
                try:
                    value = value.decode(charset or "utf-8", "replace")
                except LookupError:
                    # Charset label unknown to Python (e.g. "unknown-8bit").
                    value = value.decode("utf-8", "replace")
            fragments.append(value)
        return "".join(fragments)

    @staticmethod
    def _quote_mailbox(mailbox):
        """Return *mailbox* as an IMAP quoted string.

        imaplib sends arguments verbatim, so names containing spaces (such as
        ``[Gmail]/All Mail``) must be quoted. Already-quoted names are passed
        through unchanged.
        """
        name = str(mailbox)
        if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
            return name
        escaped = name.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    def _extract_body(self, msg):
        """Concatenate the textual, non-attachment parts of *msg*.

        HTML parts are converted to plain text with ``html2text``.
        """
        chunks = []
        # walk() also yields the message itself, so this covers both
        # single-part and multipart messages.
        for part in msg.walk():
            if part.get_content_maintype() != "text":
                # Containers and binary parts (inline images, ...) carry no
                # readable text.
                continue
            if "attachment" in str(part.get("Content-Disposition")):
                continue
            payload = part.get_payload(decode=True)
            if payload is None:
                continue
            charset = part.get_content_charset() or "utf-8"
            try:
                text = payload.decode(charset, "ignore")
            except LookupError:
                # Declared charset is not known to Python.
                text = payload.decode("utf-8", "ignore")
            if part.get_content_type() == "text/html":
                text = html2text.html2text(text)
            chunks.append(text)
        return "".join(chunks)

    def _message_to_row(self, msg):
        """Build the row dictionary (keys as in ``_COLUMNS``) for one message."""
        try:
            day = parsedate_to_datetime(
                self._decode_header(msg["Date"])
            ).strftime("%Y-%m-%d")
        except (TypeError, ValueError):
            # Missing or malformed Date header: keep the row, leave day unset.
            day = None
        return {
            "sender": self._decode_header(msg["From"]),
            "receiver": self._decode_header(msg["To"]),
            "day": day,
            "subject": self._decode_header(msg["Subject"]),
            "message": self._extract_body(msg),
        }

    def select(self, mailbox) -> DBHandlerResponse:
        """Iterate over all messages of *mailbox*.

        This is a generator: it yields one ``DBHandlerResponse`` per message,
        whose ``data`` is a dict with the keys ``sender``, ``receiver``,
        ``day`` (``YYYY-MM-DD`` or ``None``), ``subject`` and ``message``
        (the decoded text body). If the mailbox cannot be read, a single
        response with ``data=None`` and ``error`` set is yielded and the
        iteration ends.

        Args:
            mailbox: Folder name, e.g. one returned by ``get_tables``.
        """
        try:
            # Read-only, so fetching does not flag the messages as \Seen.
            status, _ = self.mail.select(
                self._quote_mailbox(mailbox), readonly=True
            )
            if status != "OK":
                yield DBHandlerResponse(
                    data=None,
                    error=f"Cannot open mailbox {mailbox!r}: {status}",
                )
                return
            status, messages = self.mail.search(None, "ALL")
            if status != "OK":
                yield DBHandlerResponse(
                    data=None,
                    error=f"IMAP SEARCH failed with status {status}",
                )
                return
            for num in messages[0].split():
                status, msg_data = self.mail.fetch(num, "(RFC822)")
                # A message can disappear between SEARCH and FETCH; skip
                # entries that do not carry the (header, raw_bytes) tuple.
                if (
                    status != "OK"
                    or not msg_data
                    or not isinstance(msg_data[0], tuple)
                ):
                    continue
                msg = email.message_from_bytes(msg_data[0][1])
                yield DBHandlerResponse(data=self._message_to_row(msg))
        except Exception as e:
            # A ``return <value>`` inside a generator is invisible to the
            # consumer, so the error has to be yielded to be seen.
            yield DBHandlerResponse(data=None, error=str(e))