-
Notifications
You must be signed in to change notification settings - Fork 435
/
Copy pathdatabase.py
2571 lines (2308 loc) · 91.3 KB
/
database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright (C) 2010-2015 Cuckoo Foundation.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.
# https://blog.miguelgrinberg.com/post/what-s-new-in-sqlalchemy-2-0
# https://docs.sqlalchemy.org/en/20/changelog/migration_20.html#
import hashlib
import json
import logging
import os
import sys
from contextlib import suppress
from datetime import datetime, timedelta
from typing import Any, List, Optional, Union, cast
# Sflock does a good filetype recon
from sflock.abstracts import File as SflockFile
from sflock.ident import identify as sflock_identify
from lib.cuckoo.common.cape_utils import static_config_lookup, static_extraction
from lib.cuckoo.common.colors import red
from lib.cuckoo.common.config import Config
from lib.cuckoo.common.constants import CUCKOO_ROOT
from lib.cuckoo.common.demux import demux_sample
from lib.cuckoo.common.exceptions import (
CuckooDatabaseError,
CuckooDatabaseInitializationError,
CuckooDependencyError,
CuckooOperationalError,
CuckooUnserviceableTaskError,
)
from lib.cuckoo.common.integrations.parse_pe import PortableExecutable
from lib.cuckoo.common.objects import PCAP, URL, File, Static
from lib.cuckoo.common.path_utils import path_delete, path_exists
from lib.cuckoo.common.utils import bytes2str, create_folder, get_options
try:
from sqlalchemy import (
Boolean,
Column,
DateTime,
Enum,
ForeignKey,
Index,
Integer,
String,
Table,
Text,
create_engine,
event,
func,
not_,
select,
)
from sqlalchemy.exc import IntegrityError, SQLAlchemyError
from sqlalchemy.orm import Query, backref, declarative_base, joinedload, relationship, scoped_session, sessionmaker
Base = declarative_base()
except ImportError: # pragma: no cover
raise CuckooDependencyError("Unable to import sqlalchemy (install with `poetry run pip install sqlalchemy`)")
# Package names listed for the sandbox. The order and the repeated entries
# ("doc", "vbs", "js") are kept exactly as originally declared.
sandbox_packages = (
    "access", "archive", "nsis", "cpl", "reg", "regsvr", "dll", "exe", "pdf", "pub",
    "doc", "xls", "ppt", "jar", "zip", "rar", "swf", "python", "msi", "msix",
    "ps1", "msg", "eml", "js", "html", "hta", "xps", "wsf", "mht", "doc",
    "vbs", "lnk", "chm", "hwp", "inp", "vbs", "js", "vbejse", "msbuild", "sct",
    "xslt", "shellcode", "shellcode_x64", "generic", "iso", "vhd", "udf", "one", "inf",
)
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Configuration sections, loaded once at import time and shared by the code below.
conf = Config("cuckoo")
repconf = Config("reporting")
distconf = Config("distributed")
web_conf = Config("web")

# Feature switches read from the "web" configuration.
LINUX_ENABLED = web_conf.linux.enabled
LINUX_STATIC = web_conf.linux.static_only
DYNAMIC_ARCH_DETERMINATION = web_conf.general.dynamic_arch_determination

# The reporting-backend helpers are imported only when the matching backend is
# enabled in the "reporting" configuration, so the names below exist only in that case.
if repconf.mongodb.enabled:
    from dev_utils.mongodb import mongo_find

if repconf.elasticsearchdb.enabled:
    from dev_utils.elasticsearchdb import elastic_handler, get_analysis_index

    # Short alias for the Elasticsearch handler.
    es = elastic_handler
# Schema revision this code expects; it is compared with the value stored in
# the alembic_version table when the database object is initialised.
SCHEMA_VERSION = "c2bd0eb5e69d"

# Task status values.
TASK_BANNED = "banned"
TASK_PENDING = "pending"
TASK_RUNNING = "running"
TASK_DISTRIBUTED = "distributed"
TASK_COMPLETED = "completed"
TASK_RECOVERED = "recovered"
TASK_REPORTED = "reported"
TASK_FAILED_ANALYSIS = "failed_analysis"
TASK_FAILED_PROCESSING = "failed_processing"
TASK_FAILED_REPORTING = "failed_reporting"
TASK_DISTRIBUTED_COMPLETED = "distributed_completed"

# Every status value declared above, in declaration order.
ALL_DB_STATUSES = (
    TASK_BANNED, TASK_PENDING, TASK_RUNNING, TASK_DISTRIBUTED,
    TASK_COMPLETED, TASK_RECOVERED, TASK_REPORTED,
    TASK_FAILED_ANALYSIS, TASK_FAILED_PROCESSING, TASK_FAILED_REPORTING,
    TASK_DISTRIBUTED_COMPLETED,
)

# Machine status value written when a machine is locked.
MACHINE_RUNNING = "running"
# Secondary table used in association Machine - Tag.
# Each row links one machine to one tag; no ON DELETE rule is declared here.
machines_tags = Table(
    "machines_tags",
    Base.metadata,
    Column("machine_id", Integer, ForeignKey("machines.id")),
    Column("tag_id", Integer, ForeignKey("tags.id")),
)

# Secondary table used in association Task - Tag.
# Both foreign keys are declared with ondelete="cascade".
tasks_tags = Table(
    "tasks_tags",
    Base.metadata,
    Column("task_id", Integer, ForeignKey("tasks.id", ondelete="cascade")),
    Column("tag_id", Integer, ForeignKey("tags.id", ondelete="cascade")),
)
def get_count(q, property):
    """Count the rows matched by a query.

    The query's statement is reduced to a single COUNT(property) column with
    its ordering removed, then executed on the query's own session.
    @param q: query object exposing ``statement`` and ``session``
    @param property: column or expression handed to COUNT
    @return: the scalar result of the count statement
    """
    counting_stmt = q.statement.with_only_columns(func.count(property)).order_by(None)
    return q.session.execute(counting_stmt).scalar()
class Machine(Base):
    """Configured virtual machines to be used as guests."""

    __tablename__ = "machines"

    id = Column(Integer(), primary_key=True)
    name = Column(String(255), nullable=False, unique=True)
    label = Column(String(255), nullable=False, unique=True)
    arch = Column(String(255), nullable=False)
    ip = Column(String(255), nullable=False)
    platform = Column(String(255), nullable=False)
    tags = relationship("Tag", secondary=machines_tags, backref=backref("machines"))  # lazy="subquery"
    interface = Column(String(255), nullable=True)
    snapshot = Column(String(255), nullable=True)
    locked = Column(Boolean(), nullable=False, default=False)
    locked_changed_on = Column(DateTime(timezone=False), nullable=True)
    status = Column(String(255), nullable=True)
    status_changed_on = Column(DateTime(timezone=False), nullable=True)
    resultserver_ip = Column(String(255), nullable=False)
    resultserver_port = Column(String(255), nullable=False)
    reserved = Column(Boolean(), nullable=False, default=False)

    def __init__(self, name, label, arch, ip, platform, interface, snapshot, resultserver_ip, resultserver_port, reserved):
        """Store the caller-supplied values; lock and status columns are not set here."""
        self.name = name
        self.label = label
        self.arch = arch
        self.ip = ip
        self.platform = platform
        self.interface = interface
        self.snapshot = snapshot
        self.resultserver_ip = resultserver_ip
        self.resultserver_port = resultserver_port
        self.reserved = reserved

    def __repr__(self):
        return "<Machine({},'{}')>".format(self.id, self.name)

    def to_dict(self):
        """Converts object to dict.
        Datetime columns are rendered as "YYYY-MM-DD HH:MM:SS" strings.
        @return: dict
        """
        result = {}
        for col in self.__table__.columns:
            val = getattr(self, col.name)
            result[col.name] = val.strftime("%Y-%m-%d %H:%M:%S") if isinstance(val, datetime) else val

        # Tags are a relation so no column to iterate.
        result["tags"] = [t.name for t in self.tags]
        return result

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        as_dict = self.to_dict()
        return json.dumps(as_dict)
class Tag(Base):
    """Tag describing anything you want."""

    __tablename__ = "tags"

    id = Column(Integer(), primary_key=True)
    name = Column(String(255), nullable=False, unique=True)

    def __init__(self, name):
        """@param name: tag text (the column is declared unique)."""
        self.name = name

    def __repr__(self):
        return "<Tag({},'{}')>".format(self.id, self.name)
class Guest(Base):
    """Tracks guest run."""

    __tablename__ = "guests"

    id = Column(Integer(), primary_key=True)
    status = Column(String(16), nullable=False)
    name = Column(String(255), nullable=False)
    label = Column(String(255), nullable=False)
    platform = Column(String(255), nullable=False)
    manager = Column(String(255), nullable=False)
    started_on = Column(DateTime(timezone=False), default=datetime.now, nullable=False)
    shutdown_on = Column(DateTime(timezone=False), nullable=True)
    task_id = Column(Integer, ForeignKey("tasks.id", ondelete="cascade"), nullable=False, unique=True)

    def __init__(self, name, label, platform, manager, task_id):
        """Store the identifying fields; ``status`` is left for the caller to set."""
        self.name = name
        self.label = label
        self.platform = platform
        self.manager = manager
        self.task_id = task_id

    def __repr__(self):
        return "<Guest({}, '{}')>".format(self.id, self.name)

    def to_dict(self):
        """Converts object to dict.
        Datetime columns are rendered as "YYYY-MM-DD HH:MM:SS" strings.
        @return: dict
        """
        result = {}
        for col in self.__table__.columns:
            val = getattr(self, col.name)
            result[col.name] = val.strftime("%Y-%m-%d %H:%M:%S") if isinstance(val, datetime) else val
        return result

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        as_dict = self.to_dict()
        return json.dumps(as_dict)
class Sample(Base):
    """Submitted files details."""

    __tablename__ = "samples"

    id = Column(Integer(), primary_key=True)
    file_size = Column(Integer(), nullable=False)
    file_type = Column(Text(), nullable=False)
    md5 = Column(String(32), nullable=False)
    crc32 = Column(String(8), nullable=False)
    sha1 = Column(String(40), nullable=False)
    sha256 = Column(String(64), nullable=False)
    sha512 = Column(String(128), nullable=False)
    ssdeep = Column(String(255), nullable=True)
    parent = Column(Integer(), nullable=True)
    source_url = Column(String(2000), nullable=True)

    __table_args__ = (
        Index("md5_index", "md5"),
        Index("sha1_index", "sha1"),
        Index("sha256_index", "sha256", unique=True),
    )

    def __init__(self, md5, crc32, sha1, sha256, sha512, file_size, file_type=None, ssdeep=None, parent=None, source_url=None):
        """Store the hashes and size; optional fields are assigned only when truthy."""
        self.md5 = md5
        self.sha1 = sha1
        self.crc32 = crc32
        self.sha256 = sha256
        self.sha512 = sha512
        self.file_size = file_size

        optional_fields = (
            ("file_type", file_type),
            ("ssdeep", ssdeep),
            ("parent", parent),
            ("source_url", source_url),
        )
        for attr_name, attr_value in optional_fields:
            if attr_value:
                setattr(self, attr_name, attr_value)

    def __repr__(self):
        return "<Sample({},'{}')>".format(self.id, self.sha256)

    def to_dict(self):
        """Converts object to dict.
        @return: dict
        """
        return {col.name: getattr(self, col.name) for col in self.__table__.columns}

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        as_dict = self.to_dict()
        return json.dumps(as_dict)
class Error(Base):
    """Analysis errors."""

    __tablename__ = "errors"

    # Width of the message column; longer messages are shortened in __init__.
    MAX_LENGTH = 1024

    id = Column(Integer(), primary_key=True)
    message = Column(String(MAX_LENGTH), nullable=False)
    task_id = Column(Integer, ForeignKey("tasks.id"), nullable=False)

    def __init__(self, message, task_id):
        """Store the error text for a task.

        Messages longer than MAX_LENGTH are cut to exactly MAX_LENGTH characters:
        the beginning and the end are kept, joined by "...".
        """
        limit = self.MAX_LENGTH
        if len(message) > limit:
            head_len = limit // 2 - 2
            tail_len = limit - head_len - 3  # 3 == len("...")
            message = message[:head_len] + "..." + message[-tail_len:]

        self.message = message
        self.task_id = task_id

    def __repr__(self):
        return "<Error({},'{}','{}')>".format(self.id, self.message, self.task_id)

    def to_dict(self):
        """Converts object to dict.
        @return: dict
        """
        return {col.name: getattr(self, col.name) for col in self.__table__.columns}

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        as_dict = self.to_dict()
        return json.dumps(as_dict)
class Task(Base):
    """Analysis task queue."""

    __tablename__ = "tasks"

    id = Column(Integer(), primary_key=True)
    target = Column(Text(), nullable=False)
    category = Column(String(255), nullable=False)
    cape = Column(String(2048), nullable=True)
    timeout = Column(Integer(), server_default="0", nullable=False)
    priority = Column(Integer(), server_default="1", nullable=False)
    custom = Column(String(255), nullable=True)
    machine = Column(String(255), nullable=True)
    package = Column(String(255), nullable=True)
    # NOTE(review): a boolean default on a String column looks unintended - confirm before changing.
    route = Column(String(128), nullable=True, default=False)
    # Task tags
    tags_tasks = Column(String(256), nullable=True)
    # Virtual machine tags
    tags = relationship("Tag", secondary=tasks_tags, backref=backref("tasks"), lazy="subquery")
    options = Column(Text(), nullable=True)
    platform = Column(String(255), nullable=True)
    memory = Column(Boolean, nullable=False, default=False)
    enforce_timeout = Column(Boolean, nullable=False, default=False)
    # Pass the callable, not its result: ``datetime.now()`` would be evaluated once at
    # import time and every task relying on the default would get the process start time.
    clock = Column(DateTime(timezone=False), default=datetime.now, nullable=False)
    added_on = Column(DateTime(timezone=False), default=datetime.now, nullable=False)
    started_on = Column(DateTime(timezone=False), nullable=True)
    completed_on = Column(DateTime(timezone=False), nullable=True)
    status = Column(
        Enum(
            TASK_BANNED,
            TASK_PENDING,
            TASK_RUNNING,
            TASK_COMPLETED,
            TASK_DISTRIBUTED,
            TASK_REPORTED,
            TASK_RECOVERED,
            TASK_FAILED_ANALYSIS,
            TASK_FAILED_PROCESSING,
            TASK_FAILED_REPORTING,
            name="status_type",
        ),
        server_default=TASK_PENDING,
        nullable=False,
    )

    # Statistics data to identify broken Cuckoos servers or VMs
    # Also for doing profiling to improve speed
    dropped_files = Column(Integer(), nullable=True)
    running_processes = Column(Integer(), nullable=True)
    api_calls = Column(Integer(), nullable=True)
    domains = Column(Integer(), nullable=True)
    signatures_total = Column(Integer(), nullable=True)
    signatures_alert = Column(Integer(), nullable=True)
    files_written = Column(Integer(), nullable=True)
    registry_keys_modified = Column(Integer(), nullable=True)
    crash_issues = Column(Integer(), nullable=True)
    anti_issues = Column(Integer(), nullable=True)
    analysis_started_on = Column(DateTime(timezone=False), nullable=True)
    analysis_finished_on = Column(DateTime(timezone=False), nullable=True)
    processing_started_on = Column(DateTime(timezone=False), nullable=True)
    processing_finished_on = Column(DateTime(timezone=False), nullable=True)
    signatures_started_on = Column(DateTime(timezone=False), nullable=True)
    signatures_finished_on = Column(DateTime(timezone=False), nullable=True)
    reporting_started_on = Column(DateTime(timezone=False), nullable=True)
    reporting_finished_on = Column(DateTime(timezone=False), nullable=True)
    timedout = Column(Boolean, nullable=False, default=False)

    sample_id = Column(Integer, ForeignKey("samples.id"), nullable=True)
    sample = relationship("Sample", backref=backref("tasks", lazy="subquery"))
    machine_id = Column(Integer, nullable=True)
    guest = relationship("Guest", uselist=False, backref=backref("tasks"), cascade="save-update, delete")
    errors = relationship("Error", backref=backref("tasks"), cascade="save-update, delete")

    shrike_url = Column(String(4096), nullable=True)
    shrike_refer = Column(String(4096), nullable=True)
    shrike_msg = Column(String(4096), nullable=True)
    shrike_sid = Column(Integer(), nullable=True)

    # To be removed - Deprecate soon, not used anymore
    parent_id = Column(Integer(), nullable=True)
    tlp = Column(String(255), nullable=True)

    user_id = Column(Integer(), nullable=True)
    username = Column(String(256), nullable=True)

    __table_args__ = (
        Index("category_index", "category"),
        Index("status_index", "status"),
        Index("added_on_index", "added_on"),
        Index("completed_on_index", "completed_on"),
    )

    def to_dict(self):
        """Converts object to dict.
        Datetime columns are rendered as "YYYY-MM-DD HH:MM:SS" strings.
        @return: dict
        """
        d = {}
        for column in self.__table__.columns:
            value = getattr(self, column.name)
            if isinstance(value, datetime):
                d[column.name] = value.strftime("%Y-%m-%d %H:%M:%S")
            else:
                d[column.name] = value

        # Tags are a relation so no column to iterate.
        d["tags"] = [tag.name for tag in self.tags]
        return d

    def to_json(self):
        """Converts object to JSON.
        @return: JSON data
        """
        return json.dumps(self.to_dict())

    def __init__(self, target=None):
        """@param target: value stored in the ``target`` column; all other columns are set by callers."""
        self.target = target

    def __repr__(self):
        return f"<Task({self.id},'{self.target}')>"
class AlembicVersion(Base):
    """Table used to pinpoint actual database schema release."""

    __tablename__ = "alembic_version"

    # Revision identifier; it is the primary key of the table.
    version_num = Column(String(32), nullable=False, primary_key=True)
class _Database:
"""Analysis queue database.
This class handles the creation of the database user for internal queue
management. It also provides some functions for interacting with it.
"""
def __init__(self, dsn=None, schema_check=True):
    """Connect to the database, create the schema and verify its version.

    The connection string is taken, in order of preference, from ``dsn``, from
    the ``database.connection`` configuration value, or defaults to a SQLite
    file at <CUCKOO_ROOT>/db/cuckoo.db.
    @param dsn: database connection string.
    @param schema_check: disable or enable the db schema version check
    @raise CuckooDatabaseError: if the SQLite directory cannot be created, the schema
        cannot be created, or the schema version row cannot be stored.
    """
    self.cfg = conf

    if dsn:
        self._connect_database(dsn)
    elif self.cfg.database.connection:
        self._connect_database(self.cfg.database.connection)
    else:
        file_path = os.path.join(CUCKOO_ROOT, "db", "cuckoo.db")
        if not path_exists(file_path):  # pragma: no cover
            # Only the parent directory is created here; the database file itself is not.
            db_dir = os.path.dirname(file_path)
            if not path_exists(db_dir):
                try:
                    create_folder(folder=db_dir)
                except CuckooOperationalError as e:
                    raise CuckooDatabaseError(f"Unable to create database directory: {e}")

        self._connect_database(f"sqlite:///{file_path}")

    # SQL statement logging follows the configuration. Turn it on for debugging.
    self.engine.echo = self.cfg.database.log_statements

    # Connection timeout, 60 when the configuration value is unset or falsy.
    # NOTE(review): this sets an attribute on the engine after creation; confirm it reaches the pool.
    if self.cfg.database.timeout:
        self.engine.pool_timeout = self.cfg.database.timeout
    else:
        self.engine.pool_timeout = 60

    # Create schema.
    try:
        Base.metadata.create_all(self.engine)
    except SQLAlchemyError as e:  # pragma: no cover
        raise CuckooDatabaseError(f"Unable to create or connect to database: {e}")

    # Get db session.
    self.session = scoped_session(sessionmaker(bind=self.engine, expire_on_commit=False))

    # There should be a better way to clean up orphans. This runs after every flush, which is crazy.
    @event.listens_for(self.session, "after_flush")
    def delete_tag_orphans(session, ctx):
        # Bulk-delete tags that are attached to neither a task nor a machine.
        session.query(Tag).filter(~Tag.tasks.any()).filter(~Tag.machines.any()).delete(synchronize_session=False)

    # Deal with schema versioning.
    # TODO: it's a little bit dirty, needs refactoring.
    with self.session() as tmp_session:
        last = tmp_session.query(AlembicVersion).first()
        if last is None:
            # No version row yet: set database schema version to the one this code expects.
            tmp_session.add(AlembicVersion(version_num=SCHEMA_VERSION))
            try:
                tmp_session.commit()
            except SQLAlchemyError as e:  # pragma: no cover
                tmp_session.rollback()
                raise CuckooDatabaseError(f"Unable to set schema version: {e}")
        else:
            # Check if db version is the expected one.
            if last.version_num != SCHEMA_VERSION and schema_check:  # pragma: no cover
                print(
                    f"DB schema version mismatch: found {last.version_num}, expected {SCHEMA_VERSION}. Try to apply all migrations"
                )
                print(red("cd utils/db_migration/ && poetry run alembic upgrade head"))
                # Terminates the process; no exit status is passed to sys.exit().
                sys.exit()
def __del__(self):
    """Disconnects pool.

    Best effort: a missing ``engine`` attribute (AttributeError) or a KeyError
    raised while disposing is ignored.
    """
    try:
        self.engine.dispose()
    except (KeyError, AttributeError):
        pass
def _connect_database(self, connection_string):
    """Connect to a Database.

    Creates the SQLAlchemy engine and stores it on ``self.engine``.
    @param connection_string: Connection string specifying the database
    @raise CuckooDependencyError: if the database driver module cannot be imported
    """
    try:
        # TODO: this is quite ugly, should improve.
        if connection_string.startswith("sqlite"):
            # Using "check_same_thread" to disable sqlite safety check on multiple threads.
            self.engine = create_engine(connection_string, connect_args={"check_same_thread": False})
        elif connection_string.startswith("postgres"):
            # The SSL mode comes from the configuration (database.psql_ssl_mode).
            # See: http://www.postgresql.org/docs/9.0/static/libpq-ssl.html#LIBPQ-SSL-SSLMODE-STATEMENTS
            self.engine = create_engine(
                connection_string, connect_args={"sslmode": self.cfg.database.psql_ssl_mode}, pool_pre_ping=True
            )
        else:
            self.engine = create_engine(connection_string)
    except ImportError as e:  # pragma: no cover
        # Python 3 exceptions have no ``.message`` attribute. Prefer the module name
        # recorded on the ImportError and fall back to the last word of its text.
        lib = e.name
        if not lib:
            words = str(e).split()
            lib = words[-1].strip("'\"") if words else "unknown"
        raise CuckooDependencyError(
            f"Missing database driver, unable to import {lib} (install with `pip install {lib}`)"
        ) from e
def _get_or_create(self, model, **kwargs):
    """Get an ORM instance or create it if not exist.

    A newly created instance is added to the session; nothing is flushed or committed here.
    @param model: model to query
    @param kwargs: column values used both as the lookup filter and as constructor arguments
    @return: row instance
    """
    existing = self.session.query(model).filter_by(**kwargs).first()
    if existing:
        return existing

    created = model(**kwargs)
    self.session.add(created)
    return created
def drop(self):
    """Drop all tables.

    @raise CuckooDatabaseError: if the tables cannot be dropped
    """
    try:
        Base.metadata.drop_all(self.engine)
    except SQLAlchemyError as e:
        # The message names the operation that actually failed, and keeps the cause chained.
        raise CuckooDatabaseError(f"Unable to drop database tables: {e}") from e
def clean_machines(self):
    """Clean old stored machines and related tables.

    Removes every machine/tag association row, then every machine row.
    """
    # Secondary table first.
    # TODO: this is better done via cascade delete.
    # self.engine.execute(machines_tags.delete())
    purge_links = machines_tags.delete()
    self.session.execute(purge_links)
    self.session.query(Machine).delete()
def delete_machine(self, name) -> bool:
    """Delete a single machine entry from DB.

    @param name: machine name to look up
    @return: True if a machine was found and marked for deletion, False otherwise
    """
    found = self.session.query(Machine).filter_by(name=name).first()
    if not found:
        log.warning("%s does not exist in the database.", name)
        return False

    self.session.delete(found)
    return True
def add_machine(
    self, name, label, arch, ip, platform, tags, interface, snapshot, resultserver_ip, resultserver_port, reserved, locked=False
) -> Machine:
    """Add a guest machine.

    The machine is added to the session; nothing is committed here.
    @param name: machine id
    @param label: machine label
    @param arch: machine arch
    @param ip: machine IP address
    @param platform: machine supported platform
    @param tags: list of comma separated tags
    @param interface: sniffing interface for this machine
    @param snapshot: snapshot name to use instead of the current one, if configured
    @param resultserver_ip: IP address of the Result Server
    @param resultserver_port: port of the Result Server
    @param reserved: True if the machine can only be used when specifically requested
    @param locked: when truthy, the machine is created already locked
    @return: the new Machine
    """
    machine = Machine(
        name=name,
        label=label,
        arch=arch,
        ip=ip,
        platform=platform,
        interface=interface,
        snapshot=snapshot,
        resultserver_ip=resultserver_ip,
        resultserver_port=resultserver_port,
        reserved=reserved,
    )
    # Deal with tags format (i.e., foo,bar,baz)
    if tags:
        for tag in tags.replace(" ", "").split(","):
            # A trailing or doubled comma yields an empty fragment; do not create a nameless tag for it.
            if tag:
                machine.tags.append(self._get_or_create(Tag, name=tag))
    if locked:
        machine.locked = True
    self.session.add(machine)
    return machine
def set_machine_interface(self, label, interface):
    """Set the sniffing interface of the machine with the given label.

    @param label: machine label to look up
    @param interface: value stored in the machine's ``interface`` column
    @return: the updated machine, or None if no machine has that label
    """
    target = self.session.query(Machine).filter_by(label=label).first()
    if target is not None:
        target.interface = interface
        return target

    log.debug("Database error setting interface: %s not found", label)
    return None
def set_vnc_port(self, task_id: int, port: int):
    """Record the VNC port of a task in its options string.

    Appends ``vnc_port=<port>`` to the task's comma-separated options, or makes
    it the whole options string when there are none yet.
    @param task_id: task identifier
    @param port: VNC port number
    """
    task = self.session.query(Task).filter_by(id=task_id).first()
    if task is None:
        # This method handles the VNC port; the message previously said "VPN".
        log.debug("Database error setting VNC port: For task %s", task_id)
        return
    if task.options:
        task.options += f",vnc_port={port}"
    else:
        task.options = f"vnc_port={port}"
def update_clock(self, task_id):
    """Replace a task's epoch-zero clock with the current UTC time.

    A clock equal to the Unix epoch is overwritten with ``utcnow()``; for "file"
    tasks the configured ``cuckoo.daydelta`` number of days is added on top.
    @param task_id: task identifier
    @return: the task's clock, or None if the task does not exist
    """
    task = self.session.get(Task, task_id)
    if not task:
        return

    if task.clock == datetime.utcfromtimestamp(0):
        shift = timedelta(days=self.cfg.cuckoo.daydelta) if task.category == "file" else timedelta()
        task.clock = datetime.utcnow() + shift
    return task.clock
def set_task_status(self, task: Task, status) -> Task:
    """Apply a status to a task and stamp the matching timestamp.

    TASK_DISTRIBUTED_COMPLETED is never written to ``task.status``; it only sets
    ``completed_on``.
    @param task: task to update
    @param status: status string
    @return: the same task, added to the session
    """
    marks_start = status == TASK_RUNNING or status == TASK_DISTRIBUTED
    marks_end = status == TASK_COMPLETED or status == TASK_DISTRIBUTED_COMPLETED

    if status != TASK_DISTRIBUTED_COMPLETED:
        task.status = status

    if marks_start:
        task.started_on = datetime.now()
    elif marks_end:
        task.completed_on = datetime.now()

    self.session.add(task)
    return task
def set_status(self, task_id: int, status) -> Optional[Task]:
    """Set task status.
    @param task_id: task identifier
    @param status: status string
    @return: the updated task, or None if no task has that id
    """
    found = self.session.get(Task, task_id)
    return self.set_task_status(found, status) if found else None
def create_guest(self, machine: Machine, manager: str, task: Task) -> Guest:
    """Create a guest record for a task on a machine, with status "init".

    @param machine: machine whose name, label and platform are copied
    @param manager: machinery manager name
    @param task: task the guest belongs to
    @return: the new Guest, added to the session
    """
    new_guest = Guest(
        name=machine.name,
        label=machine.label,
        platform=machine.platform,
        manager=manager,
        task_id=task.id,
    )
    new_guest.status = "init"
    self.session.add(new_guest)
    return new_guest
def _package_vm_requires_check(self, package: str) -> list:
    """
    We allow to users use their custom tags to tag properly any VM that can run this package

    Reads the comma-separated value configured for ``package`` in the web
    configuration's ``packages`` section.
    @return: list of stripped tag names, empty when nothing is configured
    """
    configured = web_conf.packages.get(package)
    if not configured:
        return []
    return [piece.strip() for piece in configured.split(",")]
def _task_arch_tags_helper(self, task: Task):
    """Split a task's tag names into architecture tags and all other tags.

    @param task: task whose ``tags`` are inspected
    @return: (names equal to "x86" or "x64", remaining names)
    """
    # Are there available machines that match up with a task?
    arch_names = []
    for tag in task.tags:
        if tag.name in ("x86", "x64"):
            arch_names.append(tag.name)

    other_names = [tag.name for tag in task.tags if tag.name not in arch_names]
    return arch_names, other_names
def find_machine_to_service_task(self, task: Task) -> Optional[Machine]:
    """Find a machine that is able to service the given task.
    Returns: The Machine if an available machine was found; None if there is at least 1 machine
        that *could* service it, but they are all currently in use.
    Raises: CuckooUnserviceableTaskError if there are no machines in the pool that would be able
        to service it.
    """
    archs, extra_tags = self._task_arch_tags_helper(task)
    required_os_tags = self._package_vm_requires_check(task.package)
    base_query = self.session.query(Machine).options(joinedload(Machine.tags))

    def pick(include_reserved: bool) -> Optional[Machine]:
        # Select for update a machine, preferring one that is available and was the one that was used the
        # longest time ago. This will give us a machine that can get locked or, if there are none that are
        # currently available, we'll at least know that the task is serviceable.
        candidates = self.filter_machines_to_task(
            include_reserved=include_reserved,
            machines=base_query,
            label=task.machine,
            platform=task.platform,
            tags=extra_tags,
            archs=archs,
            os_version=required_os_tags,
        )
        ordered = candidates.order_by(Machine.locked, Machine.locked_changed_on)
        return cast(Optional[Machine], ordered.with_for_update(of=Machine).first())

    chosen = pick(include_reserved=False)
    if chosen is None and not task.machine and extra_tags:
        # The task was given at least 1 tag, but there are no non-reserved machines
        # that could satisfy the request. So let's see if there are any "reserved"
        # machines that can satisfy it.
        chosen = pick(include_reserved=True)

    if chosen is None:
        raise CuckooUnserviceableTaskError

    # A locked result means nothing can service the task NOW, but at least one machine
    # in the pool could once it becomes available.
    return None if chosen.locked else chosen
def fetch_task(self, categories: list = None):
    """Fetches a task waiting to be processed and locks it for running.

    Picks the pending task with the highest priority (oldest first among equals)
    whose options do not contain "node=", and sets its status to running.
    @param categories: optional list of task categories to restrict the search to
    @return: None or task
    """
    pending = (
        self.session.query(Task)
        .filter_by(status=TASK_PENDING)
        .order_by(Task.priority.desc(), Task.added_on)
        # distributed cape
        .filter(not_(Task.options.contains("node=")))
    )
    if categories:
        pending = pending.filter(Task.category.in_(categories))

    next_task = pending.first()
    if not next_task:
        return None

    self.set_status(task_id=next_task.id, status=TASK_RUNNING)
    return next_task
def guest_get_status(self, task_id):
    """Get the status of the guest attached to a task.
    @param task_id: task id
    @return: guest status, or None if the task has no guest
    """
    guest = self.session.query(Guest).filter_by(task_id=task_id).first()
    if not guest:
        return None
    return guest.status
def guest_set_status(self, task_id, status):
    """Set the status of the guest attached to a task; does nothing if there is no guest.
    @param task_id: task identifier
    @param status: status
    """
    guest = self.session.query(Guest).filter_by(task_id=task_id).first()
    if guest is None:
        return
    guest.status = status
def guest_remove(self, guest_id):
    """Removes a guest start entry.
    @param guest_id: guest log entry id; unknown ids are ignored
    """
    entry = self.session.get(Guest, guest_id)
    if not entry:
        return
    self.session.delete(entry)
def guest_stop(self, guest_id):
    """Logs guest stop by stamping ``shutdown_on`` with the current local time.
    @param guest_id: guest log entry id; unknown ids are ignored
    """
    entry = self.session.get(Guest, guest_id)
    if not entry:
        return
    entry.shutdown_on = datetime.now()
@staticmethod
def filter_machines_by_arch(machines, arch):
    """Add a filter to the given query for the architecture of the machines.
    Allow x64 machines to be returned when requesting x86.

    @param machines: query over Machine rows
    @param arch: requested architecture(s); a falsy value leaves the query unchanged
    @return: the (possibly) filtered query
    """
    if not arch:
        return machines

    if "x86" in arch:
        # Prefer x86 machines over x64 if x86 is what was requested.
        either_arch = Machine.arch.in_(("x64", "x86"))
        return machines.filter(either_arch).order_by(Machine.arch.desc())

    return machines.filter(Machine.arch.in_(arch))
def filter_machines_to_task(
    self, machines: Query, label=None, platform=None, tags=None, archs=None, os_version=None, include_reserved=False
) -> Query:
    """Add filters to the given query based on the task
    @param machines: Query object for the machines
    @param label: label of the machine(s) expected for the task
    @param platform: platform of the machine(s) expected for the task
    @param tags: tags of the machine(s) expected for the task; a machine must carry every one
    @param archs: architectures of the machine(s) expected for the task
    @param os_version: Version of the OSs of the machine(s) expected for the task; one matching tag is enough
    @param include_reserved: Flag to indicate if the list of machines returned should include reserved machines
    @return: the query with the filters applied
    """
    # An explicit label takes precedence; the reserved filter applies only without one.
    if label:
        machines = machines.filter_by(label=label)
    elif not include_reserved:
        machines = machines.filter_by(reserved=False)

    if platform:
        machines = machines.filter_by(platform=platform)

    machines = self.filter_machines_by_arch(machines, archs)

    for wanted in tags or ():
        machines = machines.filter(Machine.tags.any(name=wanted))

    if os_version:
        has_os_tag = Machine.tags.any(Tag.name.in_(os_version))
        machines = machines.filter(has_os_tag)

    return machines
def list_machines(
    self,
    locked=None,
    label=None,
    platform=None,
    tags=None,
    arch=None,
    include_reserved=False,
    os_version=None,
) -> List[Machine]:
    """Lists virtual machines.

    Example rows:
        id | name  | label | arch |
        ----+-------+-------+------+
        77 | cape1 | win7  | x86  |
        78 | cape2 | win10 | x64  |
    @param locked: when a bool, keep only machines with that lock state; other values are ignored
    @return: list of virtual machines
    """
    query = self.session.query(Machine).options(joinedload(Machine.tags))

    # Only a real boolean restricts the lock state (None is not a bool).
    if isinstance(locked, bool):
        query = query.filter_by(locked=locked)

    query = self.filter_machines_to_task(
        machines=query,
        label=label,
        platform=platform,
        tags=tags,
        archs=arch,
        os_version=os_version,
        include_reserved=include_reserved,
    )
    return query.all()
def assign_machine_to_task(self, task: Task, machine: Optional[Machine]) -> Task:
    """Record on the task which machine services it, or clear the assignment.

    @param task: task to update
    @param machine: machine whose label and id are copied; a falsy value clears both fields
    @return: the same task, added to the session
    """
    label, machine_id = (machine.label, machine.id) if machine else (None, None)
    task.machine = label
    task.machine_id = machine_id
    self.session.add(task)
    return task
def lock_machine(self, machine: Machine) -> Machine:
    """Places a lock on a free virtual machine.

    Marks the machine locked, stamps the lock change time and sets its status
    to MACHINE_RUNNING.
    @param machine: the Machine to lock
    @return: locked machine
    """
    locked_at = datetime.now()
    machine.locked = True
    machine.locked_changed_on = locked_at
    self.set_machine_status(machine, MACHINE_RUNNING)
    self.session.add(machine)
    return machine
def unlock_machine(self, machine: Machine) -> Machine:
    """Remove lock from a virtual machine.

    Clears the lock flag, stamps the lock change time and merges the object
    into the session.
    @param machine: The Machine to unlock.
    @return: unlocked machine
    """
    unlocked_at = datetime.now()
    machine.locked = False
    machine.locked_changed_on = unlocked_at
    self.session.merge(machine)
    return machine
def count_machines_available(self, label=None, platform=None, tags=None, arch=None, include_reserved=False, os_version=None):
"""How many (relevant) virtual machines are ready for analysis.
@param label: machine ID.
@param platform: machine platform.
@param tags: machine tags
@param arch: machine arch
@param include_reserved: include 'reserved' machines in the result, regardless of whether or not a 'label' was provided.
@return: free virtual machines count
"""
machines = self.session.query(Machine).filter_by(locked=False)
machines = self.filter_machines_to_task(
machines=machines,
label=label,
platform=platform,
tags=tags,
archs=arch,
os_version=os_version,
include_reserved=include_reserved,
)