#!/usr/bin/python
#
# Copyright 2024 Kaggle Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding=utf-8
from __future__ import print_function
import csv
import io
import json
import os
import re
import shutil
import sys
import tarfile
import tempfile
import time
import zipfile
from os.path import expanduser
from random import random
import bleach
import requests
import urllib3.exceptions as urllib3_exceptions
from requests import RequestException
from kaggle.models.kaggle_models_extended import ResumableUploadResult, File
from requests.adapters import HTTPAdapter
from slugify import slugify
from tqdm import tqdm
from urllib3.util.retry import Retry
from google.protobuf import field_mask_pb2
from kaggle.configuration import Configuration
from kagglesdk import KaggleClient, KaggleEnv
from kagglesdk.admin.types.inbox_file_service import CreateInboxFileRequest
from kagglesdk.blobs.types.blob_api_service import ApiStartBlobUploadRequest, \
ApiStartBlobUploadResponse, ApiBlobType
from kagglesdk.competitions.types.competition_api_service import *
from kagglesdk.datasets.types.dataset_api_service import ApiListDatasetsRequest, \
ApiListDatasetFilesRequest, \
ApiGetDatasetStatusRequest, ApiDownloadDatasetRequest, \
ApiCreateDatasetRequest, ApiCreateDatasetVersionRequestBody, \
ApiCreateDatasetVersionByIdRequest, ApiCreateDatasetVersionRequest, \
ApiDatasetNewFile, ApiUpdateDatasetMetadataRequest, \
ApiGetDatasetMetadataRequest, ApiListDatasetFilesResponse, ApiDatasetFile
from kagglesdk.datasets.types.dataset_enums import DatasetSelectionGroup, \
DatasetSortBy, DatasetFileTypeGroup, DatasetLicenseGroup
from kagglesdk.datasets.types.dataset_types import DatasetSettings, \
SettingsLicense, DatasetCollaborator
from kagglesdk.kernels.types.kernels_api_service import ApiListKernelsRequest, \
ApiListKernelFilesRequest, ApiSaveKernelRequest, ApiGetKernelRequest, \
ApiListKernelSessionOutputRequest, ApiGetKernelSessionStatusRequest
from kagglesdk.kernels.types.kernels_enums import KernelsListSortType, \
KernelsListViewType
from kagglesdk.models.types.model_api_service import ApiListModelsRequest, \
ApiCreateModelRequest, ApiGetModelRequest, ApiDeleteModelRequest, \
ApiUpdateModelRequest, ApiGetModelInstanceRequest, \
ApiCreateModelInstanceRequest, ApiCreateModelInstanceRequestBody, \
ApiListModelInstanceVersionFilesRequest, ApiUpdateModelInstanceRequest, \
ApiDeleteModelInstanceRequest, ApiCreateModelInstanceVersionRequest, \
ApiCreateModelInstanceVersionRequestBody, \
ApiDownloadModelInstanceVersionRequest, ApiDeleteModelInstanceVersionRequest
from kagglesdk.models.types.model_enums import ListModelsOrderBy, \
ModelInstanceType, ModelFramework
from ..models.dataset_column import DatasetColumn
from ..models.upload_file import UploadFile
class DirectoryArchive(object):
def __init__(self, fullpath, format):
self._fullpath = fullpath
self._format = format
self.name = None
self.path = None
def __enter__(self):
self._temp_dir = tempfile.mkdtemp()
_, dir_name = os.path.split(self._fullpath)
self.path = shutil.make_archive(
os.path.join(self._temp_dir, dir_name), self._format, self._fullpath)
_, self.name = os.path.split(self.path)
return self
def __exit__(self, *args):
shutil.rmtree(self._temp_dir)
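# Usage sketch (illustrative, not part of the API surface): DirectoryArchive
# packs a directory into a temporary archive and removes it again on exit.
# The directory path and the 'zip' format below are assumptions for the example.
#
#   with DirectoryArchive('/path/to/my-dataset-dir', 'zip') as archive:
#       print(archive.name)  # e.g. 'my-dataset-dir.zip'
#       print(archive.path)  # full path to the archive inside a temp dir
#   # the temporary directory (and the archive in it) is deleted on exit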
class ResumableUploadContext(object):
def __init__(self, no_resume=False):
self.no_resume = no_resume
self._temp_dir = os.path.join(tempfile.gettempdir(), '.kaggle/uploads')
self._file_uploads = []
def __enter__(self):
if self.no_resume:
return
self._create_temp_dir()
return self
def __exit__(self, exc_type, exc_value, exc_traceback):
if self.no_resume:
return
if exc_type is not None:
# Don't delete the upload file info when there is an error
# to give it a chance to retry/resume on the next invocation.
return
for file_upload in self._file_uploads:
file_upload.cleanup()
def get_upload_info_file_path(self, path):
return os.path.join(
self._temp_dir,
'%s.json' % path.replace(os.path.sep, '_').replace(':', '_'))
def new_resumable_file_upload(self, path, start_blob_upload_request):
file_upload = ResumableFileUpload(path, start_blob_upload_request, self)
self._file_uploads.append(file_upload)
file_upload.load()
return file_upload
def _create_temp_dir(self):
try:
os.makedirs(self._temp_dir)
except FileExistsError:
pass
class ResumableFileUpload(object):
  # Reference: https://cloud.google.com/storage/docs/resumable-uploads
  # A resumable upload must be completed within a week of being initiated;
  # six days are used here to stay safely within that limit.
  RESUMABLE_UPLOAD_EXPIRY_SECONDS = 6 * 24 * 3600
def __init__(self, path, start_blob_upload_request, context):
self.path = path
self.start_blob_upload_request = start_blob_upload_request
self.context = context
self.timestamp = int(time.time())
self.start_blob_upload_response = None
self.can_resume = False
self.upload_complete = False
if self.context.no_resume:
return
self._upload_info_file_path = self.context.get_upload_info_file_path(path)
def get_token(self):
if self.upload_complete:
return self.start_blob_upload_response.token
return None
def load(self):
if self.context.no_resume:
return
self._load_previous_if_any()
def _load_previous_if_any(self):
if not os.path.exists(self._upload_info_file_path):
return False
try:
with io.open(self._upload_info_file_path, 'r') as f:
previous = ResumableFileUpload.from_dict(json.load(f), self.context)
if self._is_previous_valid(previous):
self.start_blob_upload_response = previous.start_blob_upload_response
self.timestamp = previous.timestamp
self.can_resume = True
except Exception as e:
print('Error while trying to load upload info:', e)
def _is_previous_valid(self, previous):
return previous.path == self.path and \
previous.start_blob_upload_request == self.start_blob_upload_request and \
previous.timestamp > time.time() - ResumableFileUpload.RESUMABLE_UPLOAD_EXPIRY_SECONDS
def upload_initiated(self, start_blob_upload_response):
if self.context.no_resume:
return
self.start_blob_upload_response = start_blob_upload_response
with io.open(self._upload_info_file_path, 'w') as f:
json.dump(self.to_dict(), f, indent=True)
def upload_completed(self):
if self.context.no_resume:
return
self.upload_complete = True
self._save()
def _save(self):
with io.open(self._upload_info_file_path, 'w') as f:
json.dump(self.to_dict(), f, indent=True)
def cleanup(self):
if self.context.no_resume:
return
try:
os.remove(self._upload_info_file_path)
except OSError:
pass
def to_dict(self):
return {
'path':
self.path,
'start_blob_upload_request':
self.start_blob_upload_request.to_dict(),
'timestamp':
self.timestamp,
'start_blob_upload_response':
self.start_blob_upload_response.to_dict()
if self.start_blob_upload_response is not None else None,
'upload_complete':
self.upload_complete,
}
def from_dict(other, context):
req = ApiStartBlobUploadRequest()
req.from_dict(other['start_blob_upload_request'])
new = ResumableFileUpload(
other['path'],
ApiStartBlobUploadRequest(**other['start_blob_upload_request']),
context)
new.timestamp = other.get('timestamp')
start_blob_upload_response = other.get('start_blob_upload_response')
if start_blob_upload_response is not None:
new.start_blob_upload_response = ApiStartBlobUploadResponse(
**start_blob_upload_response)
new.upload_complete = other.get('upload_complete') or False
return new
def to_str(self):
return str(self.to_dict())
def __repr__(self):
return self.to_str()
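# Usage sketch (illustrative): how ResumableUploadContext and
# ResumableFileUpload are meant to cooperate. The request object and the
# actual byte transfer are placeholders, not calls defined in this file.
#
#   with ResumableUploadContext() as context:
#       request = ApiStartBlobUploadRequest()  # filled in by the caller
#       upload = context.new_resumable_file_upload('/path/to/file', request)
#       if not upload.can_resume:
#           response = ...                     # start the blob upload
#           upload.upload_initiated(response)  # persist state for later resume
#       # ... transfer the bytes, resuming if a previous attempt was found ...
#       upload.upload_completed()              # saved state is cleaned up on exit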
class KaggleApi:
__version__ = '1.7.3b0'
CONFIG_NAME_PROXY = 'proxy'
CONFIG_NAME_COMPETITION = 'competition'
CONFIG_NAME_PATH = 'path'
CONFIG_NAME_USER = 'username'
CONFIG_NAME_KEY = 'key'
CONFIG_NAME_SSL_CA_CERT = 'ssl_ca_cert'
HEADER_API_VERSION = 'X-Kaggle-ApiVersion'
DATASET_METADATA_FILE = 'dataset-metadata.json'
OLD_DATASET_METADATA_FILE = 'datapackage.json'
KERNEL_METADATA_FILE = 'kernel-metadata.json'
MODEL_METADATA_FILE = 'model-metadata.json'
MODEL_INSTANCE_METADATA_FILE = 'model-instance-metadata.json'
MAX_NUM_INBOX_FILES_TO_UPLOAD = 1000
MAX_UPLOAD_RESUME_ATTEMPTS = 10
config_dir = os.environ.get('KAGGLE_CONFIG_DIR')
if not config_dir:
config_dir = os.path.join(expanduser('~'), '.kaggle')
# Use ~/.kaggle if it already exists for backwards compatibility,
# otherwise follow XDG base directory specification
if sys.platform.startswith('linux') and not os.path.exists(config_dir):
config_dir = os.path.join((os.environ.get('XDG_CONFIG_HOME') or
os.path.join(expanduser('~'), '.config')),
'kaggle')
if not os.path.exists(config_dir):
os.makedirs(config_dir)
config_file = 'kaggle.json'
config = os.path.join(config_dir, config_file)
config_values = {}
already_printed_version_warning = False
args = {} # DEBUG Add --local to use localhost
if os.environ.get('KAGGLE_API_ENVIRONMENT') == 'LOCALHOST':
args = {'--local'}
# Kernels valid types
valid_push_kernel_types = ['script', 'notebook']
valid_push_language_types = ['python', 'r', 'rmarkdown']
valid_push_pinning_types = ['original', 'latest']
valid_list_languages = ['all', 'python', 'r', 'sqlite', 'julia']
valid_list_kernel_types = ['all', 'script', 'notebook']
valid_list_output_types = ['all', 'visualization', 'data']
valid_list_sort_by = [
'hotness', 'commentCount', 'dateCreated', 'dateRun', 'relevance',
'scoreAscending', 'scoreDescending', 'viewCount', 'voteCount'
]
# Competitions valid types
valid_competition_groups = [
'general', 'entered', 'community', 'hosted', 'unlaunched',
'unlaunched_community'
]
valid_competition_categories = [
'all', 'featured', 'research', 'recruitment', 'gettingStarted', 'masters',
'playground'
]
valid_competition_sort_by = [
'grouped', 'best', 'prize', 'earliestDeadline', 'latestDeadline',
'numberOfTeams', 'relevance', 'recentlyCreated'
]
# Datasets valid types
valid_dataset_file_types = ['all', 'csv', 'sqlite', 'json', 'bigQuery']
valid_dataset_license_names = ['all', 'cc', 'gpl', 'odb', 'other']
valid_dataset_sort_bys = [
'hottest', 'votes', 'updated', 'active', 'published'
]
# Models valid types
valid_model_sort_bys = [
'hotness', 'downloadCount', 'voteCount', 'notebookCount', 'createTime'
]
# Command prefixes that are valid without authentication.
command_prefixes_allowing_anonymous_access = ('datasets download',
'datasets files')
# Attributes
competition_fields = [
'ref', 'deadline', 'category', 'reward', 'teamCount', 'userHasEntered'
]
submission_fields = [
'fileName', 'date', 'description', 'status', 'publicScore', 'privateScore'
]
competition_file_fields = ['name', 'totalBytes', 'creationDate']
competition_file_labels = ['name', 'size', 'creationDate']
competition_leaderboard_fields = [
'teamId', 'teamName', 'submissionDate', 'score'
]
dataset_fields = [
'ref', 'title', 'totalBytes', 'lastUpdated', 'downloadCount', 'voteCount',
'usabilityRating'
]
dataset_labels = [
'ref', 'title', 'size', 'lastUpdated', 'downloadCount', 'voteCount',
'usabilityRating'
]
dataset_file_fields = ['name', 'total_bytes', 'creationDate']
model_fields = ['id', 'ref', 'title', 'subtitle', 'author']
model_all_fields = [
'id', 'ref', 'author', 'slug', 'title', 'subtitle', 'isPrivate',
'description', 'publishTime'
]
model_file_fields = ['name', 'size', 'creationDate']
def _is_retriable(self, e):
return issubclass(type(e), ConnectionError) or \
issubclass(type(e), urllib3_exceptions.ConnectionError) or \
issubclass(type(e), urllib3_exceptions.ConnectTimeoutError) or \
issubclass(type(e), urllib3_exceptions.ProtocolError) or \
issubclass(type(e), requests.exceptions.ConnectionError) or \
issubclass(type(e), requests.exceptions.ConnectTimeout)
def _calculate_backoff_delay(self, attempt, initial_delay_millis,
retry_multiplier, randomness_factor):
delay_ms = initial_delay_millis * (retry_multiplier**attempt)
    random_wait_ms = int((random() - 0.5) * 2 * delay_ms * randomness_factor)
total_delay = (delay_ms + random_wait_ms) / 1000.0
return total_delay
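  # Worked example (using the defaults of with_retry below): for attempt=3,
  # initial_delay_millis=500, retry_multiplier=1.7 and randomness_factor=0.5,
  #   delay_ms       = 500 * 1.7**3  ~= 2456 ms
  #   random_wait_ms = up to +/- 0.5 * 2456 ms ~= +/- 1228 ms
  #   total_delay    ~= 1.2 to 3.7 seconds before the next attempt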
def with_retry(self,
func,
max_retries=10,
initial_delay_millis=500,
retry_multiplier=1.7,
randomness_factor=0.5):
def retriable_func(*args):
for i in range(1, max_retries + 1):
try:
return func(*args)
except Exception as e:
if self._is_retriable(e) and i < max_retries:
total_delay = self._calculate_backoff_delay(i, initial_delay_millis,
retry_multiplier,
randomness_factor)
print('Request failed: %s. Will retry in %2.1f seconds' %
(e, total_delay))
time.sleep(total_delay)
continue
raise
return retriable_func
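  # Usage sketch (illustrative): wrap a flaky call so that connection errors
  # are retried with exponential backoff. `api`, the competition slug and the
  # file name are placeholders for the example.
  #
  #   download = api.with_retry(api.competition_download_file)
  #   download('titanic', 'train.csv')  # retried on transient connection errors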
## Authentication
def authenticate(self):
"""authenticate the user with the Kaggle API. This method will generate
a configuration, first checking the environment for credential
variables, and falling back to looking for the .kaggle/kaggle.json
configuration file.
"""
config_data = {}
# Ex: 'datasets list', 'competitions files', 'models instances get', etc.
api_command = ' '.join(sys.argv[1:])
# Step 1: try getting username/password from environment
config_data = self.read_config_environment(config_data)
# Step 2: if credentials were not in env read in configuration file
if self.CONFIG_NAME_USER not in config_data \
or self.CONFIG_NAME_KEY not in config_data:
if os.path.exists(self.config):
config_data = self.read_config_file(config_data)
elif self._is_help_or_version_command(api_command) or (len(
sys.argv) > 2 and api_command.startswith(
self.command_prefixes_allowing_anonymous_access)):
# Some API commands should be allowed without authentication.
return
else:
raise IOError('Could not find {}. Make sure it\'s located in'
' {}. Or use the environment method. See setup'
' instructions at'
' https://github.com/Kaggle/kaggle-api/'.format(
self.config_file, self.config_dir))
# Step 3: load into configuration!
self._load_config(config_data)
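  # Usage sketch (illustrative): the environment route checked in Step 1 above.
  # Any variable starting with KAGGLE_ is picked up, so setting
  #
  #   export KAGGLE_USERNAME=your_username
  #   export KAGGLE_KEY=your_api_key
  #
  # lets `KaggleApi().authenticate()` succeed without a kaggle.json file.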
def _is_help_or_version_command(self, api_command):
"""determines if the string command passed in is for a help or version
command.
Parameters
==========
api_command: a string, 'datasets list', 'competitions files',
'models instances get', etc.
"""
return api_command.endswith(('-h', '--help', '-v', '--version'))
def read_config_environment(self, config_data=None, quiet=False):
"""read_config_environment is the second effort to get a username
and key to authenticate to the Kaggle API. The environment keys
are equivalent to the kaggle.json file, but with "KAGGLE_" prefix
to define a unique namespace.
Parameters
==========
config_data: a partially loaded configuration dictionary (optional)
quiet: suppress verbose print of output (default is False)
"""
# Add all variables that start with KAGGLE_ to config data
if config_data is None:
config_data = {}
for key, val in os.environ.items():
if key.startswith('KAGGLE_'):
config_key = key.replace('KAGGLE_', '', 1).lower()
config_data[config_key] = val
return config_data
## Configuration
def _load_config(self, config_data):
"""the final step of the authenticate steps, where we load the values
from config_data into the Configuration object.
Parameters
==========
config_data: a dictionary with configuration values (keys) to read
into self.config_values
"""
# Username and password are required.
for item in [self.CONFIG_NAME_USER, self.CONFIG_NAME_KEY]:
if item not in config_data:
raise ValueError('Error: Missing %s in configuration.' % item)
configuration = Configuration()
# Add to the final configuration (required)
configuration.username = config_data[self.CONFIG_NAME_USER]
configuration.password = config_data[self.CONFIG_NAME_KEY]
# Proxy
if self.CONFIG_NAME_PROXY in config_data:
configuration.proxy = config_data[self.CONFIG_NAME_PROXY]
# Cert File
if self.CONFIG_NAME_SSL_CA_CERT in config_data:
configuration.ssl_ca_cert = config_data[self.CONFIG_NAME_SSL_CA_CERT]
# Keep config values with class instance, and load api client!
self.config_values = config_data
def read_config_file(self, config_data=None, quiet=False):
"""read_config_file is the first effort to get a username
and key to authenticate to the Kaggle API. Since we can get the
username and password from the environment, it's not required.
Parameters
==========
config_data: the Configuration object to save a username and
password, if defined
quiet: suppress verbose print of output (default is False)
"""
if config_data is None:
config_data = {}
if os.path.exists(self.config):
try:
if os.name != 'nt':
permissions = os.stat(self.config).st_mode
if (permissions & 4) or (permissions & 32):
print('Warning: Your Kaggle API key is readable by other '
'users on this system! To fix this, you can run ' +
'\'chmod 600 {}\''.format(self.config))
with open(self.config) as f:
config_data = json.load(f)
except:
pass
else:
# Warn the user that configuration will be reliant on environment
if not quiet:
print('No Kaggle API config file found, will use environment.')
return config_data
def _read_config_file(self):
"""read in the configuration file, a json file defined at self.config"""
try:
with open(self.config, 'r') as f:
config_data = json.load(f)
except FileNotFoundError:
config_data = {}
return config_data
def _write_config_file(self, config_data, indent=2):
"""write config data to file.
Parameters
==========
    config_data: a dictionary of configuration values to write out
    indent: the number of spaces of indentation to use when writing json
"""
with open(self.config, 'w') as f:
json.dump(config_data, f, indent=indent)
def set_config_value(self, name, value, quiet=False):
"""a client helper function to set a configuration value, meaning
reading in the configuration file (if it exists), saving a new
config value, and then writing back
Parameters
==========
name: the name of the value to set (key in dictionary)
value: the value to set at the key
quiet: disable verbose output if True (default is False)
"""
config_data = self._read_config_file()
if value is not None:
# Update the config file with the value
config_data[name] = value
# Update the instance with the value
self.config_values[name] = value
# If defined by client, set and save!
self._write_config_file(config_data)
if not quiet:
self.print_config_value(name, separator=' is now set to: ')
def unset_config_value(self, name, quiet=False):
"""unset a configuration value
Parameters
==========
name: the name of the value to unset (remove key in dictionary)
quiet: disable verbose output if True (default is False)
"""
config_data = self._read_config_file()
if name in config_data:
del config_data[name]
self._write_config_file(config_data)
if not quiet:
self.print_config_value(name, separator=' is now set to: ')
def get_config_value(self, name):
""" return a config value (with key name) if it's in the config_values,
otherwise return None
Parameters
==========
name: the config value key to get
"""
if name in self.config_values:
return self.config_values[name]
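  # Usage sketch (illustrative): round-tripping a value through the config
  # file. The 'competition' key is one of the CONFIG_NAME_* values above.
  #
  #   api.set_config_value('competition', 'titanic')
  #   api.get_config_value('competition')    # -> 'titanic'
  #   api.unset_config_value('competition')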
def get_default_download_dir(self, *subdirs):
""" Get the download path for a file. If not defined, return default
from config.
Parameters
==========
subdirs: a single (or list of) subfolders under the basepath
"""
# Look up value for key "path" in the config
path = self.get_config_value(self.CONFIG_NAME_PATH)
# If not set in config, default to present working directory
if path is None:
return os.getcwd()
return os.path.join(path, *subdirs)
def print_config_value(self, name, prefix='- ', separator=': '):
"""print a single configuration value, based on a prefix and separator
Parameters
==========
    name: the key of the config value in self.config_values to print
prefix: the prefix to print
separator: the separator to use (default is : )
"""
value_out = 'None'
if name in self.config_values and self.config_values[name] is not None:
value_out = self.config_values[name]
print(prefix + name + separator + value_out)
def print_config_values(self, prefix='- '):
"""a wrapper to print_config_value to print all configuration values
Parameters
==========
prefix: the character prefix to put before the printed config value
defaults to "- "
"""
print('Configuration values from ' + self.config_dir)
self.print_config_value(self.CONFIG_NAME_USER, prefix=prefix)
self.print_config_value(self.CONFIG_NAME_PATH, prefix=prefix)
self.print_config_value(self.CONFIG_NAME_PROXY, prefix=prefix)
self.print_config_value(self.CONFIG_NAME_COMPETITION, prefix=prefix)
def build_kaggle_client(self):
env = KaggleEnv.STAGING if '--staging' in self.args \
else KaggleEnv.ADMIN if '--admin' in self.args \
else KaggleEnv.LOCAL if '--local' in self.args \
else KaggleEnv.PROD
verbose = '--verbose' in self.args or '-v' in self.args
# config = self.api_client.configuration
return KaggleClient(
env=env,
verbose=verbose,
username=self.config_values['username'],
password=self.config_values['key'])
def camel_to_snake(self, name):
"""
:param name: field in camel case
:return: field in snake case
"""
name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower()
def lookup_enum(self, enum_class, item_name):
item = self.camel_to_snake(item_name).upper()
try:
return enum_class[item]
except KeyError:
prefix = self.camel_to_snake(enum_class.__name__).upper()
return enum_class[f'{prefix}_{self.camel_to_snake(item_name).upper()}']
def short_enum_name(self, value):
full_name = str(value)
names = full_name.split('.')
prefix_len = len(self.camel_to_snake(names[0])) + 1 # underscore
return names[1][prefix_len:].lower()
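  # Worked example (illustrative): camel_to_snake('latestDeadline') returns
  # 'latest_deadline', so lookup_enum(CompetitionSortBy, 'latestDeadline') first
  # tries CompetitionSortBy['LATEST_DEADLINE'] and, on a KeyError, falls back to
  # the prefixed member CompetitionSortBy['COMPETITION_SORT_BY_LATEST_DEADLINE'].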
## Competitions
def competitions_list(self,
group=None,
category=None,
sort_by=None,
page=1,
search=None):
""" Make a call to list competitions, format the response, and return
a list of ApiCompetition instances
Parameters
==========
page: the page to return (default is 1)
search: a search term to use (default is empty string)
sort_by: how to sort the result, see valid_competition_sort_by for options
category: category to filter result to; use 'all' to get closed competitions
group: group to filter result to
"""
if group:
if group not in self.valid_competition_groups:
raise ValueError('Invalid group specified. Valid options are ' +
str(self.valid_competition_groups))
if group == 'all':
group = CompetitionListTab.COMPETITION_LIST_TAB_EVERYTHING
else:
group = self.lookup_enum(CompetitionListTab, group)
if category:
if category not in self.valid_competition_categories:
raise ValueError('Invalid category specified. Valid options are ' +
str(self.valid_competition_categories))
category = self.lookup_enum(HostSegment, category)
if sort_by:
if sort_by not in self.valid_competition_sort_by:
raise ValueError('Invalid sort_by specified. Valid options are ' +
str(self.valid_competition_sort_by))
sort_by = self.lookup_enum(CompetitionSortBy, sort_by)
with self.build_kaggle_client() as kaggle:
request = ApiListCompetitionsRequest()
request.group = group
request.page = page
request.category = category
request.search = search
request.sort_by = sort_by
response = kaggle.competitions.competition_api_client.list_competitions(
request)
return response.competitions
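  # Usage sketch (illustrative): list "getting started" competitions matching a
  # search term. The search term and sort order are assumptions for the example.
  #
  #   competitions = api.competitions_list(category='gettingStarted',
  #                                         sort_by='prize', search='titanic')
  #   for c in competitions:
  #       print(c.ref)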
def competitions_list_cli(self,
group=None,
category=None,
sort_by=None,
page=1,
search=None,
csv_display=False):
""" A wrapper for competitions_list for the client.
Parameters
==========
group: group to filter result to
category: category to filter result to
    sort_by: how to sort the result, see valid_competition_sort_by for options
page: the page to return (default is 1)
search: a search term to use (default is empty string)
csv_display: if True, print comma separated values
"""
competitions = self.competitions_list(
group=group,
category=category,
sort_by=sort_by,
page=page,
search=search)
if competitions:
if csv_display:
self.print_csv(competitions, self.competition_fields)
else:
self.print_table(competitions, self.competition_fields)
else:
print('No competitions found')
def competition_submit(self, file_name, message, competition, quiet=False):
""" Submit a competition.
Parameters
==========
file_name: the competition metadata file
message: the submission description
competition: the competition name; if not given use the 'competition' config value
quiet: suppress verbose output (default is False)
"""
if competition is None:
competition = self.get_config_value(self.CONFIG_NAME_COMPETITION)
if competition is not None and not quiet:
print('Using competition: ' + competition)
if competition is None:
raise ValueError('No competition specified')
else:
with self.build_kaggle_client() as kaggle:
request = ApiStartSubmissionUploadRequest()
request.competition_name = competition
request.file_name = os.path.basename(file_name)
request.content_length = os.path.getsize(file_name)
request.last_modified_epoch_seconds = int(os.path.getmtime(file_name))
response = kaggle.competitions.competition_api_client.start_submission_upload(
request)
upload_status = self.upload_complete(file_name, response.create_url,
quiet)
if upload_status != ResumableUploadResult.COMPLETE:
# Actual error is printed during upload_complete. Not
# ideal but changing would not be backwards compatible
return "Could not submit to competition"
submit_request = ApiCreateSubmissionRequest()
submit_request.competition_name = competition
submit_request.blob_file_tokens = response.token
submit_request.submission_description = message
submit_response = kaggle.competitions.competition_api_client.create_submission(
submit_request)
return submit_response
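  # Usage sketch (illustrative): upload a submission file with a description.
  # The file name and competition slug are assumptions for the example.
  #
  #   result = api.competition_submit('submission.csv', 'first attempt', 'titanic')
  #   print(result)  # the create-submission response, or an error string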
def competition_submit_cli(self,
file_name,
message,
competition,
competition_opt=None,
quiet=False):
""" Submit a competition using the client. Arguments are same as for
competition_submit, except for extra arguments provided here.
Parameters
==========
file_name: the competition metadata file
message: the submission description
competition: the competition name; if not given use the 'competition' config value
quiet: suppress verbose output (default is False)
competition_opt: an alternative competition option provided by cli
"""
competition = competition or competition_opt
try:
submit_result = self.competition_submit(file_name, message, competition,
quiet)
except RequestException as e:
if e.response and e.response.status_code == 404:
print('Could not find competition - please verify that you '
'entered the correct competition ID and that the '
'competition is still accepting submissions.')
return None
else:
raise e
return submit_result.message
def competition_submissions(self,
competition,
group=None,
sort=None,
page_token=0,
page_size=20):
""" Get the list of Submission for a particular competition.
Parameters
==========
competition: the name of the competition
group: the submission group
sort: the sort-by option
page_token: token for pagination
page_size: the number of items per page
"""
with self.build_kaggle_client() as kaggle:
request = ApiListSubmissionsRequest()
request.competition_name = competition
request.page = page_token
request.group = group
request.sort_by = sort
response = kaggle.competitions.competition_api_client.list_submissions(
request)
return response.submissions
def competition_submissions_cli(self,
competition=None,
competition_opt=None,
csv_display=False,
page_token=None,
page_size=20,
quiet=False):
""" A wrapper to competition_submission, will return either json or csv
to the user. Additional parameters are listed below, see
competition_submissions for rest.
Parameters
==========
competition: the name of the competition. If None, look to config
competition_opt: an alternative competition option provided by cli
csv_display: if True, print comma separated values
page_token: token for pagination
page_size: the number of items per page
quiet: suppress verbose output (default is False)
"""
competition = competition or competition_opt
if competition is None:
competition = self.get_config_value(self.CONFIG_NAME_COMPETITION)
if competition is not None and not quiet:
print('Using competition: ' + competition)
if competition is None:
raise ValueError('No competition specified')
else:
submissions = self.competition_submissions(
competition, page_token=page_token, page_size=page_size)
if submissions:
if csv_display:
self.print_csv(submissions, self.submission_fields)
else:
self.print_table(submissions, self.submission_fields)
else:
print('No submissions found')
def competition_list_files(self, competition, page_token=None, page_size=20):
""" List files for a competition.
Parameters
==========
competition: the name of the competition
page_token: the page token for pagination
page_size: the number of items per page
"""
with self.build_kaggle_client() as kaggle:
request = ApiListDataFilesRequest()
request.competition_name = competition
request.page_token = page_token
request.page_size = page_size
response = kaggle.competitions.competition_api_client.list_data_files(
request)
return response
def competition_list_files_cli(self,
competition,
competition_opt=None,
csv_display=False,
page_token=None,
page_size=20,
quiet=False):
""" List files for a competition, if it exists.
Parameters
==========
competition: the name of the competition. If None, look to config
competition_opt: an alternative competition option provided by cli
csv_display: if True, print comma separated values
page_token: the page token for pagination
page_size: the number of items per page
quiet: suppress verbose output (default is False)
"""
competition = competition or competition_opt
if competition is None:
competition = self.get_config_value(self.CONFIG_NAME_COMPETITION)
if competition is not None and not quiet:
print('Using competition: ' + competition)
if competition is None:
raise ValueError('No competition specified')
else:
result = self.competition_list_files(competition, page_token, page_size)
next_page_token = result.next_page_token
if next_page_token:
print('Next Page Token = {}'.format(next_page_token))
if result:
if csv_display:
self.print_csv(result.files, self.competition_file_fields,
self.competition_file_labels)
else:
self.print_table(result.files, self.competition_file_fields,
self.competition_file_labels)
else:
print('No files found')
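  # Usage sketch (illustrative): page through all files of a competition using
  # the next_page_token returned by competition_list_files. The competition
  # slug is an assumption for the example.
  #
  #   page_token = None
  #   while True:
  #       result = api.competition_list_files('titanic', page_token, page_size=100)
  #       for f in result.files:
  #           print(f.name)
  #       page_token = result.next_page_token
  #       if not page_token:
  #           break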
def competition_download_file(self,
competition,
file_name,
path=None,
force=False,
quiet=False):
""" Download a competition file to a designated location, or use
a default location.
Parameters
=========