-
-
Notifications
You must be signed in to change notification settings - Fork 8k
/
slack.py
1500 lines (1287 loc) · 54.8 KB
/
slack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import datetime
import logging
import os
import posixpath
import random
import secrets
import shutil
import zipfile
from collections import defaultdict
from email.headerregistry import Address
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Type, TypeVar
from urllib.parse import urlsplit
import orjson
import requests
from django.conf import settings
from django.forms.models import model_to_dict
from django.utils.timezone import now as timezone_now
from zerver.data_import.import_util import (
ZerverFieldsT,
build_attachment,
build_avatar,
build_defaultstream,
build_huddle,
build_message,
build_realm,
build_recipient,
build_stream,
build_subscription,
build_usermessages,
build_zerver_realm,
create_converted_data_files,
long_term_idle_helper,
make_subscriber_map,
process_avatars,
process_emojis,
process_uploads,
)
from zerver.data_import.sequencer import NEXT_ID
from zerver.data_import.slack_message_conversion import (
convert_to_zulip_markdown,
get_user_full_name,
)
from zerver.lib.emoji import codepoint_to_name
from zerver.lib.export import MESSAGE_BATCH_CHUNK_SIZE
from zerver.lib.storage import static_path
from zerver.lib.upload.base import resize_logo, sanitize_name
from zerver.models import (
CustomProfileField,
CustomProfileFieldValue,
Reaction,
Realm,
RealmEmoji,
Recipient,
UserProfile,
)
# Maps a Slack user ID to the Zulip user ID assigned during conversion.
SlackToZulipUserIDT = Dict[str, int]
# Maps a channel name to (Slack channel ID, Zulip stream ID).
AddedChannelsT = Dict[str, Tuple[str, int]]
# Maps an MPIM (multiparty IM) name to (Slack MPIM ID, Zulip huddle ID).
AddedMPIMsT = Dict[str, Tuple[str, int]]
# Maps a Slack DM ID to the pair of Slack user IDs taking part in that DM.
DMMembersT = Dict[str, Tuple[str, str]]
# Maps a Slack recipient name (channel name, MPIM name, or Slack user ID)
# to the Zulip recipient ID created for it.
SlackToZulipRecipientT = Dict[str, int]

# Generic type for SlackBotEmail class
SlackBotEmailT = TypeVar("SlackBotEmailT", bound="SlackBotEmail")

# We can look up unicode codepoints for Slack emoji using iamcal emoji
# data. https://emojipedia.org/slack/ documents that Slack's emoji names
# are derived from https://github.com/iamcal/emoji-data; this seems
# likely to remain true since Cal is one of Slack's cofounders.
#
# The data file is read once, at import time, and kept in `emoji_data`.
emoji_data_file_path = static_path("generated/emoji/emoji-datasource-google-emoji.json")
with open(emoji_data_file_path, "rb") as emoji_data_file:
    emoji_data = orjson.loads(emoji_data_file.read())
def get_emoji_code(emoji_dict: Dict[str, Any]) -> str:
    """Return the lowercased codepoint string for one emoji data entry.

    The entry's "non_qualified" value is used when it is present and
    truthy; otherwise the "unified" value is used.

    This mirrors the function of the same name in
    tools/setup/emoji/emoji_setup_utils.py and is unlikely to change
    unless iamcal changes their data structure.
    """
    non_qualified = emoji_dict.get("non_qualified")
    codepoint = non_qualified if non_qualified else emoji_dict["unified"]
    return codepoint.lower()
# Translation table from every Slack emoji name (the primary short name
# as well as each alias listed in "short_names") to its codepoint.
slack_emoji_name_to_codepoint: Dict[str, str] = {}
for emoji_dict in emoji_data:
    short_name = emoji_dict["short_name"]
    emoji_code = get_emoji_code(emoji_dict)
    # Register the primary name first, then any alias that differs from it.
    slack_emoji_name_to_codepoint[short_name] = emoji_code
    for sn in emoji_dict["short_names"]:
        if sn == short_name:
            continue
        slack_emoji_name_to_codepoint[sn] = emoji_code
class SlackBotEmail:
    """Hands out one stable email address per Slack bot.

    State lives on the class itself, so it is shared by every caller
    for the lifetime of the process.
    """

    # How many bots have asked for each base email address so far.
    duplicate_email_count: Dict[str, int] = {}
    # Mapping of `bot_id` to final email assigned to the bot.
    assigned_email: Dict[str, str] = {}

    @classmethod
    def get_email(cls: Type[SlackBotEmailT], user_profile: ZerverFieldsT, domain_name: str) -> str:
        """Return the email for the bot described by `user_profile`.

        The address is derived from the bot's "real_name_normalized"
        (or, failing that, "first_name") and `domain_name`.  A repeated
        call for the same "bot_id" returns the address assigned the
        first time.  When several bots map to the same base address,
        the second and later ones get "-2", "-3", ... appended to the
        username.

        Raises AssertionError if the profile has neither name key.
        """
        slack_bot_id = user_profile["bot_id"]
        try:
            return cls.assigned_email[slack_bot_id]
        except KeyError:
            pass

        for name_key in ("real_name_normalized", "first_name"):
            if name_key in user_profile:
                slack_bot_name = user_profile[name_key]
                break
        else:
            raise AssertionError("Could not identify bot type")

        local_part = slack_bot_name.replace("Bot", "").replace(" ", "").lower() + "-bot"
        email = Address(username=local_part, domain=domain_name).addr_spec

        # The counter is always keyed on the base (unsuffixed) address.
        times_requested = cls.duplicate_email_count.get(email, 0) + 1
        cls.duplicate_email_count[email] = times_requested
        if times_requested > 1:
            address = Address(addr_spec=email)
            email_username = address.username + "-" + str(times_requested)
            email = Address(username=email_username, domain=address.domain).addr_spec

        cls.assigned_email[slack_bot_id] = email
        return email
def rm_tree(path: str) -> None:
    """Recursively delete `path` if it exists; otherwise do nothing."""
    if not os.path.exists(path):
        return
    shutil.rmtree(path)
def slack_workspace_to_realm(
    domain_name: str,
    realm_id: int,
    user_list: List[ZerverFieldsT],
    realm_subdomain: str,
    slack_data_dir: str,
    custom_emoji_list: ZerverFieldsT,
) -> Tuple[
    ZerverFieldsT,
    SlackToZulipUserIDT,
    SlackToZulipRecipientT,
    AddedChannelsT,
    AddedMPIMsT,
    DMMembersT,
    List[ZerverFieldsT],
    ZerverFieldsT,
]:
    """
    Build the realm-level data (realm, users, channels, custom emoji)
    for a Slack workspace.

    Returns:
    1. realm, converted realm data
    2. slack_user_id_to_zulip_user_id, a dictionary mapping Slack user id to Zulip user id
    3. slack_recipient_name_to_zulip_recipient_id, a dictionary mapping Slack recipient
       name (channel names, mpim names, usernames, etc) to Zulip recipient id
    4. added_channels, a dictionary mapping channel name to channel id, Zulip stream_id
    5. added_mpims, a dictionary mapping MPIM name to MPIM id, Zulip huddle_id
    6. dm_members, a dictionary mapping DM id to tuple of DM participants
    7. avatars, a list of avatar records for Zulip's avatar records.json
    8. emoji_url_map, a dictionary mapping emoji name to its Slack URL
    """
    # One timestamp is shared by the realm and every user created below.
    conversion_time = float(timezone_now().timestamp())

    zerver_realm: List[ZerverFieldsT] = build_zerver_realm(
        realm_id, realm_subdomain, conversion_time, "Slack"
    )
    realm = build_realm(zerver_realm, realm_id, domain_name)

    (
        zerver_userprofile,
        avatars,
        slack_user_id_to_zulip_user_id,
        zerver_customprofilefield,
        zerver_customprofilefield_value,
    ) = users_to_zerver_userprofile(
        slack_data_dir, user_list, realm_id, int(conversion_time), domain_name
    )

    # The channel conversion also fills in realm["zerver_defaultstream"]; see
    # https://zulip.com/help/set-default-streams-for-new-users
    # for documentation on zerver_defaultstream
    (
        realm,
        added_channels,
        added_mpims,
        dm_members,
        slack_recipient_name_to_zulip_recipient_id,
    ) = channels_to_zerver_stream(
        slack_data_dir, realm_id, realm, slack_user_id_to_zulip_user_id, zerver_userprofile
    )

    zerver_realmemoji, emoji_url_map = build_realmemoji(custom_emoji_list, realm_id)

    realm.update(
        zerver_realmemoji=zerver_realmemoji,
        zerver_userprofile=zerver_userprofile,
        zerver_customprofilefield=zerver_customprofilefield,
        zerver_customprofilefieldvalue=zerver_customprofilefield_value,
    )

    return (
        realm,
        slack_user_id_to_zulip_user_id,
        slack_recipient_name_to_zulip_recipient_id,
        added_channels,
        added_mpims,
        dm_members,
        avatars,
        emoji_url_map,
    )
def build_realmemoji(
    custom_emoji_list: ZerverFieldsT, realm_id: int
) -> Tuple[List[ZerverFieldsT], ZerverFieldsT]:
    """Convert Slack custom emoji into Zulip realm emoji records.

    `custom_emoji_list` maps emoji name to URL.  Only URLs hosted on
    emoji.slack-edge.com are converted; all others are skipped.

    Returns:
    1. zerver_realmemoji, the list of realm emoji dicts, with ids
       numbered from 0 in iteration order
    2. emoji_url_map, mapping each converted emoji name to its URL
    """
    zerver_realmemoji: List[ZerverFieldsT] = []
    emoji_url_map: ZerverFieldsT = {}

    for emoji_name, url in custom_emoji_list.items():
        split_url = urlsplit(url)
        if split_url.hostname != "emoji.slack-edge.com":
            # Some of the emojis we get from the API have invalid links;
            # skipping them prevents errors related to them.
            continue

        realmemoji = RealmEmoji(
            # The next id is simply the number of emoji converted so far.
            id=len(zerver_realmemoji),
            name=emoji_name,
            file_name=posixpath.basename(split_url.path),
            deactivated=False,
        )
        realmemoji_dict = model_to_dict(realmemoji, exclude=["realm", "author"])
        realmemoji_dict["author"] = None
        realmemoji_dict["realm"] = realm_id

        emoji_url_map[emoji_name] = url
        zerver_realmemoji.append(realmemoji_dict)

    return zerver_realmemoji, emoji_url_map
def users_to_zerver_userprofile(
    slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int, timestamp: Any, domain_name: str
) -> Tuple[
    List[ZerverFieldsT],
    List[ZerverFieldsT],
    SlackToZulipUserIDT,
    List[ZerverFieldsT],
    List[ZerverFieldsT],
]:
    """
    Convert Slack users into Zulip user profile, avatar and custom
    profile field records.

    Custom profile data is read from `users.json` in `slack_data_dir`;
    all other user data comes from `users`.  `timestamp` is used as
    both `date_joined` and `last_login` for every user, and is passed
    on to `build_avatar`.

    Returns:
    1. zerver_userprofile, which is a list of user profile
    2. avatar_list, which is list to map avatars to Zulip avatar records.json
    3. slack_user_id_to_zulip_user_id, which is a dictionary to map from Slack user ID to Zulip
       user id
    4. zerver_customprofilefield, which is a list of all custom profile fields
    5. zerver_customprofilefield_values, which is a list of user profile fields
    """
    logging.info("######### IMPORTING USERS STARTED #########\n")
    zerver_userprofile = []
    zerver_customprofilefield: List[ZerverFieldsT] = []
    zerver_customprofilefield_values: List[ZerverFieldsT] = []
    avatar_list: List[ZerverFieldsT] = []
    slack_user_id_to_zulip_user_id = {}

    # The user data we get from the Slack API does not contain custom profile data
    # Hence we get it from the Slack zip file
    slack_data_file_user_list = get_data_file(slack_data_dir + "/users.json")

    slack_user_id_to_custom_profile_fields: ZerverFieldsT = {}
    slack_custom_field_name_to_zulip_custom_field_id: ZerverFieldsT = {}

    for user in slack_data_file_user_list:
        process_slack_custom_fields(user, slack_user_id_to_custom_profile_fields)

    # We have only one primary owner in Slack, see link
    # https://get.slack.help/hc/en-us/articles/201912948-Owners-and-Administrators
    # This is to import the primary owner first from all the users:
    # id 0 is reserved for the primary owner, and every other user is
    # numbered from 1 upwards in the order they appear in `users`.
    user_id_count = custom_profile_field_value_id_count = custom_profile_field_id_count = 0
    primary_owner_id = user_id_count
    user_id_count += 1

    for user in users:
        slack_user_id = user["id"]

        if user.get("is_primary_owner", False):
            user_id = primary_owner_id
        else:
            user_id = user_id_count

        email = get_user_email(user, domain_name)
        # ref: https://zulip.com/help/change-your-profile-picture
        avatar_url = build_avatar_url(
            slack_user_id, user["team_id"], user["profile"]["avatar_hash"]
        )
        build_avatar(user_id, realm_id, email, avatar_url, timestamp, avatar_list)

        # Owner takes precedence over admin; the guest check is a separate
        # `if`, so a guest flag overrides either of them.
        role = UserProfile.ROLE_MEMBER
        if get_owner(user):
            role = UserProfile.ROLE_REALM_OWNER
        elif get_admin(user):
            role = UserProfile.ROLE_REALM_ADMINISTRATOR
        if get_guest(user):
            role = UserProfile.ROLE_GUEST
        timezone = get_user_timezone(user)

        # Users that do not appear in users.json get no custom profile data.
        if slack_user_id in slack_user_id_to_custom_profile_fields:
            # First make sure a Zulip custom profile field exists for each
            # of this user's Slack fields, then record the user's values.
            (
                slack_custom_field_name_to_zulip_custom_field_id,
                custom_profile_field_id_count,
            ) = build_customprofile_field(
                zerver_customprofilefield,
                slack_user_id_to_custom_profile_fields[slack_user_id],
                custom_profile_field_id_count,
                realm_id,
                slack_custom_field_name_to_zulip_custom_field_id,
            )
            custom_profile_field_value_id_count = build_customprofilefields_values(
                slack_custom_field_name_to_zulip_custom_field_id,
                slack_user_id_to_custom_profile_fields[slack_user_id],
                user_id,
                custom_profile_field_value_id_count,
                zerver_customprofilefield_values,
            )

        userprofile = UserProfile(
            full_name=get_user_full_name(user),
            # Deleted users and mirror dummies are imported as inactive.
            is_active=not user.get("deleted", False) and not user["is_mirror_dummy"],
            is_mirror_dummy=user["is_mirror_dummy"],
            id=user_id,
            email=email,
            delivery_email=email,
            avatar_source="U",
            is_bot=user.get("is_bot", False),
            role=role,
            # NOTE(review): 1 is presumably UserProfile.DEFAULT_BOT -- confirm.
            bot_type=1 if user.get("is_bot", False) else None,
            date_joined=timestamp,
            timezone=timezone,
            last_login=timestamp,
        )
        userprofile_dict = model_to_dict(userprofile)
        # Set realm id separately as the corresponding realm is not yet a Realm model instance
        userprofile_dict["realm"] = realm_id

        zerver_userprofile.append(userprofile_dict)
        slack_user_id_to_zulip_user_id[slack_user_id] = user_id
        # The primary owner used the reserved id, so the running counter
        # only advances for everyone else.
        if not user.get("is_primary_owner", False):
            user_id_count += 1

        logging.info("%s -> %s", user["name"], userprofile_dict["email"])

    # Upgrade fields holding long values from short text to long text.
    process_customprofilefields(zerver_customprofilefield, zerver_customprofilefield_values)
    logging.info("######### IMPORTING USERS FINISHED #########\n")
    return (
        zerver_userprofile,
        avatar_list,
        slack_user_id_to_zulip_user_id,
        zerver_customprofilefield,
        zerver_customprofilefield_values,
    )
def build_customprofile_field(
    customprofile_field: List[ZerverFieldsT],
    fields: ZerverFieldsT,
    custom_profile_field_id: int,
    realm_id: int,
    slack_custom_field_name_to_zulip_custom_field_id: ZerverFieldsT,
) -> Tuple[ZerverFieldsT, int]:
    """Create a Zulip custom profile field for each not-yet-seen Slack field.

    New field dicts are appended to `customprofile_field`, and
    `slack_custom_field_name_to_zulip_custom_field_id` is updated in
    place with the id given to each new field.

    Returns the (updated) name-to-id mapping and the next unused
    custom profile field id.
    """
    # The name of the custom profile field is not provided in the Slack data
    # Hash keys of the fields are provided
    # Reference: https://api.slack.com/methods/users.profile.set
    slack_custom_fields = ["phone", "skype"]
    next_field_id = custom_profile_field_id

    for field in fields:
        if field in slack_custom_field_name_to_zulip_custom_field_id:
            continue

        # Only the built-in Slack fields have a usable name; everything
        # else gets a generated, 1-based placeholder name.
        field_name = (
            field if field in slack_custom_fields else f"Slack custom field {next_field_id + 1}"
        )
        customprofilefield = CustomProfileField(
            id=next_field_id,
            name=field_name,
            field_type=1,  # For now this is defaulted to 'SHORT_TEXT'
            # Processing is done in the function 'process_customprofilefields'
        )

        customprofilefield_dict = model_to_dict(customprofilefield, exclude=["realm"])
        customprofilefield_dict["realm"] = realm_id

        slack_custom_field_name_to_zulip_custom_field_id[field] = next_field_id
        next_field_id += 1
        customprofile_field.append(customprofilefield_dict)

    return slack_custom_field_name_to_zulip_custom_field_id, next_field_id
def process_slack_custom_fields(
    user: ZerverFieldsT, slack_user_id_to_custom_profile_fields: ZerverFieldsT
) -> None:
    """Record one user's custom profile fields under their Slack user id.

    The entry is the user's profile "fields" dict when that is
    non-empty, or a fresh empty dict otherwise.  The profile's "phone"
    and "skype" values, when present, are then added to that entry as
    {"value": ...} records.
    """
    slack_user_id = user["id"]
    profile = user["profile"]

    # Note that a non-empty "fields" dict is stored (and extended below)
    # as-is, not copied.
    custom_fields = profile.get("fields") or {}
    slack_user_id_to_custom_profile_fields[slack_user_id] = custom_fields

    for field in ("phone", "skype"):
        if field in profile:
            custom_fields[field] = {"value": profile[field]}
def build_customprofilefields_values(
    slack_custom_field_name_to_zulip_custom_field_id: ZerverFieldsT,
    fields: ZerverFieldsT,
    user_id: int,
    custom_field_id: int,
    custom_field_values: List[ZerverFieldsT],
) -> int:
    """Append one custom profile field value record per non-empty field.

    `fields` maps Slack field name to a dict with a "value" key.  Fields
    whose value is the empty string are skipped.  Records are appended
    to `custom_field_values`, with ids starting at `custom_field_id`.

    Returns the next unused custom profile field value id.
    """
    next_value_id = custom_field_id

    for field, value in fields.items():
        field_text = value["value"]
        if field_text == "":
            continue

        custom_field_value = CustomProfileFieldValue(id=next_value_id, value=field_text)
        custom_field_value_dict = model_to_dict(
            custom_field_value, exclude=["user_profile", "field"]
        )
        custom_field_value_dict.update(
            user_profile=user_id,
            field=slack_custom_field_name_to_zulip_custom_field_id[field],
        )

        custom_field_values.append(custom_field_value_dict)
        next_value_id += 1

    return next_value_id
def process_customprofilefields(
    customprofilefield: List[ZerverFieldsT], customprofilefield_value: List[ZerverFieldsT]
) -> None:
    """Switch a field to long text if any of its values exceeds 50 characters.

    Field dicts in `customprofilefield` are modified in place.
    """
    for field in customprofilefield:
        has_long_value = any(
            field_value["field"] == field["id"] and len(field_value["value"]) > 50
            for field_value in customprofilefield_value
        )
        if has_long_value:
            field["field_type"] = 2  # corresponding to Long text
def get_user_email(user: ZerverFieldsT, domain_name: str) -> str:
    """Return the email address to use for a Slack user.

    Tried in order:
    1. the email in the user's profile;
    2. for mirror dummy users, "<name>@<team_domain>.slack.com";
    3. for bots (profile has "bot_id"), an address from SlackBotEmail;
    4. for the user whose full name is "slackbot" (case-insensitive),
       "imported-slackbot-bot@<domain_name>".

    Raises AssertionError when none of these apply.
    """
    profile = user["profile"]

    if "email" in profile:
        return profile["email"]

    if user["is_mirror_dummy"]:
        return Address(username=user["name"], domain=f'{user["team_domain"]}.slack.com').addr_spec

    if "bot_id" in profile:
        return SlackBotEmail.get_email(profile, domain_name)

    if get_user_full_name(user).lower() != "slackbot":
        raise AssertionError(f"Could not find email address for Slack user {user}")
    return Address(username="imported-slackbot-bot", domain=domain_name).addr_spec
def build_avatar_url(slack_user_id: str, team_id: str, avatar_hash: str) -> str:
    """Return the ca.slack-edge.com avatar URL for a user.

    The URL path is "<team_id>-<slack_user_id>-<avatar_hash>".
    """
    return "https://ca.slack-edge.com/{}-{}-{}".format(team_id, slack_user_id, avatar_hash)
def get_owner(user: ZerverFieldsT) -> bool:
    """Return whether the user is flagged as a primary owner or an owner."""
    return user.get("is_primary_owner", False) or user.get("is_owner", False)
def get_admin(user: ZerverFieldsT) -> bool:
    """Return the user's "is_admin" flag, defaulting to False when absent."""
    return user.get("is_admin", False)
def get_guest(user: ZerverFieldsT) -> bool:
    """Return whether the user is flagged as restricted or ultra restricted."""
    # Slack's Single channel and multi channel guests both have
    # is_restricted set to True. So assuming Slack doesn't change their
    # data model, it would also be correct to just check whether
    # is_restricted is set to True.
    return user.get("is_restricted", False) or user.get("is_ultra_restricted", False)
def get_user_timezone(user: ZerverFieldsT) -> str:
    """Return the user's "tz" value, or "America/New_York" as a fallback.

    The fallback is used when "tz" is missing, is None, or does not
    contain a "/".
    """
    _default_timezone = "America/New_York"
    timezone = user.get("tz", _default_timezone)
    if timezone is not None and "/" in timezone:
        return timezone
    return _default_timezone
def channels_to_zerver_stream(
    slack_data_dir: str,
    realm_id: int,
    realm: Dict[str, Any],
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    zerver_userprofile: List[ZerverFieldsT],
) -> Tuple[
    Dict[str, List[ZerverFieldsT]], AddedChannelsT, AddedMPIMsT, DMMembersT, SlackToZulipRecipientT
]:
    """
    Convert Slack channels, private groups, MPIMs and DMs into Zulip
    streams, huddles, recipients and subscriptions.

    The data is read from channels.json (required) and groups.json,
    mpims.json and dms.json (each optional) in `slack_data_dir`.  The
    results are stored on `realm`, which is modified in place.
    `zerver_userprofile` is not used by this function.

    Returns:
    1. realm, converted realm data
    2. added_channels, which is a dictionary to map from channel name to channel id, Zulip stream_id
    3. added_mpims, which is a dictionary to map from MPIM(multiparty IM) name to MPIM id, Zulip huddle_id
    4. dm_members, which is a dictionary to map from DM id to tuple of DM participants.
    5. slack_recipient_name_to_zulip_recipient_id, which is a dictionary to map from Slack recipient
       name(channel names, mpim names, usernames etc) to Zulip recipient_id
    """
    logging.info("######### IMPORTING CHANNELS STARTED #########\n")

    added_channels = {}
    added_mpims = {}
    dm_members = {}
    slack_recipient_name_to_zulip_recipient_id = {}

    realm["zerver_stream"] = []
    realm["zerver_huddle"] = []
    realm["zerver_subscription"] = []
    realm["zerver_recipient"] = []
    realm["zerver_defaultstream"] = []

    # Running id counters.  The recipient and subscription counters are
    # shared by the stream, huddle and personal sections below, so the
    # order in which those sections run determines the ids handed out.
    subscription_id_count = recipient_id_count = 0
    stream_id_count = defaultstream_id = 0
    huddle_id_count = 0

    def process_channels(channels: List[Dict[str, Any]], invite_only: bool = False) -> None:
        # Creates one stream, its recipient, and its subscriptions for
        # each entry in `channels`.
        nonlocal stream_id_count
        nonlocal recipient_id_count
        nonlocal defaultstream_id
        nonlocal subscription_id_count

        for channel in channels:
            # Map Slack's purpose content into Zulip's stream description.
            # WARN This mapping is lossy since the topic.creator, topic.last_set,
            # purpose.creator, purpose.last_set fields are not preserved.
            description = channel["purpose"]["value"]
            stream_id = stream_id_count
            recipient_id = recipient_id_count

            stream = build_stream(
                float(channel["created"]),
                realm_id,
                channel["name"],
                description,
                stream_id,
                channel["is_archived"],
                invite_only,
            )
            realm["zerver_stream"].append(stream)

            # "general" and "random" become default streams, unless the
            # resulting stream is deactivated.
            slack_default_channels = ["general", "random"]
            if channel["name"] in slack_default_channels and not stream["deactivated"]:
                defaultstream = build_defaultstream(realm_id, stream_id, defaultstream_id)
                realm["zerver_defaultstream"].append(defaultstream)
                defaultstream_id += 1

            added_channels[stream["name"]] = (channel["id"], stream_id)

            recipient = build_recipient(stream_id, recipient_id, Recipient.STREAM)
            realm["zerver_recipient"].append(recipient)
            slack_recipient_name_to_zulip_recipient_id[stream["name"]] = recipient_id

            subscription_id_count = get_subscription(
                channel["members"],
                realm["zerver_subscription"],
                recipient_id,
                slack_user_id_to_zulip_user_id,
                subscription_id_count,
            )

            stream_id_count += 1
            recipient_id_count += 1
            logging.info("%s -> created", channel["name"])

            # TODO map Slack's pins to Zulip's stars
            # There is the security model that Slack's pins are known to the team owner
            # as evident from where it is stored at (channels)
            # "pins": [
            #         {
            #             "id": "1444755381.000003",
            #             "type": "C",
            #             "user": "U061A5N1G",
            #             "owner": "U061A5N1G",
            #             "created": "1444755463"
            #         }
            #         ],

    public_channels = get_data_file(slack_data_dir + "/channels.json")
    process_channels(public_channels)

    # groups.json holds the private channels; they are imported as
    # invite-only streams.  The file may be absent.
    try:
        private_channels = get_data_file(slack_data_dir + "/groups.json")
    except FileNotFoundError:
        private_channels = []
    process_channels(private_channels, True)

    # mpim is the Slack equivalent of huddle.
    def process_mpims(mpims: List[Dict[str, Any]]) -> None:
        # Creates one huddle, its recipient, and its subscriptions for
        # each entry in `mpims`.
        nonlocal huddle_id_count
        nonlocal recipient_id_count
        nonlocal subscription_id_count

        for mpim in mpims:
            huddle = build_huddle(huddle_id_count)
            realm["zerver_huddle"].append(huddle)

            added_mpims[mpim["name"]] = (mpim["id"], huddle_id_count)

            recipient = build_recipient(huddle_id_count, recipient_id_count, Recipient.HUDDLE)
            realm["zerver_recipient"].append(recipient)
            slack_recipient_name_to_zulip_recipient_id[mpim["name"]] = recipient_id_count

            subscription_id_count = get_subscription(
                mpim["members"],
                realm["zerver_subscription"],
                recipient_id_count,
                slack_user_id_to_zulip_user_id,
                subscription_id_count,
            )

            huddle_id_count += 1
            recipient_id_count += 1
            logging.info("%s -> created", mpim["name"])

    try:
        mpims = get_data_file(slack_data_dir + "/mpims.json")
    except FileNotFoundError:
        mpims = []
    process_mpims(mpims)

    # Every user gets a personal recipient, keyed here by Slack user id,
    # plus a subscription of that user to their own personal recipient.
    for slack_user_id, zulip_user_id in slack_user_id_to_zulip_user_id.items():
        recipient = build_recipient(zulip_user_id, recipient_id_count, Recipient.PERSONAL)
        slack_recipient_name_to_zulip_recipient_id[slack_user_id] = recipient_id_count
        sub = build_subscription(recipient_id_count, zulip_user_id, subscription_id_count)
        realm["zerver_recipient"].append(recipient)
        realm["zerver_subscription"].append(sub)
        recipient_id_count += 1
        subscription_id_count += 1

    def process_dms(dms: List[Dict[str, Any]]) -> None:
        # Only the first two members of each DM are recorded; no
        # recipients or subscriptions are created here.
        for dm in dms:
            user_a = dm["members"][0]
            user_b = dm["members"][1]
            dm_members[dm["id"]] = (user_a, user_b)

    try:
        dms = get_data_file(slack_data_dir + "/dms.json")
    except FileNotFoundError:
        dms = []
    process_dms(dms)

    logging.info("######### IMPORTING STREAMS FINISHED #########\n")
    return (
        realm,
        added_channels,
        added_mpims,
        dm_members,
        slack_recipient_name_to_zulip_recipient_id,
    )
def get_subscription(
    channel_members: List[str],
    zerver_subscription: List[ZerverFieldsT],
    recipient_id: int,
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    subscription_id: int,
) -> int:
    """Subscribe each listed Slack user to the given recipient.

    One subscription record per member is appended to
    `zerver_subscription`, with ids starting at `subscription_id`.

    Returns the next unused subscription id.  Raises KeyError if a
    member is missing from `slack_user_id_to_zulip_user_id`.
    """
    next_subscription_id = subscription_id
    for slack_user_id in channel_members:
        zulip_user_id = slack_user_id_to_zulip_user_id[slack_user_id]
        zerver_subscription.append(
            build_subscription(recipient_id, zulip_user_id, next_subscription_id)
        )
        next_subscription_id += 1
    return next_subscription_id
def process_long_term_idle_users(
    slack_data_dir: str,
    users: List[ZerverFieldsT],
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    added_channels: AddedChannelsT,
    added_mpims: AddedMPIMsT,
    dm_members: DMMembersT,
    zerver_userprofile: List[ZerverFieldsT],
) -> Set[int]:
    """Return the result of `long_term_idle_helper` for this Slack export.

    The helper is given an iterator over all exported messages, the
    Slack-specific accessors for a message's sender and timestamp, a
    Slack-to-Zulip user id lookup, the Slack ids of all users, and
    `zerver_userprofile`.
    """
    message_iterator = get_messages_iterator(
        slack_data_dir, added_channels, added_mpims, dm_members
    )
    slack_user_ids = (user["id"] for user in users)

    return long_term_idle_helper(
        message_iterator,
        get_message_sending_user,
        get_timestamp_from_message,
        lambda slack_id: slack_user_id_to_zulip_user_id[slack_id],
        slack_user_ids,
        zerver_userprofile,
    )
def convert_slack_workspace_messages(
    slack_data_dir: str,
    users: List[ZerverFieldsT],
    realm_id: int,
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    slack_recipient_name_to_zulip_recipient_id: SlackToZulipRecipientT,
    added_channels: AddedChannelsT,
    added_mpims: AddedMPIMsT,
    dm_members: DMMembersT,
    realm: ZerverFieldsT,
    zerver_userprofile: List[ZerverFieldsT],
    zerver_realmemoji: List[ZerverFieldsT],
    domain_name: str,
    output_dir: str,
    convert_slack_threads: bool,
    chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE,
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT]]:
    """
    Convert all exported Slack messages, writing them to `output_dir`
    in numbered files of at most `chunk_size` messages each.

    Returns:
    1. reactions, which is a list of the reactions
    2. uploads, which is a list of uploads to be mapped in uploads records.json
    3. attachment, which is a list of the attachments
    """
    long_term_idle = process_long_term_idle_users(
        slack_data_dir,
        users,
        slack_user_id_to_zulip_user_id,
        added_channels,
        added_mpims,
        dm_members,
        zerver_userprofile,
    )

    # A single iterator is shared by every batch below, so each batch
    # picks up where the previous one stopped.
    all_messages = get_messages_iterator(slack_data_dir, added_channels, added_mpims, dm_members)
    logging.info("######### IMPORTING MESSAGES STARTED #########\n")

    total_reactions: List[ZerverFieldsT] = []
    total_attachments: List[ZerverFieldsT] = []
    total_uploads: List[ZerverFieldsT] = []

    dump_file_id = 1

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm["zerver_subscription"],
    )

    while True:
        # Pull the next batch of up to `chunk_size` messages.
        message_data = []
        for _counter, msg in enumerate(all_messages, start=1):
            message_data.append(msg)
            if _counter == chunk_size:
                break
        if not message_data:
            # The iterator is exhausted.
            break

        (
            zerver_message,
            zerver_usermessage,
            attachment,
            uploads,
            reactions,
        ) = channel_message_to_zerver_message(
            realm_id,
            users,
            slack_user_id_to_zulip_user_id,
            slack_recipient_name_to_zulip_recipient_id,
            message_data,
            zerver_realmemoji,
            subscriber_map,
            added_channels,
            dm_members,
            domain_name,
            long_term_idle,
            convert_slack_threads,
        )

        message_json = {
            "zerver_message": zerver_message,
            "zerver_usermessage": zerver_usermessage,
        }
        message_file = f"/messages-{dump_file_id:06}.json"
        logging.info("Writing messages to %s\n", output_dir + message_file)
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions.extend(reactions)
        total_attachments.extend(attachment)
        total_uploads.extend(uploads)

        dump_file_id += 1

    logging.info("######### IMPORTING MESSAGES FINISHED #########\n")
    return total_reactions, total_uploads, total_attachments
def get_messages_iterator(
    slack_data_dir: str,
    added_channels: Dict[str, Any],
    added_mpims: AddedMPIMsT,
    dm_members: DMMembersT,
) -> Iterator[ZerverFieldsT]:
    """Yield every message across all Slack channels, MPIMs and DMs,
    ordered by timestamp.

    Messages are loaded one day (one JSON file name) at a time.  It's
    important to not read all the messages into memory at once,
    because for large imports that can OOM kill.

    Each yielded message is tagged with "channel_name", "mpim_name" or
    "pm_name", set to the name of the directory it was read from.
    """
    dir_names = [*added_channels, *added_mpims, *dm_members]

    # Group the conversation directories by the JSON file names they contain.
    all_json_names: Dict[str, List[str]] = defaultdict(list)
    for dir_name in dir_names:
        dir_path = os.path.join(slack_data_dir, dir_name)
        for json_name in os.listdir(dir_path):
            if json_name.endswith(".json"):
                all_json_names[json_name].append(dir_path)

    # Sort json_name by date
    for json_name in sorted(all_json_names):
        messages_for_one_day: List[ZerverFieldsT] = []

        for dir_path in all_json_names[json_name]:
            message_path = os.path.join(dir_path, json_name)
            dir_name = os.path.basename(dir_path)

            # Every message in this file gets the same tag, so work out
            # which one applies before reading the file.
            if dir_name in added_channels:
                tag_key = "channel_name"
            elif dir_name in added_mpims:
                tag_key = "mpim_name"
            elif dir_name in dm_members:
                tag_key = "pm_name"
            else:
                tag_key = None

            for message in get_data_file(message_path):
                if message.get("user") == "U00":
                    # Skip messages involving the "U00" user,
                    # which is apparently used in some channel rename
                    # messages.  It's likely just the result of some
                    # bug in Slack's export system.  Arguably we could
                    # change this to point to slackbot instead, but
                    # skipping those messages is simpler.
                    continue
                if tag_key is not None:
                    message[tag_key] = dir_name
                messages_for_one_day.append(message)

        # we sort the messages according to the timestamp to show messages with
        # the proper date order
        yield from sorted(messages_for_one_day, key=get_timestamp_from_message)
def channel_message_to_zerver_message(
realm_id: int,
users: List[ZerverFieldsT],
slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
slack_recipient_name_to_zulip_recipient_id: SlackToZulipRecipientT,
all_messages: List[ZerverFieldsT],
zerver_realmemoji: List[ZerverFieldsT],
subscriber_map: Dict[int, Set[int]],
added_channels: AddedChannelsT,
dm_members: DMMembersT,
domain_name: str,
long_term_idle: Set[int],
convert_slack_threads: bool,
) -> Tuple[
List[ZerverFieldsT],
List[ZerverFieldsT],
List[ZerverFieldsT],
List[ZerverFieldsT],
List[ZerverFieldsT],
]:
"""
Returns:
1. zerver_message, which is a list of the messages
2. zerver_usermessage, which is a list of the usermessages
3. zerver_attachment, which is a list of the attachments
4. uploads_list, which is a list of uploads to be mapped in uploads records.json
5. reaction_list, which is a list of all user reactions
"""
zerver_message = []
zerver_usermessage: List[ZerverFieldsT] = []
uploads_list: List[ZerverFieldsT] = []
zerver_attachment: List[ZerverFieldsT] = []
reaction_list: List[ZerverFieldsT] = []
total_user_messages = 0
total_skipped_user_messages = 0
thread_counter: Dict[str, int] = defaultdict(int)
thread_map: Dict[str, str] = {}
for message in all_messages:
slack_user_id = get_message_sending_user(message)
if not slack_user_id:
# Ignore messages without slack_user_id
# These are Sometimes produced by Slack
continue
subtype = message.get("subtype", False)
if subtype in [
# Zulip doesn't have a pinned_item concept
"pinned_item",
"unpinned_item",
# Slack's channel join/leave notices are spammy
"channel_join",
"channel_leave",
"channel_name",
]:
continue
try:
content, mentioned_user_ids, has_link = convert_to_zulip_markdown(
message["text"], users, added_channels, slack_user_id_to_zulip_user_id
)
except Exception:
print("Slack message unexpectedly missing text representation:")
print(orjson.dumps(message, option=orjson.OPT_INDENT_2).decode())
continue
rendered_content = None
if "channel_name" in message:
is_private = False
recipient_id = slack_recipient_name_to_zulip_recipient_id[message["channel_name"]]
elif "mpim_name" in message:
is_private = True
recipient_id = slack_recipient_name_to_zulip_recipient_id[message["mpim_name"]]
elif "pm_name" in message:
is_private = True
sender = get_message_sending_user(message)
members = dm_members[message["pm_name"]]
if sender == members[0]:
recipient_id = slack_recipient_name_to_zulip_recipient_id[members[1]]
sender_recipient_id = slack_recipient_name_to_zulip_recipient_id[members[0]]
else:
recipient_id = slack_recipient_name_to_zulip_recipient_id[members[0]]
sender_recipient_id = slack_recipient_name_to_zulip_recipient_id[members[1]]
message_id = NEXT_ID("message")
if "reactions" in message:
build_reactions(
reaction_list,
message["reactions"],
slack_user_id_to_zulip_user_id,
message_id,
zerver_realmemoji,
)
# Process different subtypes of slack messages
# Subtypes which have only the action in the message should
# be rendered with '/me' in the content initially
# For example "sh_room_created" has the message 'started a call'
# which should be displayed as '/me started a call'
if subtype in ["bot_add", "sh_room_created", "me_message"]:
content = f"/me {content}"
if subtype == "file_comment":
# The file_comment message type only indicates the
# responsible user in a subfield.
message["user"] = message["comment"]["user"]
file_info = process_message_files(
message=message,
domain_name=domain_name,
realm_id=realm_id,
message_id=message_id,
slack_user_id=slack_user_id,
users=users,
slack_user_id_to_zulip_user_id=slack_user_id_to_zulip_user_id,
zerver_attachment=zerver_attachment,
uploads_list=uploads_list,
)
content += file_info["content"]
has_link = has_link or file_info["has_link"]
has_attachment = file_info["has_attachment"]
has_image = file_info["has_image"]
# Slack's unthreaded messages go into a single topic, while
# threads each generate a unique topic labeled by the date and
# a counter among topics on that day.
topic_name = "imported from Slack"
if convert_slack_threads and "thread_ts" in message:
thread_ts = datetime.datetime.fromtimestamp(
float(message["thread_ts"]), tz=datetime.timezone.utc
)
thread_ts_str = thread_ts.strftime(r"%Y/%m/%d %H:%M:%S")
# The topic name is "2015-08-18 Slack thread 2", where the counter at the end is to disambiguate
# threads with the same date.
if thread_ts_str in thread_map:
topic_name = thread_map[thread_ts_str]
else:
thread_date = thread_ts.strftime(r"%Y-%m-%d")
thread_counter[thread_date] += 1
count = thread_counter[thread_date]
topic_name = f"{thread_date} Slack thread {count}"
thread_map[thread_ts_str] = topic_name
zulip_message = build_message(
topic_name=topic_name,
date_sent=get_timestamp_from_message(message),
message_id=message_id,
content=content,
rendered_content=rendered_content,
user_id=slack_user_id_to_zulip_user_id[slack_user_id],
recipient_id=recipient_id,
realm_id=realm_id,
has_image=has_image,
has_link=has_link,
has_attachment=has_attachment,
)
zerver_message.append(zulip_message)
(num_created, num_skipped) = build_usermessages(
zerver_usermessage=zerver_usermessage,