
Commit 62dd825 (initial commit, 0 parents)

commit code

dingxianwang committed Jan 22, 2019
Showing 113 changed files with 808,777 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .idea/Reasoning-over-Knowledge-Graph-Paths-for-Recommendation.iml


4 changes: 4 additions & 0 deletions .idea/encodings.xml


7 changes: 7 additions & 0 deletions .idea/misc.xml


8 changes: 8 additions & 0 deletions .idea/modules.xml


6 changes: 6 additions & 0 deletions .idea/vcs.xml


723 changes: 723 additions & 0 deletions .idea/workspace.xml


24 changes: 24 additions & 0 deletions LICENSE.txt
@@ -0,0 +1,24 @@
Additions and Modifications Copyright 2018 eBay Inc.
=====================================================

The MIT License (MIT)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


36 changes: 36 additions & 0 deletions README.md
@@ -0,0 +1,36 @@
# Reasoning Over Knowledge Graph Paths for Recommendation
This repository contains the code for the AAAI 2019 paper ["Explainable Reasoning over Knowledge Graphs for Recommendation"](https://arxiv.org/pdf/1811.04540.pdf). The code makes extensive use of machine learning techniques and can be used to train models and make recommendation predictions for media or other items, as described in the paper.

# Platform Requirements
This code requires Python and Lua. Please ensure the runtime environments for these are installed.

# Steps to Build a Model File (Training) & Steps to Make Predictions
The model details can be found in readMe.pdf.
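
As a rough illustration only (readMe.pdf remains the authoritative guide), the data-preparation scripts under `release/data_prepare/` are driven by positional command-line arguments; the sketch below shows one plausible way to call two of them, and every data file name in it is a hypothetical placeholder.

```python
# Minimal sketch, not taken from readMe.pdf: invoke two of the data-preparation
# scripts with placeholder file names (all names here are hypothetical).
import subprocess

# Group raw path rows by (user, end-entity) pair:
#   argv[1] = raw paths, argv[2] = grouped output (see clustering.py).
subprocess.run(["python", "release/data_prepare/clustering.py",
                "paths_raw.txt", "paths_grouped.txt"], check=True)

# Insert relation ids along each path and attach +1/-1 labels:
#   argv[1] = ground-truth (user, song) pairs, argv[2] = grouped paths,
#   argv[3]/argv[4] = positive/negative outputs (see add_relation_label.py).
subprocess.run(["python", "release/data_prepare/add_relation_label.py",
                "user_song_positive.txt", "paths_grouped.txt",
                "paths_pos.txt", "paths_neg.txt"], check=True)
```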

# Attribution and Acknowledgements
Acknowledgements and thanks to others for the open-source work used in this project.
Code used in this project is available from the following sources.

1. https://github.com/rajarshd/ChainsofReasoning <BR>
Author: Rajarshi Das <BR>
See [Chains of Reasoning over Entities, Relations, and Text using Recurrent Neural Networks](https://arxiv.org/abs/1607.01426) <BR>
Licensed under at least [Section D5 of the GitHub Terms of Service](https://help.github.com/articles/github-terms-of-service/#d-user-generated-content).

2. https://github.com/hexiangnan/neural_collaborative_filtering <BR>
Author: Dr. Xiangnan He (http://www.comp.nus.edu.sg/~xiangnan/) <BR>
See Xiangnan He, Lizi Liao, Hanwang Zhang, Liqiang Nie, Xia Hu and Tat-Seng Chua (2017). Neural Collaborative Filtering. In Proceedings of WWW '17, Perth, Australia, April 03-07, 2017. <BR>
Licensed under [Apache 2.0.](https://github.com/hexiangnan/neural_collaborative_filtering/blob/master/LICENSE)

3. https://github.com/hexiangnan/neural_factorization_machine <BR>
Author: Dr. Xiangnan He (http://www.comp.nus.edu.sg/~xiangnan/) <BR>
See Xiangnan He and Tat-Seng Chua (2017). Neural Factorization Machines for Sparse Predictive Analytics. In Proceedings of SIGIR '17, Shinjuku, Tokyo, Japan, August 07-11, 2017. <BR>
Licensed under at least [Section D5 of the GitHub Terms of Service](https://help.github.com/articles/github-terms-of-service/#d-user-generated-content).

4. https://github.com/HKUST-KnowComp/FMG <BR>
See [Meta-Graph Based Recommendation Fusion over Heterogeneous Information Networks](http://www.cse.ust.hk/~hzhaoaf/data/kdd17-paper.pdf) <BR>
Licensed under at least [Section D5 of the GitHub Terms of Service](https://help.github.com/articles/github-terms-of-service/#d-user-generated-content).

# License
Modifications Copyright 2018 eBay Inc.<BR>
Authors/Developers of Modifications: Dingxian Wang ([email protected]) and Canran Xu ([email protected]) <BR>
New code and modifications to code are licensed under the [MIT License](https://opensource.org/licenses/MIT). See LICENSE.txt for the license text.
121 changes: 121 additions & 0 deletions release/data_prepare/add_relation_label.py
@@ -0,0 +1,121 @@
#***********************************************************
#Copyright 2018 eBay Inc.
#Use of this source code is governed by a MIT-style
#license that can be found in the LICENSE file or at
#https://opensource.org/licenses/MIT.
#***********************************************************
# -*- coding:utf-8 -*-
import codecs
import time
import sys

# relation dict
relation_dict = {"rate": "r1", "belong": "r2", "category": "r3",
"_rate": "r4", "_belong": "r5", "_category": "r6"}


# Find Paths between head entity and tail entity

def get_relation(head_entity, end_entity):
    if "s" in head_entity:
        if "u" in end_entity:
            return relation_dict["_rate"]
        elif "p" in end_entity:
            return relation_dict["_category"]
        elif "t" in end_entity:
            return relation_dict["_belong"]
        else:
            pass
    elif "u" in head_entity:
        if "s" in end_entity:
            return relation_dict["rate"]
        else:
            pass
    elif "p" in head_entity:
        if "s" in end_entity:
            return relation_dict["category"]
        else:
            pass
    elif "t" in head_entity:
        if "s" in end_entity:
            return relation_dict["belong"]
        else:
            pass
    else:
        pass
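
# Illustrative calls with hypothetical entity ids, using the prefix conventions
# assumed above:
#   get_relation("u1", "s2") -> "r1"  (rate)
#   get_relation("s2", "u1") -> "r4"  (_rate)
#   get_relation("s2", "p3") -> "r6"  (_category)
#
# Assumed I/O formats, inferred from the parsing in the main block below:
#   sys.argv[1]: ground-truth pairs; one header line, then "start\tend" per line
#   sys.argv[2]: grouped paths; "start\tend\tn1/n2###n3/n4..." per line
#                (inner nodes only, as produced by clustering.py)
#   sys.argv[3] / sys.argv[4]: positive / negative output;
#                "start\tend\tr-n-r-n-...-r###...\t1" (or "\t-1")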


if __name__ == "__main__":
    # input of positive(user,movie)file
    user_rate_reader = codecs.open(sys.argv[1], mode="r", encoding="utf-8")
    head_line = user_rate_reader.readline()
    ground_truth_list = []
    line = user_rate_reader.readline()
    while line:
        line_list = line.strip().split("\t")
        ground_truth_list.append((line_list[0], line_list[1]))
        line = user_rate_reader.readline()
    user_rate_reader.close()

    ground_truth_list = set(ground_truth_list)
    print(len(ground_truth_list))

    # input and output path
    path_reader = codecs.open(sys.argv[2], mode="r", encoding="utf-8")
    pos_writer = codecs.open(sys.argv[3], mode="w", encoding="utf-8")
    neg_writer = codecs.open(sys.argv[4], mode="w", encoding="utf-8")

    line = path_reader.readline()
    count_num = 0
    start_time = time.time()
    pos_path_num = 0
    neg_path_num = 0
    pos_pair_num = 0
    neg_pair_num = 0
    while line:
        line_list = line.strip().split("\t")
        entity_pair = (line_list[0], line_list[1])
        start_node = line_list[0]
        end_node = line_list[1]
        # add relation
        path_with_relation_list = []
        path_list = line_list[2].split("###")
        for path in path_list:
            temp_path = []
            node_list = path.split("/")
            # node_list.index(0, start_node)
            pre_node = start_node
            for node in node_list:
                re_id = get_relation(pre_node, node)
                temp_path.append(re_id)
                temp_path.append(node)
                pre_node = node
            re_id = get_relation(pre_node, end_node)
            temp_path.append(re_id)
            path_with_relation_list.append("-".join(temp_path))

        # add label
        if entity_pair in ground_truth_list:
            pos_writer.write("\t".join(entity_pair)+"\t"+"###".join(path_with_relation_list)+"\t1\n")
            pos_pair_num += 1
            pos_path_num += len(path_with_relation_list)
        else:
            neg_writer.write("\t".join(entity_pair)+"\t"+"###".join(path_with_relation_list)+"\t-1\n")
            neg_pair_num += 1
            neg_path_num += len(path_with_relation_list)
        # read next line
        line = path_reader.readline()

        count_num += 1
        if count_num % 10000 == 0:
            print(count_num, (time.time()-start_time)/(count_num/10000))

        # break

    path_reader.close()
    pos_writer.close()
    neg_writer.close()

    print("total cost time:", time.time()-start_time)
    print("pos pair nums:", pos_pair_num, "pos path nums:", pos_path_num)
    print("neg pair nums:", neg_pair_num, "neg path nums:", neg_path_num)
33 changes: 33 additions & 0 deletions release/data_prepare/baseModel_data_convert.py
@@ -0,0 +1,33 @@
#***********************************************************
#Copyright 2018 eBay Inc.
#Use of this source code is governed by a MIT-style
#license that can be found in the LICENSE file or at
#https://opensource.org/licenses/MIT.
#***********************************************************
# -*- coding:utf-8 -*-
from __future__ import print_function
import codecs
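
# Inferred from the code below: this script strips the "u"/"s" prefixes from the
# user_song files under data/output/ so the baseline models consume plain
# "user_id\tsong_id\tlabel" triples; the input and output paths are hard-coded.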

if __name__ == "__main__":
    file_writer = codecs.open("data/output/baseModel/baseModel_train.txt", mode="w", encoding="utf-8")
    file_reader = codecs.open("data/output/fmg_data/user_song_train.txt", mode="r")
    line = file_reader.readline()

    while line:
        line_list = line.strip().split("\t")
        file_writer.write(line_list[0].replace("u", "")+"\t"+line_list[1].replace("s", "")+"\t"+line_list[2]+"\n")
        line = file_reader.readline()
    file_writer.close()
    file_reader.close()

    file_writer = codecs.open("data/output/baseModel/baseModel_test.txt", mode="w", encoding="utf-8")
    file_reader = codecs.open("data/output/fmg_test_samples_0.0.txt", mode="r")
    line = file_reader.readline()

    while line:
        line_list = line.strip().split("\t")
        file_writer.write(
            line_list[0].replace("u", "") + "\t" + line_list[1].replace("s", "") + "\t" + line_list[2] + "\n")
        line = file_reader.readline()
    file_writer.close()
    file_reader.close()
77 changes: 77 additions & 0 deletions release/data_prepare/clustering.py
@@ -0,0 +1,77 @@
#***********************************************************
#Copyright 2018 eBay Inc.
#Use of this source code is governed by a MIT-style
#license that can be found in the LICENSE file or at
#https://opensource.org/licenses/MIT.
#***********************************************************

# -*- coding:utf-8 -*-
import codecs
import gc
import time
import sys
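
# Assumed I/O formats, inferred from the parsing below (the input is expected to
# be grouped/sorted by user, since results are flushed whenever the user changes):
#   sys.argv[1]: one tab-separated path per line, user first, end entity last,
#                e.g. "u1\ts2\tp3\ts4" (hypothetical ids)
#   sys.argv[2]: one line per (user, end entity) pair,
#                e.g. "u1\ts4\ts2/p3###..." (inner nodes joined by "/", paths by "###")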

if __name__ == "__main__":
    # input path
    file_reader = codecs.open(sys.argv[1], mode="r", encoding="utf-8")
    # output path
    file_writer = codecs.open(sys.argv[2], mode="w", encoding="utf-8")

    line = file_reader.readline()
    line_list = line.strip().split("\t")
    current_user_id = line_list[0]
    current_user_dict = dict()
    count_num = 0
    path_num = 0
    entity_pair_num = 0
    start_time = time.time()
    while line:
        line_list = line.strip().split("\t")
        user_id = line_list[0]

        if user_id == current_user_id:
            entity_tuple = (line_list[0], line_list[-1])
            if entity_tuple not in current_user_dict:
                current_user_dict[entity_tuple] = ["/".join(line_list[1:-1])]
                # print("/".join(line_list[1:-1]))
            else:
                current_user_dict[entity_tuple].append("/".join(line_list[1:-1]))
                # print("/".join(line_list[1:-1]))
        else:
            for k, v in current_user_dict.items():
                file_writer.write(k[0]+"\t"+k[1]+"\t")
                file_writer.write("###".join(v)+"\n")
                path_num += len(v)
                entity_pair_num += 1
            file_writer.flush()

            del current_user_dict
            gc.collect()

            current_user_dict = dict()
            current_user_id = line_list[0]

            entity_tuple = (line_list[0], line_list[-1])
            if entity_tuple not in current_user_dict:
                current_user_dict[entity_tuple] = ["/".join(line_list[1:-1])]
            else:
                current_user_dict[entity_tuple].append("/".join(line_list[1:-1]))

        count_num += 1
        if count_num % 10000 == 0:
            print(count_num, (time.time()-start_time)/(count_num/10000))
        # read next line
        line = file_reader.readline()

    # write last batch
    for k, v in current_user_dict.items():
        file_writer.write(k[0] + "\t" + k[1] + "\t")
        file_writer.write("###".join(v) + "\n")
        path_num += len(v)
        entity_pair_num += 1
    file_writer.flush()

    file_writer.close()
    file_reader.close()
    print("total cost time:", time.time()-start_time)
    print("path nums:", path_num, "entity pair nums:", entity_pair_num)