From f8de48d634475a0106b8ba413f22470b82133dba Mon Sep 17 00:00:00 2001 From: peter Date: Mon, 4 Nov 2024 19:48:17 +0800 Subject: [PATCH] update load json --- ...23-03-21-tables.md => 2023-03-21-rw-ms.md} | 28 ++- _posts/2024-11-04-notebook.md | 45 ++++ assets/jupyter/load_json.ipynb | 237 ++++++++++++++++++ papers/fetch_papers.py | 44 ++-- 4 files changed, 334 insertions(+), 20 deletions(-) rename _posts/{2023-03-21-tables.md => 2023-03-21-rw-ms.md} (90%) create mode 100644 _posts/2024-11-04-notebook.md create mode 100644 assets/jupyter/load_json.ipynb diff --git a/_posts/2023-03-21-tables.md b/_posts/2023-03-21-rw-ms.md similarity index 90% rename from _posts/2023-03-21-tables.md rename to _posts/2023-03-21-rw-ms.md index 994f6bbd..40115d11 100644 --- a/_posts/2023-03-21-tables.md +++ b/_posts/2023-03-21-rw-ms.md @@ -28,6 +28,28 @@ A note on the data: This list updates automatically with new papers, sometimes b Below, you'll find the comprehensive paper list. I've also provided [JSON](https://github.com/lorenz-peter/lorenz-peter.github.io/blob/master/assets/json/model_stealing_papers.json) file containing the same data, including one with abstracts. If you use this data for any interesting projects, I'd love to hear about your experiences. +{::nomarkdown} +{% assign jupyter_path = "assets/jupyter/load_json.ipynb" | relative_url %} +{% capture notebook_exists %}{% file_exists assets/jupyter/load_json.ipynb %}{% endcapture %} +{% if notebook_exists == "true" %} +{% jupyter_notebook jupyter_path %} +{% else %} + +

Sorry, the notebook you are looking for does not exist.

+{% endif %} +{:/nomarkdown} + +## Acknowledgment + +The idea is derived from Nicholas Carlini: +[nicholas.carlini.com/writing/2019/all-adversarial-example-papers.html](https://nicholas.carlini.com/writing/2019/all-adversarial-example-papers.html). + +Recently, another website was deployed to discover research trends, [researchtrend.ai](https://researchtrend.ai/communities/AAML). + + +## Table + + -## Acknowledgment - -The idea is derived from Nicolas Carlini: -[nicholas.carlini.com/writing/2019/all-adversarial-example-papers.html](https://nicholas.carlini.com/writing/2019/all-adversarial-example-papers.html). - -Recently, another website was deployed to discover research trends, [researchtrend.ai](https://researchtrend.ai/communities/AAML). diff --git a/_posts/2024-11-04-notebook.md b/_posts/2024-11-04-notebook.md new file mode 100644 index 00000000..2b883b37 --- /dev/null +++ b/_posts/2024-11-04-notebook.md @@ -0,0 +1,45 @@ +--- +layout: post +title: a post with jupyter notebook +date: 2023-07-04 08:57:00-0400 +description: an example of a blog post with jupyter notebook +tags: formatting jupyter +categories: sample-posts +giscus_comments: true +related_posts: false +--- + +To include a jupyter notebook in a post, you can use the following code: + +{% raw %} + +```liquid +{::nomarkdown} +{% assign jupyter_path = 'assets/jupyter/blog.ipynb' | relative_url %} +{% capture notebook_exists %}{% file_exists assets/jupyter/blog.ipynb %}{% endcapture %} +{% if notebook_exists == 'true' %} + {% jupyter_notebook jupyter_path %} +{% else %} +

Sorry, the notebook you are looking for does not exist.

+{% endif %} +{:/nomarkdown} +``` + +{% endraw %} + +Let's break it down: this is possible thanks to the [Jekyll Jupyter Notebook plugin](https://github.com/red-data-tools/jekyll-jupyter-notebook) that allows you to embed jupyter notebooks in your posts. It basically calls [`jupyter nbconvert --to html`](https://nbconvert.readthedocs.io/en/latest/usage.html#convert-html) to convert the notebook to an HTML page and then includes it in the post. Since [Kramdown](https://jekyllrb.com/docs/configuration/markdown/) is the default Markdown renderer for Jekyll, we need to surround the call to the plugin with the [::nomarkdown](https://kramdown.gettalong.org/syntax.html#extensions) tag so that it stops processing this part with Kramdown and outputs the content as-is. + +The plugin takes as input the path to the notebook, but it assumes the file exists. If you want to check if the file exists before calling the plugin, you can use the `file_exists` tag. This avoids getting a 404 error from the plugin and ending up displaying the main page inside of it instead. If the file does not exist, you can output a message to the user. The code displayed above outputs the following: + +{::nomarkdown} +{% assign jupyter_path = "assets/jupyter/blog.ipynb" | relative_url %} +{% capture notebook_exists %}{% file_exists assets/jupyter/blog.ipynb %}{% endcapture %} +{% if notebook_exists == "true" %} +{% jupyter_notebook jupyter_path %} +{% else %} + +

Sorry, the notebook you are looking for does not exist.

+{% endif %} +{:/nomarkdown} + +Note that the jupyter notebook supports both light and dark themes. \ No newline at end of file diff --git a/assets/jupyter/load_json.ipynb b/assets/jupyter/load_json.ipynb new file mode 100644 index 00000000..f76e3fa9 --- /dev/null +++ b/assets/jupyter/load_json.ipynb @@ -0,0 +1,237 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load JSON FILE" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetitleauthorlinkabstract
02014-12-30Detecting Malicious Code by Exploiting Depende...Stavros D. Nikolopoulos, and Iosif Polenakishttp://arxiv.org/abs/1412.8712v1In this paper we present an elaborated graph-b...
12014-12-30Percolation Model of Insider Threats to Assess...Jeremy Kepner, Vijay Gadepally, and Pete Micha...http://arxiv.org/abs/1412.8699v1Rules, regulations, and policies are the basis...
22014-12-29Bloom Filters in Adversarial EnvironmentsMoni Naor, and Eylon Yogevhttp://arxiv.org/abs/1412.8356v5Many efficient data structures use randomness,...
32014-12-27Attacks exploiting deviation of mean photon nu...Shihan Sajeed, Igor Radchenko, Sarah Kaiser, J...http://arxiv.org/abs/1412.8032v2The security of quantum communication using a ...
42014-12-24Balancing Isolation and Sharing of Data for Th...Florian Schröder, Raphael M. Reischuk, and Joh...http://arxiv.org/abs/1412.7641v2In the landscape of application ecosystems, to...
..................
1962014-01-15Multipath Private Communication: An Informatio...Hadi Ahmadi, and Reihaneh Safavi-Nainihttp://arxiv.org/abs/1401.3659v1Sending private messages over communication en...
1972014-01-15Intelligent Systems for Information SecurityAyman M. Bahaa-Eldinhttp://arxiv.org/abs/1401.3592v1This thesis aims to use intelligent systems to...
1982014-01-13A reduced semantics for deciding trace equival...David Baelde, Stéphanie Delaune, and Lucca Hir...http://arxiv.org/abs/1401.2854v2Many privacy-type properties of security proto...
1992014-01-12Practical and fast quantum random number gener...You-Qi Nie, Hong-Fei Zhang, Zhen Zhang, Jian W...http://arxiv.org/abs/1401.2594v1We present a practical high-speed quantum rand...
2002014-01-06Power Grid Defense Against Malicious Cascading...Paulo Shakarian, Hansheng Lei, and Roy Lindelaufhttp://arxiv.org/abs/1401.1086v1An adversary looking to disrupt a power grid m...
\n", + "

201 rows × 5 columns

\n", + "" + ], + "text/plain": [ + " date title \\\n", + "0 2014-12-30 Detecting Malicious Code by Exploiting Depende... \n", + "1 2014-12-30 Percolation Model of Insider Threats to Assess... \n", + "2 2014-12-29 Bloom Filters in Adversarial Environments \n", + "3 2014-12-27 Attacks exploiting deviation of mean photon nu... \n", + "4 2014-12-24 Balancing Isolation and Sharing of Data for Th... \n", + ".. ... ... \n", + "196 2014-01-15 Multipath Private Communication: An Informatio... \n", + "197 2014-01-15 Intelligent Systems for Information Security \n", + "198 2014-01-13 A reduced semantics for deciding trace equival... \n", + "199 2014-01-12 Practical and fast quantum random number gener... \n", + "200 2014-01-06 Power Grid Defense Against Malicious Cascading... \n", + "\n", + " author \\\n", + "0 Stavros D. Nikolopoulos, and Iosif Polenakis \n", + "1 Jeremy Kepner, Vijay Gadepally, and Pete Micha... \n", + "2 Moni Naor, and Eylon Yogev \n", + "3 Shihan Sajeed, Igor Radchenko, Sarah Kaiser, J... \n", + "4 Florian Schröder, Raphael M. Reischuk, and Joh... \n", + ".. ... \n", + "196 Hadi Ahmadi, and Reihaneh Safavi-Naini \n", + "197 Ayman M. Bahaa-Eldin \n", + "198 David Baelde, Stéphanie Delaune, and Lucca Hir... \n", + "199 You-Qi Nie, Hong-Fei Zhang, Zhen Zhang, Jian W... \n", + "200 Paulo Shakarian, Hansheng Lei, and Roy Lindelauf \n", + "\n", + " link \\\n", + "0 http://arxiv.org/abs/1412.8712v1 \n", + "1 http://arxiv.org/abs/1412.8699v1 \n", + "2 http://arxiv.org/abs/1412.8356v5 \n", + "3 http://arxiv.org/abs/1412.8032v2 \n", + "4 http://arxiv.org/abs/1412.7641v2 \n", + ".. ... \n", + "196 http://arxiv.org/abs/1401.3659v1 \n", + "197 http://arxiv.org/abs/1401.3592v1 \n", + "198 http://arxiv.org/abs/1401.2854v2 \n", + "199 http://arxiv.org/abs/1401.2594v1 \n", + "200 http://arxiv.org/abs/1401.1086v1 \n", + "\n", + " abstract \n", + "0 In this paper we present an elaborated graph-b... \n", + "1 Rules, regulations, and policies are the basis... 
\n", + "2 Many efficient data structures use randomness,... \n", + "3 The security of quantum communication using a ... \n", + "4 In the landscape of application ecosystems, to... \n", + ".. ... \n", + "196 Sending private messages over communication en... \n", + "197 This thesis aims to use intelligent systems to... \n", + "198 Many privacy-type properties of security proto... \n", + "199 We present a practical high-speed quantum rand... \n", + "200 An adversary looking to disrupt a power grid m... \n", + "\n", + "[201 rows x 5 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "file_path = \"../json/model_stealing_papers.json\"\n", + "df = pd.read_json(file_path)\n", + "df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "p310", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/papers/fetch_papers.py b/papers/fetch_papers.py index 319f6bd3..e120c13b 100644 --- a/papers/fetch_papers.py +++ b/papers/fetch_papers.py @@ -4,11 +4,8 @@ from dateutil.parser import parse from datetime import datetime -# Construct the default API client. 
-# client = Client() +# https://info.arxiv.org/help/api/user-manual.html # https://lukasschwab.me/arxiv.py/arxiv.html -# Perform the search using arxiv.Search - def create_author_str(authors): # Join authors with ", " and handle the last author differently @@ -19,21 +16,40 @@ def create_author_str(authors): return authors_str -curr_year = datetime.now().year -submittedDate = "submittedDate:[2014 TO {curr_year}]" -search = arxiv.Search( - query=f"{submittedDate} AND (cat:cs.CR) AND (model stealing OR model extraction OR high-fidelity)", - # max_results=500, - sort_by=arxiv.SortCriterion.SubmittedDate, - sort_order=arxiv.SortOrder.Descending -) + + +submittedDate = f"submittedDate:[2017 TO {datetime.now().year}]"  # NOTE(review): the arXiv API manual documents this range as [YYYYMMDDTTTT TO YYYYMMDDTTTT]; confirm bare years are accepted +query = f"{submittedDate} AND (cat:cs.CR) AND (model steal* OR model extract* OR high-fidelity)" +query = "(cat:cs.CR) AND (model stealing OR model extract OR high-fidelity)"  # NOTE: rebinds query, so the date-filtered variant on the previous line is not the one sent; must stay a plain str (no trailing comma) + +# query='"quantum dots"' + +# id_list = [240610011] + +results_generator = arxiv.Client( + page_size=1000, + delay_seconds=3, + num_retries=3 +).results(arxiv.Search( + query=query, + id_list=[], + sort_by=arxiv.SortCriterion.SubmittedDate, + sort_order=arxiv.SortOrder.Descending +)) + + +# search = arxiv.Search( +# query=f"{submittedDate} AND (cat:cs.CR) AND (model stealing OR model extraction OR high-fidelity)", +# # max_results=500, +# sort_by=arxiv.SortCriterion.SubmittedDate, +# sort_order=arxiv.SortOrder.Descending +# ) papers_data = [] # Iterate over the results from search -for result in search.results(): +for result in results_generator: # breakpoint() - formatted_date = result.published.strftime("%Y-%m-%d") + formatted_date = result.published.strftime("%Y-%m") authors = [author.name for author in result.authors] # papers_data.append({'id': result.entry_id, 'title': result.title, 'authors': ', '.join(authors)})