diff --git a/assets/json/model_stealing_papers.json b/assets/json/model_stealing_papers.json index 7f64d331..54a8e66f 100644 --- a/assets/json/model_stealing_papers.json +++ b/assets/json/model_stealing_papers.json @@ -1,4 +1,74 @@ [ + { + "date": "2024-12", + "title": "Leveraging Foundation Language Models (FLMs) for Automated Cohort Extraction from Large EHR Databases", + "author": "Purity Mugambi, Alexandra Meliou, and Madalina Fiterau", + "link": "http://arxiv.org/abs/2412.11472v1", + "abstract": "A crucial step in cohort studies is to extract the required cohort from one\nor more study datasets. This step is time-consuming, especially when a\nresearcher is presented with a dataset that they have not previously worked\nwith. When the cohort has to be extracted from multiple datasets, cohort\nextraction can be extremely laborious. In this study, we present an approach\nfor partially automating cohort extraction from multiple electronic health\nrecord (EHR) databases. We formulate the guided multi-dataset cohort extraction\nproblem in which selection criteria are first converted into queries,\ntranslating them from natural language text to language that maps to database\nentities. Then, using FLMs, columns of interest identified from the queries are\nautomatically matched between the study databases. Finally, the generated\nqueries are run across all databases to extract the study cohort. We propose\nand evaluate an algorithm for automating column matching on two large, popular\nand publicly-accessible EHR databases -- MIMIC-III and eICU. Our approach\nachieves a high top-three accuracy of $92\\%$, correctly matching $12$ out of\nthe $13$ columns of interest, when using a small, pre-trained general purpose\nlanguage model. Furthermore, this accuracy is maintained even as the search\nspace (i.e., size of the database) increases." + }, + { + "date": "2024-12", + "title": "Extracting PAC Decision Trees from Black Box Binary Classifiers: The Gender Bias Study Case on BERT-based Language Models", + "author": "Ana Ozaki, Roberto Confalonieri, Ricardo Guimar\u00e3es, and Anders Imenes", + "link": "http://arxiv.org/abs/2412.10513v1", + "abstract": "Decision trees are a popular machine learning method, known for their\ninherent explainability. In Explainable AI, decision trees can be used as\nsurrogate models for complex black box AI models or as approximations of parts\nof such models. A key challenge of this approach is determining how accurately\nthe extracted decision tree represents the original model and to what extent it\ncan be trusted as an approximation of their behavior. In this work, we\ninvestigate the use of the Probably Approximately Correct (PAC) framework to\nprovide a theoretical guarantee of fidelity for decision trees extracted from\nAI models. Based on theoretical results from the PAC framework, we adapt a\ndecision tree algorithm to ensure a PAC guarantee under certain conditions. We\nfocus on binary classification and conduct experiments where we extract\ndecision trees from BERT-based language models with PAC guarantees. Our results\nindicate occupational gender bias in these models." 
+ }, + { + "date": "2024-12", + "title": "Efficient and Comprehensive Feature Extraction in Large Vision-Language Model for Clinical Pathology Analysis", + "author": "Shengxuming Zhang, Weihan Li, Tianhong Gao, Jiacong Hu, Haoming Luo, Mingli Song, Xiuming Zhang, and Zunlei Feng", + "link": "http://arxiv.org/abs/2412.09521v1", + "abstract": "Pathological diagnosis is vital for determining disease characteristics,\nguiding treatment, and assessing prognosis, relying heavily on detailed,\nmulti-scale analysis of high-resolution whole slide images (WSI). However,\ntraditional pure vision models face challenges of redundant feature extraction,\nwhereas existing large vision-language models (LVLMs) are limited by input\nresolution constraints, hindering their efficiency and accuracy. To overcome\nthese issues, we propose two innovative strategies: the mixed task-guided\nfeature enhancement, which directs feature extraction toward lesion-related\ndetails across scales, and the prompt-guided detail feature completion, which\nintegrates coarse- and fine-grained features from WSI based on specific prompts\nwithout compromising inference speed. Leveraging a comprehensive dataset of\n490,000 samples from diverse pathology tasks-including cancer detection,\ngrading, vascular and neural invasion identification, and so on-we trained the\npathology-specialized LVLM, OmniPath. Extensive experiments demonstrate that\nthis model significantly outperforms existing methods in diagnostic accuracy\nand efficiency, offering an interactive, clinically aligned approach for\nauxiliary diagnosis in a wide range of pathology applications." + }, + { + "date": "2024-12", + "title": "Kajal: Extracting Grammar of a Source Code Using Large Language Models", + "author": "Mohammad Jalili Torkamani", + "link": "http://arxiv.org/abs/2412.08842v1", + "abstract": "Understanding and extracting the grammar of a domain-specific language (DSL)\nis crucial for various software engineering tasks; however, manually creating\nthese grammars is time-intensive and error-prone. This paper presents Kajal, a\nnovel approach that automatically infers grammar from DSL code snippets by\nleveraging Large Language Models (LLMs) through prompt engineering and few-shot\nlearning. Kajal dynamically constructs input prompts, using contextual\ninformation to guide the LLM in generating the corresponding grammars, which\nare iteratively refined through a feedback-driven approach. Our experiments\nshow that Kajal achieves 60% accuracy with few-shot learning and 45% without\nit, demonstrating the significant impact of few-shot learning on the tool's\neffectiveness. This approach offers a promising solution for automating DSL\ngrammar extraction, and future work will explore using smaller, open-source\nLLMs and testing on larger datasets to further validate Kajal's performance." + }, + { + "date": "2024-12", + "title": "A Unified Model For Voice and Accent Conversion In Speech and Singing using Self-Supervised Learning and Feature Extraction", + "author": "Sowmya Cheripally", + "link": "http://arxiv.org/abs/2412.08312v1", + "abstract": "This paper presents a new voice conversion model capable of transforming both\nspeaking and singing voices. It addresses key challenges in current systems,\nsuch as conveying emotions, managing pronunciation and accent changes, and\nreproducing non-verbal sounds. 
One of the model's standout features is its\nability to perform accent conversion on hybrid voice samples that encompass\nboth speech and singing, allowing it to change the speaker's accent while\npreserving the original content and prosody. The proposed model uses an\nencoder-decoder architecture: the encoder is based on HuBERT to process the\nspeech's acoustic and linguistic content, while the HiFi-GAN decoder synthesizes audio\nthat matches the target speaker's voice. The model incorporates fundamental\nfrequency (f0) features and singer embeddings to enhance performance while\nensuring that pitch and tone accuracy and vocal identity are preserved during\ntransformation. This approach improves how naturally and flexibly voice style\ncan be transformed, showing strong potential for applications in voice dubbing,\ncontent creation, and technologies like Text-to-Speech (TTS) and Interactive\nVoice Response (IVR) systems." + }, + { + "date": "2024-12", + "title": "Large Language Models Merging for Enhancing the Link Stealing Attack on Graph Neural Networks", + "author": "Faqian Guan, Tianqing Zhu, Wenhan Chang, Wei Ren, and Wanlei Zhou", + "link": "http://arxiv.org/abs/2412.05830v1", + "abstract": "Graph Neural Networks (GNNs), specifically designed to process graph\ndata, have achieved remarkable success in various applications. Link stealing\nattacks on graph data pose a significant privacy threat, as attackers aim to\nextract sensitive relationships between nodes (entities), potentially leading\nto academic misconduct, fraudulent transactions, or other malicious activities.\nPrevious studies have primarily focused on single datasets and did not explore\ncross-dataset attacks, let alone attacks that leverage the combined knowledge\nof multiple attackers. However, we find that an attacker can combine the data\nknowledge of multiple attackers to create a more effective attack model, which\ncan be referred to as cross-dataset attacks. Moreover, if knowledge can be\nextracted with the help of Large Language Models (LLMs), the attack capability\nwill be more significant. In this paper, we propose a novel link stealing\nattack method that takes advantage of cross-dataset and Large Language Models\n(LLMs). The LLM is applied to process datasets with different data structures\nin cross-dataset attacks. Each attacker fine-tunes the LLM on their specific\ndataset to generate a tailored attack model. We then introduce a novel model\nmerging method to integrate the parameters of these attacker-specific models\neffectively. The result is a merged attack model with superior generalization\ncapabilities, enabling effective attacks not only on the attackers' datasets\nbut also on previously unseen (out-of-domain) datasets. We conducted extensive\nexperiments on four datasets to demonstrate the effectiveness of our method.\nAdditional experiments with three different GNN and LLM architectures further\nillustrate the generality of our approach." + }, + { + "date": "2024-12", + "title": "Espresso: High Compression For Rich Extraction From Videos for Your Vision-Language Model", + "author": "Keunwoo Peter Yu, Achal Dave, Rares Ambrus, and Jean Mercat", + "link": "http://arxiv.org/abs/2412.04729v2", + "abstract": "Most of the current vision-language models (VLMs) for videos struggle to\nunderstand videos longer than a few seconds. This is primarily due to the fact\nthat they do not scale to utilizing a large number of frames.
In order to\naddress this limitation, we propose Espresso, a novel method that extracts and\ncompresses spatial and temporal information separately. Through extensive\nevaluations, we show that spatial and temporal compression in Espresso each\nhave a positive impact on the long-form video understanding capabilities; when\ncombined, their positive impact increases. Furthermore, we show that Espresso's\nperformance scales well with more training data, and that Espresso is far more\neffective than the existing projectors for VLMs in long-form video\nunderstanding. Moreover, we devise a more difficult evaluation setting for\nEgoSchema called \"needle-in-a-haystack\" that multiplies the lengths of the\ninput videos. Espresso achieves SOTA performance on this task, outperforming\nthe SOTA VLMs that have been trained on much more training data." + }, + { + "date": "2024-12", + "title": "Prompting Large Language Models for Clinical Temporal Relation Extraction", + "author": "Jianping He, Laila Rasmy, Haifang Li, Jianfu Li, Zenan Sun, Evan Yu, Degui Zhi, and Cui Tao", + "link": "http://arxiv.org/abs/2412.04512v1", + "abstract": "Objective: This paper aims to prompt large language models (LLMs) for\nclinical temporal relation extraction (CTRE) in both few-shot and fully\nsupervised settings. Materials and Methods: This study utilizes four LLMs:\nEncoder-based GatorTron-Base (345M)/Large (8.9B); Decoder-based\nLLaMA3-8B/MeLLaMA-13B. We developed full (FFT) and parameter-efficient (PEFT)\nfine-tuning strategies and evaluated these strategies on the 2012 i2b2 CTRE\ntask. We explored four fine-tuning strategies for GatorTron-Base: (1) Standard\nFine-Tuning, (2) Hard-Prompting with Unfrozen LLMs, (3) Soft-Prompting with\nFrozen LLMs, and (4) Low-Rank Adaptation (LoRA) with Frozen LLMs. For\nGatorTron-Large, we assessed two PEFT strategies-Soft-Prompting and LoRA with\nFrozen LLMs-leveraging Quantization techniques. Additionally, LLaMA3-8B and\nMeLLaMA-13B employed two PEFT strategies: LoRA strategy with Quantization\n(QLoRA) applied to Frozen LLMs using instruction tuning and standard\nfine-tuning. Results: Under fully supervised settings, Hard-Prompting with\nUnfrozen GatorTron-Base achieved the highest F1 score (89.54%), surpassing the\nSOTA model (85.70%) by 3.74%. Additionally, two variants of QLoRA adapted to\nGatorTron-Large and Standard Fine-Tuning of GatorTron-Base exceeded the SOTA\nmodel by 2.36%, 1.88%, and 0.25%, respectively. Decoder-based models with\nfrozen parameters outperformed their Encoder-based counterparts in this\nsetting; however, the trend reversed in few-shot scenarios. Discussions and\nConclusions: This study presented new methods that significantly improved CTRE\nperformance, benefiting downstream tasks reliant on CTRE systems. The findings\nunderscore the importance of selecting appropriate models and fine-tuning\nstrategies based on task requirements and data availability. Future work will\nexplore larger models and broader CTRE applications." + }, + { + "date": "2024-12", + "title": "A Review on Scientific Knowledge Extraction using Large Language Models in Biomedical Sciences", + "author": "Gabriel Lino Garcia, Jo\u00e3o Renato Ribeiro Manesco, Pedro Henrique Paiola, Lucas Miranda, Maria Paola de Salvo, and Jo\u00e3o Paulo Papa", + "link": "http://arxiv.org/abs/2412.03531v1", + "abstract": "The rapid advancement of large language models (LLMs) has opened new\nboundaries in the extraction and synthesis of medical knowledge, particularly\nwithin evidence synthesis. 
This paper reviews the state-of-the-art applications\nof LLMs in the biomedical domain, exploring their effectiveness in automating\ncomplex tasks such as evidence synthesis and data extraction from a biomedical\ncorpus of documents. While LLMs demonstrate remarkable potential, significant\nchallenges remain, including issues related to hallucinations, contextual\nunderstanding, and the ability to generalize across diverse medical tasks. We\nhighlight critical gaps in the current research literature, particularly the\nneed for unified benchmarks to standardize evaluations and ensure reliability\nin real-world applications. In addition, we propose directions for future\nresearch, emphasizing the integration of state-of-the-art techniques such as\nretrieval-augmented generation (RAG) to enhance LLM performance in evidence\nsynthesis. By addressing these challenges and utilizing the strengths of LLMs,\nwe aim to improve access to medical literature and facilitate meaningful\ndiscoveries in healthcare." + }, + { + "date": "2024-11", + "title": "Human Evaluation of Procedural Knowledge Graph Extraction from Text with Large Language Models", + "author": "Valentina Anita Carriero, Antonia Azzini, Ilaria Baroni, Mario Scrocca, and Irene Celino", + "link": "http://arxiv.org/abs/2412.03589v1", + "abstract": "Procedural Knowledge is the know-how expressed in the form of sequences of\nsteps needed to perform some tasks. Procedures are usually described by means\nof natural language texts, such as recipes or maintenance manuals, possibly\nspread across different documents and systems, and their interpretation and\nsubsequent execution is often left to the reader. Representing such procedures\nin a Knowledge Graph (KG) can be the basis to build digital tools to support\nthose users who need to apply or execute them. In this paper, we leverage Large\nLanguage Model (LLM) capabilities and propose a prompt engineering approach to\nextract steps, actions, objects, equipment and temporal information from a\ntextual procedure, in order to populate a Procedural KG according to a\npre-defined ontology. We evaluate the KG extraction results by means of a user\nstudy, in order to qualitatively and quantitatively assess the perceived\nquality and usefulness of the LLM-extracted procedural knowledge. We show that\nLLMs can produce outputs of acceptable quality and we assess the subjective\nperception of AI by human evaluators." + }, { "date": "2024-11", "title": "A survey on cutting-edge relation extraction techniques based on language models", @@ -6,6 +76,13 @@ "link": "http://arxiv.org/abs/2411.18157v1", "abstract": "This comprehensive survey delves into the latest advancements in Relation\nExtraction (RE), a pivotal task in natural language processing essential for\napplications across biomedical, financial, and legal sectors. This study\nhighlights the evolution and current state of RE techniques by analyzing 137\npapers presented at the Association for Computational Linguistics (ACL)\nconferences over the past four years, focusing on models that leverage language\nmodels. Our findings underscore the dominance of BERT-based methods in\nachieving state-of-the-art results for RE while also noting the promising\ncapabilities of emerging large language models (LLMs) like T5, especially in\nfew-shot relation extraction scenarios where they excel in identifying\npreviously unseen relations." 
}, + { + "date": "2024-11", + "title": "DocEDA: Automated Extraction and Design of Analog Circuits from Documents with Large Language Model", + "author": "Hong Cai Chen, Longchang Wu, Ming Gao, Lingrui Shen, Jiarui Zhong, and Yipin Xu", + "link": "http://arxiv.org/abs/2412.05301v1", + "abstract": "Efficient and accurate extraction of electrical parameters from circuit\ndatasheets and design documents is critical for accelerating circuit design in\nElectronic Design Automation (EDA). Traditional workflows often rely on\nengineers manually searching and extracting these parameters, which is\ntime-consuming and prone to human error. To address these challenges, we\nintroduce DocEDA, an automated system that leverages advanced computer vision\ntechniques and Large Language Models (LLMs) to extract electrical parameters\nseamlessly from documents. A layout analysis model specifically designed for\ndatasheets is proposed to classify documents into circuit-related parts.\nUtilizing the inherent Chain-of-Thought reasoning capabilities of LLMs, DocEDA\nautomates the extraction of electronic component parameters from documents. For\ncircuit diagram parsing, an improved GAM-YOLO model is combined with topology\nidentification to transform diagrams into circuit netlists. Then, a space\nmapping enhanced optimization framework is invoked to optimize the layout\nin the document. Experimental evaluations demonstrate that DocEDA significantly\nenhances the efficiency of processing circuit design documents and the accuracy\nof electrical parameter extraction. It exhibits adaptability to various circuit\ndesign scenarios and document formats, offering a novel solution for EDA with\nthe potential to transform traditional methodologies." + }, { "date": "2024-11", "title": "RAMIE: Retrieval-Augmented Multi-task Information Extraction with Large Language Models on Dietary Supplements", @@ -409,7 +486,7 @@ "date": "2024-07", "title": "FIARSE: Model-Heterogeneous Federated Learning via Importance-Aware Submodel Extraction", "author": "Feijie Wu, Xingchen Wang, Yaqing Wang, Tianci Liu, Lu Su, and Jing Gao", - "link": "http://arxiv.org/abs/2407.19389v2", + "link": "http://arxiv.org/abs/2407.19389v3", "abstract": "In federated learning (FL), accommodating clients' varied computational\ncapacities poses a challenge, often limiting the participation of those with\nconstrained resources in global model training. To address this issue, the\nconcept of model heterogeneity through submodel extraction has emerged,\noffering a tailored solution that aligns the model's complexity with each\nclient's computational capacity. In this work, we propose Federated\nImportance-Aware Submodel Extraction (FIARSE), a novel approach that\ndynamically adjusts submodels based on the importance of model parameters,\nthereby overcoming the limitations of previous static and dynamic submodel\nextraction methods. Compared to existing works, the proposed method offers a\ntheoretical foundation for submodel extraction and eliminates the need for\nadditional information beyond the model parameters themselves to determine\nparameter importance, significantly reducing the overhead on clients. Extensive\nexperiments are conducted on various datasets to showcase the superior\nperformance of the proposed FIARSE."
}, { @@ -601,13 +678,6 @@ "link": "http://arxiv.org/abs/2406.03144v1", "abstract": "In order to solve problems in time series prediction, such as the difficulty\nof extracting effective features and the low accuracy of sales volume\nprediction caused by complex market relationships, we proposed a time series\nprediction method of market sales volume based on a Sequential General VMD and\nspatial smoothing Long short-term memory neural network (SS-LSTM) combination\nmodel. Firstly, the spatial smoothing algorithm is used to decompose and\ncalculate the sample data of related industry sectors affected by the linkage\neffect of market sectors, extracting modal features containing information on\noverall market and specific price trends via Sequential General VMD. Then,\naccording to the background of different market datasets, an LSTM network is\nused to model and predict the price of fundamental data and modal\ncharacteristics. The experimental results of data prediction with seasonal and\nperiodic trends show that this method achieves higher price prediction accuracy\nthan traditional prediction methods in specific market contexts and more\naccurately describes the changes in market sales volume." }, - { - "date": "2024-06", - "title": "Stealing Image-to-Image Translation Models With a Single Query", - "author": "Nurit Spingarn-Eliezer, and Tomer Michaeli", - "link": "http://arxiv.org/abs/2406.00828v1", - "abstract": "Training deep neural networks requires significant computational resources\nand large datasets that are often confidential or expensive to collect. As a\nresult, owners tend to protect their models by allowing access only via an API.\nMany works demonstrated the possibility of stealing such protected models by\nrepeatedly querying the API. However, to date, research has predominantly\nfocused on stealing classification models, for which a very large number of\nqueries has been found necessary. In this paper, we study the possibility of\nstealing image-to-image models. Surprisingly, we find that many such models can\nbe stolen with as little as a single, small-sized, query image using simple\ndistillation. We study this phenomenon on a wide variety of model\narchitectures, datasets, and tasks, including denoising, deblurring, deraining,\nsuper-resolution, and biological image-to-image translation. Remarkably, we\nfind that the vulnerability to stealing attacks is shared by CNNs and by models\nwith attention mechanisms, and that stealing is commonly possible even without\nknowing the architecture of the target model." - }, { "date": "2024-05", "title": "Large Language Model Watermark Stealing With Mixed Integer Programming", @@ -1328,75 +1398,5 @@ "author": "Yu Enokibori", "link": "http://arxiv.org/abs/2310.19283v3", "abstract": "Although many deep learning (DL) algorithms have been proposed for the\nIMU-based HAR domain, traditional machine learning that utilizes handcrafted\ntime series features (TSFs) still often performs well. It is not rare that\ncombinations among DL and TSFs show better accuracy than DL-only approaches.\nHowever, there is a problem with time series features in IMU-based HAR. The\namount of derived features can vary greatly depending on the method used to\nselect the 3D basis. Fortunately, DL's strengths include capturing the features\nof input data and adaptively deriving parameters.
Thus, as a new DNN model for\nIMU-based human activity recognition (HAR), this paper proposes rTsfNet, a DNN\nmodel with Multi-head 3D Rotation and Time Series Feature Extraction. rTsfNet\nautomatically selects 3D bases from which features should be derived by\nextracting 3D rotation parameters within the DNN. Then, time series features\n(TSFs), based on many researchers' wisdom, are derived to achieve HAR using\nMLP. Although rTsfNet is a model that does not use CNN, it achieved higher\naccuracy than existing models under well-managed benchmark conditions and\nmultiple datasets: UCI HAR, PAMAP2, Daphnet, and OPPORTUNITY, all of which\ntarget different activities." - }, - { - "date": "2023-10", - "title": "Open Visual Knowledge Extraction via Relation-Oriented Multimodality Model Prompting", - "author": "Hejie Cui, Xinyu Fang, Zihan Zhang, Ran Xu, Xuan Kan, Xin Liu, Yue Yu, Manling Li, Yangqiu Song, and Carl Yang", - "link": "http://arxiv.org/abs/2310.18804v1", - "abstract": "Images contain rich relational knowledge that can help machines understand\nthe world. Existing methods on visual knowledge extraction often rely on the\npre-defined format (e.g., sub-verb-obj tuples) or vocabulary (e.g., relation\ntypes), restricting the expressiveness of the extracted knowledge. In this\nwork, we take a first exploration to a new paradigm of open visual knowledge\nextraction. To achieve this, we present OpenVik which consists of an open\nrelational region detector to detect regions potentially containing relational\nknowledge and a visual knowledge generator that generates format-free knowledge\nby prompting the large multimodality model with the detected region of\ninterest. We also explore two data enhancement techniques for diversifying the\ngenerated format-free visual knowledge. Extensive knowledge quality evaluations\nhighlight the correctness and uniqueness of the extracted open visual knowledge\nby OpenVik. Moreover, integrating our extracted knowledge across various visual\nreasoning applications shows consistent improvements, indicating the real-world\napplicability of OpenVik." - }, - { - "date": "2023-10", - "title": "Can large language models replace humans in the systematic review process? Evaluating GPT-4's efficacy in screening and extracting data from peer-reviewed and grey literature in multiple languages", - "author": "Qusai Khraisha, Sophie Put, Johanna Kappenberg, Azza Warraitch, and Kristin Hadfield", - "link": "http://arxiv.org/abs/2310.17526v2", - "abstract": "Systematic reviews are vital for guiding practice, research, and policy, yet\nthey are often slow and labour-intensive. Large language models (LLMs) could\noffer a way to speed up and automate systematic reviews, but their performance\nin such tasks has not been comprehensively evaluated against humans, and no\nstudy has tested GPT-4, the biggest LLM so far. This pre-registered study\nevaluates GPT-4's capability in title/abstract screening, full-text review, and\ndata extraction across various literature types and languages using a\n'human-out-of-the-loop' approach. Although GPT-4 had accuracy on par with human\nperformance in most tasks, results were skewed by chance agreement and dataset\nimbalance. After adjusting for these, there was a moderate level of performance\nfor data extraction, and - barring studies that used highly reliable prompts -\nscreening performance levelled at none to moderate for different stages and\nlanguages. 
When screening full-text literature using highly reliable prompts,\nGPT-4's performance was 'almost perfect.' Penalising GPT-4 for missing key\nstudies using highly reliable prompts improved its performance even more. Our\nfindings indicate that, currently, substantial caution should be used if LLMs\nare being used to conduct systematic reviews, but suggest that, for certain\nsystematic review tasks delivered under reliable prompts, LLMs can rival human\nperformance." - }, - { - "date": "2023-10", - "title": "Prompt-Driven Building Footprint Extraction in Aerial Images with Offset-Building Model", - "author": "Kai Li, Yupeng Deng, Yunlong Kong, Diyou Liu, Jingbo Chen, Yu Meng, and Junxian Ma", - "link": "http://arxiv.org/abs/2310.16717v3", - "abstract": "More accurate extraction of invisible building footprints from\nvery-high-resolution (VHR) aerial images relies on roof segmentation and\nroof-to-footprint offset extraction. Existing state-of-the-art methods based on\ninstance segmentation suffer from poor generalization when extended to\nlarge-scale data production and fail to achieve low-cost human interactive\nannotation. The latest prompt paradigms inspire us to design a promptable\nframework for roof and offset extraction, which transforms end-to-end\nalgorithms into promptable methods. Within this framework, we propose a novel\nOffset-Building Model (OBM). To rigorously evaluate the algorithm's\ncapabilities, we introduce a prompt-based evaluation method, where our model\nreduces offset errors by 16.6% and improves roof Intersection over Union (IoU)\nby 10.8% compared to other models. Leveraging the common patterns in predicting\noffsets, we propose Distance-NMS (DNMS) algorithms, enabling the model to\nfurther reduce offset vector loss by 6.5%. To further validate the\ngeneralization of models, we tested them using a new dataset with over 7,000\nmanually annotated instance samples. Our algorithms and dataset are available\nat https://anonymous.4open.science/r/OBM-B3EC." - }, - { - "date": "2023-10", - "title": "Defense Against Model Extraction Attacks on Recommender Systems", - "author": "Sixiao Zhang, Hongzhi Yin, Hongxu Chen, and Cheng Long", - "link": "http://arxiv.org/abs/2310.16335v1", - "abstract": "The robustness of recommender systems has become a prominent topic within the\nresearch community. Numerous adversarial attacks have been proposed, but most\nof them rely on extensive prior knowledge, such as all the white-box attacks or\nmost of the black-box attacks which assume that certain external knowledge is\navailable. Among these attacks, the model extraction attack stands out as a\npromising and practical method, involving training a surrogate model by\nrepeatedly querying the target model. However, there is a significant gap in\nthe existing literature when it comes to defending against model extraction\nattacks on recommender systems. In this paper, we introduce Gradient-based\nRanking Optimization (GRO), which is the first defense strategy designed to\ncounter such attacks. We formalize the defense as an optimization problem,\naiming to minimize the loss of the protected target model while maximizing the\nloss of the attacker's surrogate model. Since top-k ranking lists are\nnon-differentiable, we transform them into swap matrices which are instead\ndifferentiable. These swap matrices serve as input to a student model that\nemulates the surrogate model's behavior. By back-propagating the loss of the\nstudent model, we obtain gradients for the swap matrices. 
These gradients are\nused to compute a swap loss, which maximizes the loss of the student model. We\nconducted experiments on three benchmark datasets to evaluate the performance\nof GRO, and the results demonstrate its superior effectiveness in defending\nagainst model extraction attacks." - }, - { - "date": "2023-10", - "title": "Efficient Data Learning for Open Information Extraction with Pre-trained Language Models", - "author": "Zhiyuan Fan, and Shizhu He", - "link": "http://arxiv.org/abs/2310.15021v2", - "abstract": "Open Information Extraction (OpenIE) is a fundamental yet challenging task in\nNatural Language Processing, which involves extracting all triples (subject,\npredicate, object) from a given sentence. While labeling-based methods have\ntheir merits, generation-based techniques offer unique advantages, such as the\nability to generate tokens not present in the original sentence. However, these\ngeneration-based methods often require a significant amount of training data to\nlearn the task form of OpenIE and substantial training time to overcome slow\nmodel convergence due to the order penalty. In this paper, we introduce a novel\nframework, OK-IE, that ingeniously transforms the task form of OpenIE into the\npre-training task form of the T5 model, thereby reducing the need for extensive\ntraining data. Furthermore, we introduce an innovative concept of Anchor to\ncontrol the sequence of model outputs, effectively eliminating the impact of\norder penalty on model convergence and significantly reducing training time.\nExperimental results indicate that, compared to previous SOTA methods, OK-IE\nrequires only 1/100 of the training data (900 instances) and 1/120 of the\ntraining time (3 minutes) to achieve comparable results." - }, - { - "date": "2023-10", - "title": "Knowledge Extraction and Distillation from Large-Scale Image-Text Colonoscopy Records Leveraging Large Language and Vision Models", - "author": "Shuo Wang, Yan Zhu, Xiaoyuan Luo, Zhiwei Yang, Yizhe Zhang, Peiyao Fu, Manning Wang, Zhijian Song, Quanlin Li, Pinghong Zhou, and Yike Guo", - "link": "http://arxiv.org/abs/2310.11173v1", - "abstract": "The development of artificial intelligence systems for colonoscopy analysis\noften necessitates expert-annotated image datasets. However, limitations in\ndataset size and diversity impede model performance and generalisation.\nImage-text colonoscopy records from routine clinical practice, comprising\nmillions of images and text reports, serve as a valuable data source, though\nannotating them is labour-intensive. Here we leverage recent advancements in\nlarge language and vision models and propose EndoKED, a data mining paradigm\nfor deep knowledge extraction and distillation. EndoKED automates the\ntransformation of raw colonoscopy records into image datasets with pixel-level\nannotation. We validate EndoKED using multi-centre datasets of raw colonoscopy\nrecords (~1 million images), demonstrating its superior performance in training\npolyp detection and segmentation models. Furthermore, the EndoKED pre-trained\nvision backbone enables data-efficient and generalisable learning for optical\nbiopsy, achieving expert-level performance in both retrospective and\nprospective validation." 
- }, - { - "date": "2023-10", - "title": "Document-Level In-Context Few-Shot Relation Extraction via Pre-Trained Language Models", - "author": "Yilmazcan Ozyurt, Stefan Feuerriegel, and Ce Zhang", - "link": "http://arxiv.org/abs/2310.11085v4", - "abstract": "Document-level relation extraction aims at inferring structured human\nknowledge from textual documents. State-of-the-art methods for this task use\npre-trained language models (LMs) via fine-tuning, yet fine-tuning is\ncomputationally expensive and cannot adapt to new relation types or new LMs. As\na remedy, we leverage the generalization capabilities of pre-trained LMs and\npresent a novel framework for document-level in-context few-shot relation\nextraction. Our framework has three strengths: it eliminates the need (1) for\nnamed entity recognition and (2) for human annotations of documents, and (3) it\ncan be updated to new LMs without re-training. We evaluate our framework using\nDocRED, the largest publicly available dataset for document-level relation\nextraction, and demonstrate that our framework achieves state-of-the-art\nperformance. We further show that our framework actually performs much better\nthan the original labels from the development set of DocRED. Finally, we\nconduct an extensive benchmark demonstrating the effectiveness of our\nframework, achieving state-of-the-art results across six relation extraction\ndatasets and outperforming more than 30 baseline methods. Unlike our framework,\nthe baseline methods have large computational overhead (e.g., from\nfine-tuning). To the best of our knowledge, we are the first to reformulate the\ndocument-level relation extraction task as a tailored in-context few-shot\nlearning paradigm." - }, - { - "date": "2023-10", - "title": "Convolutional Neural Network Model for Diabetic Retinopathy Feature Extraction and Classification", - "author": "Sharan Subramanian, and Leilani H. Gilpin", - "link": "http://arxiv.org/abs/2310.10806v1", - "abstract": "The application of Artificial Intelligence in the medical market brings up\nincreasing concerns but aids in more timely diagnosis of silent progressing\ndiseases like Diabetic Retinopathy. In order to diagnose Diabetic Retinopathy\n(DR), ophthalmologists use color fundus images, or pictures of the back of the\nretina, to identify small distinct features through a difficult and\ntime-consuming process. Our work creates a novel CNN model and identifies the\nseverity of DR through fundus image input. We classified 4 known DR features,\nincluding micro-aneurysms, cotton wools, exudates, and hemorrhages, through\nconvolutional layers and were able to provide an accurate diagnostic without\nadditional user input. The proposed model is more interpretable and robust to\noverfitting. We present initial results with a sensitivity of 97% and an\naccuracy of 71%. Our contribution is an interpretable model with similar\naccuracy to more complex models. With that, our model advances the field of DR\ndetection and proves to be a key step towards AI-focused medical diagnosis." - }, - { - "date": "2023-10", - "title": "SCME: A Self-Contrastive Method for Data-free and Query-Limited Model Extraction Attack", - "author": "Renyang Liu, Jinhong Zhang, Kwok-Yan Lam, Jun Zhao, and Wei Zhou", - "link": "http://arxiv.org/abs/2310.09792v1", - "abstract": "Previous studies have revealed that artificial intelligence (AI) systems are\nvulnerable to adversarial attacks. 
Among them, model extraction attacks fool\nthe target model by generating adversarial examples on a substitute model. The\ncore of such an attack is training a substitute model as similar to the target\nmodel as possible, where the simulation process can be categorized in a\ndata-dependent and data-free manner. Compared with the data-dependent method,\nthe data-free one has been proven to be more practical in the real world since\nit trains the substitute model with synthesized data. However, the distribution\nof these fake data lacks diversity and cannot detect the decision boundary of\nthe target model well, resulting in the dissatisfactory simulation effect.\nBesides, these data-free techniques need a vast number of queries to train the\nsubstitute model, increasing the time and computing consumption and the risk of\nexposure. To solve the aforementioned problems, in this paper, we propose a\nnovel data-free model extraction method named SCME (Self-Contrastive Model\nExtraction), which considers both the inter- and intra-class diversity in\nsynthesizing fake data. In addition, SCME introduces the Mixup operation to\naugment the fake data, which can explore the target model's decision boundary\neffectively and improve the simulating capacity. Extensive experiments show\nthat the proposed method can yield diversified fake data. Moreover, our method\nhas shown superiority in many different attack settings under the query-limited\nscenario, especially for untargeted attacks, the SCME outperforms SOTA methods\nby 11.43\\% on average for five baseline datasets." - }, - { - "date": "2023-10", - "title": "Notes on Applicability of Explainable AI Methods to Machine Learning Models Using Features Extracted by Persistent Homology", - "author": "Naofumi Hama", - "link": "http://arxiv.org/abs/2310.09780v1", - "abstract": "Data analysis that uses the output of topological data analysis as input for\nmachine learning algorithms has been the subject of extensive research. This\napproach offers a means of capturing the global structure of data. Persistent\nhomology (PH), a common methodology within the field of TDA, has found\nwide-ranging applications in machine learning. One of the key reasons for the\nsuccess of the PH-ML pipeline lies in the deterministic nature of feature\nextraction conducted through PH. The ability to achieve satisfactory levels of\naccuracy with relatively simple downstream machine learning models, when\nprocessing these extracted features, underlines the pipeline's superior\ninterpretability. However, it must be noted that this interpretation has\nencountered issues. Specifically, it fails to accurately reflect the feasible\nparameter region in the data generation process, and the physical or chemical\nconstraints that restrict this process. Against this backdrop, we explore the\npotential application of explainable AI methodologies to this PH-ML pipeline.\nWe apply this approach to the specific problem of predicting gas adsorption in\nmetal-organic frameworks and demonstrate that it can yield suggestive results.\nThe codes to reproduce our results are available at\nhttps://github.com/naofumihama/xai_ph_ml" } ] \ No newline at end of file