From de6bb5b035c98601bce139ceea4fa2127f1f03f4 Mon Sep 17 00:00:00 2001 From: Linus Gasser Date: Tue, 16 Apr 2024 09:22:22 +0200 Subject: [PATCH] Update DLAB --- data/DLAB/projects.yaml | 129 ++++++++++++++++++++++++++++++++++--- data/labs.yaml | 3 + updates/2024-big-update.md | 4 ++ 3 files changed, 127 insertions(+), 9 deletions(-) diff --git a/data/DLAB/projects.yaml b/data/DLAB/projects.yaml index 94b0bdd..3a5b84b 100644 --- a/data/DLAB/projects.yaml +++ b/data/DLAB/projects.yaml @@ -31,7 +31,7 @@ projects: title: Privacy-Preserving Distributed Learning with Secret Gradient Descent url: https://arxiv.org/pdf/1906.11993.pdf date_added: 2021-11-05 - date_updated: 2022-07-04 + date_updated: 2024-04-16 maturity: 1 invariant-language-models: @@ -55,7 +55,7 @@ projects: tags: - Natural Language date_added: 2021-11-05 - date_updated: 2022-07-04 + date_updated: 2024-04-16 eighenthemes: name: Eigenthemes @@ -86,7 +86,7 @@ projects: text: EMNLP 2021 url: https://2021.emnlp.org/ date_added: 2021-11-05 - date_updated: 2022-07-04 + date_updated: 2024-04-16 quotebank: name: Quotebank @@ -119,7 +119,7 @@ projects: title: Quotebank url: https://quotebank.dlab.tools date_added: 2022-09-28 - date_updated: 2022-09-28 + date_updated: 2024-04-16 dipps: name: DiPPS @@ -149,7 +149,7 @@ projects: title: Differentially Private Propensity Scores for Bias Correction url: https://arxiv.org/abs/2210.02360 date_added: 2023-03-16 - date_updated: 2023-03-16 + date_updated: 2024-04-16 genie: name: GenIE @@ -168,7 +168,7 @@ projects: code: type: Lab GitHub url: https://github.com/epfl-dlab/GenIE - date_last_commit: 2023-02-23 + date_last_commit: 2023-03-28 language: Python license: MIT information: @@ -176,7 +176,7 @@ projects: title: "GenIE: Generative Information Extraction" url: https://arxiv.org/abs/2112.08340 date_added: 2023-03-16 - date_updated: 2023-03-16 + date_updated: 2024-04-16 synthie: name: SynthIE @@ -191,7 +191,7 @@ projects: code: type: Lab GitHub url: 
https://github.com/epfl-dlab/SynthIE - date_last_commit: 2023-03-08 + date_last_commit: 2023-05-27 language: Python license: MIT information: @@ -199,4 +199,115 @@ projects: title: "Exploiting Asymmetry for Synthetic Training Data Generation: SynthIE and the Case of Information Extraction" url: https://arxiv.org/abs/2303.04132 date_added: 2023-03-16 - date_updated: 2023-03-16 + date_updated: 2024-04-16 + + aiflows: + name: aiFlows + description: > + Modular AI collaboration framework + type: "Framework" + categories: + - "Learning" + applications: + - "Info" + tags: + - Machine Learning + - Cloud + - Protocol + layman_desc: > + aiFlows simplifies the design and implementation of complex workflows involving humans, + AI systems, and tools. + It enables modularity by allowing Flows to be stacked like LEGO blocks, reusability by + sharing Flows on the FlowVerse, remote peer-to-peer collaboration between Flows, and + concurrent execution of multiple Flows. + The goal is to empower researchers and practitioners with complete control and + customizability over their AI workflows. + tech_desc: > + aiFlows is a framework centered around Flows and messages. + Flows are independent, self-contained computational building blocks that can complete + semantically meaningful units of work. + Flows communicate via a standardized message-based interface, enabling modularity, + reusability, remote peer-to-peer collaboration, and concurrency. 
+ url: https://epfl-dlab.github.io/aiflows/docs/built_with_sphinx/html/index.html + code: + type: Lab GitHub + url: https://github.com/epfl-dlab/aiflows?tab=readme-ov-file + date_last_commit: 2024-04-12 + language: Python + license: MIT + date_added: 2024-04-16 + + transformers-cfg: + name: Transformers CFG + description: > + Grammar-constrained text generation with Transformers models + type: "Library" + categories: + - "Learning" + applications: + - "Info" + tags: + - Machine Learning + - Natural Language + layman_desc: > + The transformers_cfg library allows you to control the output of + language models like GPT-3 by providing a set of rules (grammar) that + the generated text must follow. + This is useful for generating structured data like code, JSON + objects, or any text that needs to conform to specific patterns or + rules. + The library works with popular language models and provides an + easy way to incorporate grammar constraints into the text generation + process without modifying the underlying models. + tech_desc: > + Transformers_cfg is an extension library for the Hugging Face + Transformers library that enables grammar-constrained text generation. + It provides tools and functionalities to work with context-free + grammars (CFGs) for natural language processing tasks. + The library supports various Transformer models, including LLaMa, + GPT, Bloom, Mistral, and Falcon, and offers features like multilingual + grammar support and integration with Text-Generation-WebUI. 
+ code: + type: Lab GitHub + url: https://github.com/epfl-dlab/transformers-CFG + date_last_commit: 2024-04-13 + language: Python + license: MIT + date_added: 2024-04-16 + + multilingual-entity-insertion: + name: "Entity Insertion in Wikipedia" + description: > + Multilingual entity insertion in Wikipedia articles + type: "Experiments" + categories: + - "Learning" + applications: + - "Info" + tags: + - Machine Learning + - Natural Language + layman_desc: > + Automatically adding relevant links to entities in Wikipedia articles + across different languages is a challenging task. This project provides + a solution by processing data from Wikipedia dumps and training machine + learning models. The data processing extracts information like articles, + links, and mentions from the dumps. The modeling code trains models to + rank candidate text spans for inserting an entity link. The models are + evaluated against various baselines like keyword matching and language + models. This helps in improving the quality and consistency of Wikipedia + by suggesting relevant entity links across multiple languages. + tech_desc: > + Proposes a framework for inserting entities into Wikipedia articles + across multiple languages. It processes Wikipedia dumps to extract + data and train models for entity insertion. The key components are: + 1) Data processing pipeline to extract relevant data from Wikipedia dumps. + 2) Modeling code for training entity insertion models using a ranking + loss or pointwise loss. 3) Benchmarking code to evaluate models against + baselines like BM25, EntQA, and GPT language models. 
+ code: + type: Lab GitHub + url: https://github.com/epfl-dlab/multilingual-entity-insertion + date_last_commit: 2024-04-15 + language: Jupyter Notebook + date_added: 2024-04-16 diff --git a/data/labs.yaml b/data/labs.yaml index e996b84..b31010d 100644 --- a/data/labs.yaml +++ b/data/labs.yaml @@ -76,6 +76,9 @@ labs: applying algorithms and techniques in areas including social and information network analysis, machine learning, computational social science, data mining, natural language processing, and human computation. url: https://dlab.epfl.ch/ + information: + - title: GitHub Repository + url: https://github.com/epfl-dlab DSLAB: name: Dependable Systems Lab diff --git a/updates/2024-big-update.md b/updates/2024-big-update.md index 6ada3f0..a24ebf7 100644 --- a/updates/2024-big-update.md +++ b/updates/2024-big-update.md @@ -371,6 +371,10 @@ Linus - Updated github project dates - Sent email on 2024-04-16 - DLAB - Robert West + - Added aiFlows, transformers-cfg, multilingual-entity-insertion (no license) + - Lab webpage errors + - on https://dlab.epfl.ch/, `Swiss Data Science Center`: `https://datascience.ch/project/the-human-measurement-project-hmp/` + - Sent email on 2024-04-16 [//]: # " Labs w/o code - to be checked next time around "